1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/MathExtras.h"
28 /// A macro used to combine two fcCategory enums into one key which can be used
29 /// in a switch statement to classify how the interaction of two APFloat's
30 /// categories affects an operation.
32 /// TODO: If clang source code is ever allowed to use constexpr in its own
33 /// codebase, change this into a static inline function.
34 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
36 /* Assumed in hexadecimal significand parsing, and conversion to
37 hexadecimal strings. */
38 static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
42 /* Represents floating point arithmetic semantics. */
44 /* The largest E such that 2^E is representable; this matches the
45 definition of IEEE 754. */
46 APFloat::ExponentType maxExponent;
48 /* The smallest E such that 2^E is a normalized number; this
49 matches the definition of IEEE 754. */
50 APFloat::ExponentType minExponent;
52 /* Number of bits in the significand. This includes the integer
54 unsigned int precision;
56 /* Number of bits actually used in the semantics. */
57 unsigned int sizeInBits;
60 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, 16 };
61 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, 32 };
62 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, 64 };
63 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, 128 };
64 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, 80 };
65 const fltSemantics APFloat::Bogus = { 0, 0, 0, 0 };
67 /* The PowerPC format consists of two doubles. It does not map cleanly
68 onto the usual format above. It is approximated using twice the
69 mantissa bits. Note that for exponents near the double minimum,
70 we no longer can represent the full 106 mantissa bits, so those
71 will be treated as denormal numbers.
73 FIXME: While this approximation is equivalent to what GCC uses for
74 compile-time arithmetic on PPC double-double numbers, it is not able
75 to represent all possible values held by a PPC double-double number,
76 for example: (long double) 1.0 + (long double) 0x1p-106
77 Should this be replaced by a full emulation of PPC double-double? */
78 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53, 128 };
80 /* A tight upper bound on number of parts required to hold the value
83 power * 815 / (351 * integerPartWidth) + 1
85 However, whilst the result may require only this many parts,
86 because we are multiplying two values to get it, the
87 multiplication may require an extra part with the excess part
88 being zero (consider the trivial case of 1 * 1, tcFullMultiply
89 requires two parts to hold the single-part result). So we add an
90 extra one to guarantee enough space whilst multiplying. */
91 const unsigned int maxExponent = 16383;
92 const unsigned int maxPrecision = 113;
93 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
94 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
95 / (351 * integerPartWidth));
98 /* A bunch of private, handy routines. */
100 static inline unsigned int
101 partCountForBits(unsigned int bits)
103 return ((bits) + integerPartWidth - 1) / integerPartWidth;
106 /* Returns 0U-9U. Return values >= 10U are not digits. */
107 static inline unsigned int
108 decDigitValue(unsigned int c)
113 /* Return the value of a decimal exponent of the form
116 If the exponent overflows, returns a large exponent with the
119 readExponent(StringRef::iterator begin, StringRef::iterator end)
122 unsigned int absExponent;
123 const unsigned int overlargeExponent = 24000; /* FIXME. */
124 StringRef::iterator p = begin;
126 assert(p != end && "Exponent has no digits");
128 isNegative = (*p == '-');
129 if (*p == '-' || *p == '+') {
131 assert(p != end && "Exponent has no digits");
134 absExponent = decDigitValue(*p++);
135 assert(absExponent < 10U && "Invalid character in exponent");
137 for (; p != end; ++p) {
140 value = decDigitValue(*p);
141 assert(value < 10U && "Invalid character in exponent");
143 value += absExponent * 10;
144 if (absExponent >= overlargeExponent) {
145 absExponent = overlargeExponent;
146 p = end; /* outwit assert below */
152 assert(p == end && "Invalid exponent in exponent");
155 return -(int) absExponent;
157 return (int) absExponent;
160 /* This is ugly and needs cleaning up, but I don't immediately see
161 how whilst remaining safe. */
163 totalExponent(StringRef::iterator p, StringRef::iterator end,
164 int exponentAdjustment)
166 int unsignedExponent;
167 bool negative, overflow;
170 assert(p != end && "Exponent has no digits");
172 negative = *p == '-';
173 if (*p == '-' || *p == '+') {
175 assert(p != end && "Exponent has no digits");
178 unsignedExponent = 0;
180 for (; p != end; ++p) {
183 value = decDigitValue(*p);
184 assert(value < 10U && "Invalid character in exponent");
186 unsignedExponent = unsignedExponent * 10 + value;
187 if (unsignedExponent > 32767) {
193 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
197 exponent = unsignedExponent;
199 exponent = -exponent;
200 exponent += exponentAdjustment;
201 if (exponent > 32767 || exponent < -32768)
206 exponent = negative ? -32768: 32767;
211 static StringRef::iterator
212 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
213 StringRef::iterator *dot)
215 StringRef::iterator p = begin;
217 while (p != end && *p == '0')
220 if (p != end && *p == '.') {
223 assert(end - begin != 1 && "Significand has no digits");
225 while (p != end && *p == '0')
232 /* Given a normal decimal floating point number of the form
236 where the decimal point and exponent are optional, fill out the
237 structure D. Exponent is appropriate if the significand is
238 treated as an integer, and normalizedExponent if the significand
239 is taken to have the decimal point after a single leading
242 If the value is zero, V->firstSigDigit points to a non-digit, and
243 the return exponent is zero.
246 const char *firstSigDigit;
247 const char *lastSigDigit;
249 int normalizedExponent;
253 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
256 StringRef::iterator dot = end;
257 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
259 D->firstSigDigit = p;
261 D->normalizedExponent = 0;
263 for (; p != end; ++p) {
265 assert(dot == end && "String contains multiple dots");
270 if (decDigitValue(*p) >= 10U)
275 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
276 assert(p != begin && "Significand has no digits");
277 assert((dot == end || p - begin != 1) && "Significand has no digits");
279 /* p points to the first non-digit in the string */
280 D->exponent = readExponent(p + 1, end);
282 /* Implied decimal point? */
287 /* If number is all zeroes accept any exponent. */
288 if (p != D->firstSigDigit) {
289 /* Drop insignificant trailing zeroes. */
294 while (p != begin && *p == '0');
295 while (p != begin && *p == '.');
298 /* Adjust the exponents for any decimal point. */
299 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
300 D->normalizedExponent = (D->exponent +
301 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
302 - (dot > D->firstSigDigit && dot < p)));
308 /* Return the trailing fraction of a hexadecimal number.
309 DIGITVALUE is the first hex digit of the fraction, P points to
312 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
313 unsigned int digitValue)
315 unsigned int hexDigit;
317 /* If the first trailing digit isn't 0 or 8 we can work out the
318 fraction immediately. */
320 return lfMoreThanHalf;
321 else if (digitValue < 8 && digitValue > 0)
322 return lfLessThanHalf;
324 // Otherwise we need to find the first non-zero digit.
325 while (p != end && (*p == '0' || *p == '.'))
328 assert(p != end && "Invalid trailing hexadecimal fraction!");
330 hexDigit = hexDigitValue(*p);
332 /* If we ran off the end it is exactly zero or one-half, otherwise
335 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
337 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
340 /* Return the fraction lost were a bignum truncated losing the least
341 significant BITS bits. */
343 lostFractionThroughTruncation(const integerPart *parts,
344 unsigned int partCount,
349 lsb = APInt::tcLSB(parts, partCount);
351 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
353 return lfExactlyZero;
355 return lfExactlyHalf;
356 if (bits <= partCount * integerPartWidth &&
357 APInt::tcExtractBit(parts, bits - 1))
358 return lfMoreThanHalf;
360 return lfLessThanHalf;
363 /* Shift DST right BITS bits noting lost fraction. */
365 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
367 lostFraction lost_fraction;
369 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
371 APInt::tcShiftRight(dst, parts, bits);
373 return lost_fraction;
376 /* Combine the effect of two lost fractions. */
378 combineLostFractions(lostFraction moreSignificant,
379 lostFraction lessSignificant)
381 if (lessSignificant != lfExactlyZero) {
382 if (moreSignificant == lfExactlyZero)
383 moreSignificant = lfLessThanHalf;
384 else if (moreSignificant == lfExactlyHalf)
385 moreSignificant = lfMoreThanHalf;
388 return moreSignificant;
391 /* The error from the true value, in half-ulps, on multiplying two
392 floating point numbers, which differ from the value they
393 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
394 than the returned value.
396 See "How to Read Floating Point Numbers Accurately" by William D
399 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
401 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
403 if (HUerr1 + HUerr2 == 0)
404 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
406 return inexactMultiply + 2 * (HUerr1 + HUerr2);
409 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
410 when the least significant BITS are truncated. BITS cannot be
413 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
415 unsigned int count, partBits;
416 integerPart part, boundary;
421 count = bits / integerPartWidth;
422 partBits = bits % integerPartWidth + 1;
424 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
427 boundary = (integerPart) 1 << (partBits - 1);
432 if (part - boundary <= boundary - part)
433 return part - boundary;
435 return boundary - part;
438 if (part == boundary) {
441 return ~(integerPart) 0; /* A lot. */
444 } else if (part == boundary - 1) {
447 return ~(integerPart) 0; /* A lot. */
452 return ~(integerPart) 0; /* A lot. */
455 /* Place pow(5, power) in DST, and return the number of parts used.
456 DST must be at least one part larger than size of the answer. */
458 powerOf5(integerPart *dst, unsigned int power)
460 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
462 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
463 pow5s[0] = 78125 * 5;
465 unsigned int partsCount[16] = { 1 };
466 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
468 assert(power <= maxExponent);
473 *p1 = firstEightPowers[power & 7];
479 for (unsigned int n = 0; power; power >>= 1, n++) {
484 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
486 pc = partsCount[n - 1];
487 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
489 if (pow5[pc - 1] == 0)
497 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
499 if (p2[result - 1] == 0)
502 /* Now result is in p1 with partsCount parts and p2 is scratch
504 tmp = p1, p1 = p2, p2 = tmp;
511 APInt::tcAssign(dst, p1, result);
516 /* Zero at the end to avoid modular arithmetic when adding one; used
517 when rounding up during hexadecimal output. */
518 static const char hexDigitsLower[] = "0123456789abcdef0";
519 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
520 static const char infinityL[] = "infinity";
521 static const char infinityU[] = "INFINITY";
522 static const char NaNL[] = "nan";
523 static const char NaNU[] = "NAN";
525 /* Write out an integerPart in hexadecimal, starting with the most
526 significant nibble. Write out exactly COUNT hexdigits, return
529 partAsHex (char *dst, integerPart part, unsigned int count,
530 const char *hexDigitChars)
532 unsigned int result = count;
534 assert(count != 0 && count <= integerPartWidth / 4);
536 part >>= (integerPartWidth - 4 * count);
538 dst[count] = hexDigitChars[part & 0xf];
545 /* Write out an unsigned decimal integer. */
547 writeUnsignedDecimal (char *dst, unsigned int n)
563 /* Write out a signed decimal integer. */
565 writeSignedDecimal (char *dst, int value)
569 dst = writeUnsignedDecimal(dst, -(unsigned) value);
571 dst = writeUnsignedDecimal(dst, value);
578 APFloat::initialize(const fltSemantics *ourSemantics)
582 semantics = ourSemantics;
585 significand.parts = new integerPart[count];
589 APFloat::freeSignificand()
592 delete [] significand.parts;
596 APFloat::assign(const APFloat &rhs)
598 assert(semantics == rhs.semantics);
601 category = rhs.category;
602 exponent = rhs.exponent;
603 if (isFiniteNonZero() || category == fcNaN)
604 copySignificand(rhs);
608 APFloat::copySignificand(const APFloat &rhs)
610 assert(isFiniteNonZero() || category == fcNaN);
611 assert(rhs.partCount() >= partCount());
613 APInt::tcAssign(significandParts(), rhs.significandParts(),
617 /* Make this number a NaN, with an arbitrary but deterministic value
618 for the significand. If double or longer, this is a signalling NaN,
619 which may not be ideal. If float, this is QNaN(0). */
620 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
625 integerPart *significand = significandParts();
626 unsigned numParts = partCount();
628 // Set the significand bits to the fill.
629 if (!fill || fill->getNumWords() < numParts)
630 APInt::tcSet(significand, 0, numParts);
632 APInt::tcAssign(significand, fill->getRawData(),
633 std::min(fill->getNumWords(), numParts));
635 // Zero out the excess bits of the significand.
636 unsigned bitsToPreserve = semantics->precision - 1;
637 unsigned part = bitsToPreserve / 64;
638 bitsToPreserve %= 64;
639 significand[part] &= ((1ULL << bitsToPreserve) - 1);
640 for (part++; part != numParts; ++part)
641 significand[part] = 0;
644 unsigned QNaNBit = semantics->precision - 2;
647 // We always have to clear the QNaN bit to make it an SNaN.
648 APInt::tcClearBit(significand, QNaNBit);
650 // If there are no bits set in the payload, we have to set
651 // *something* to make it a NaN instead of an infinity;
652 // conventionally, this is the next bit down from the QNaN bit.
653 if (APInt::tcIsZero(significand, numParts))
654 APInt::tcSetBit(significand, QNaNBit - 1);
656 // We always have to set the QNaN bit to make it a QNaN.
657 APInt::tcSetBit(significand, QNaNBit);
660 // For x87 extended precision, we want to make a NaN, not a
661 // pseudo-NaN. Maybe we should expose the ability to make
663 if (semantics == &APFloat::x87DoubleExtended)
664 APInt::tcSetBit(significand, QNaNBit + 1);
667 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
669 APFloat value(Sem, uninitialized);
670 value.makeNaN(SNaN, Negative, fill);
675 APFloat::operator=(const APFloat &rhs)
678 if (semantics != rhs.semantics) {
680 initialize(rhs.semantics);
689 APFloat::operator=(APFloat &&rhs) {
692 semantics = rhs.semantics;
693 significand = rhs.significand;
694 exponent = rhs.exponent;
695 category = rhs.category;
698 rhs.semantics = &Bogus;
703 APFloat::isDenormal() const {
704 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
705 (APInt::tcExtractBit(significandParts(),
706 semantics->precision - 1) == 0);
710 APFloat::isSmallest() const {
711 // The smallest number by magnitude in our format will be the smallest
712 // denormal, i.e. the floating point number with exponent being minimum
713 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
714 return isFiniteNonZero() && exponent == semantics->minExponent &&
715 significandMSB() == 0;
718 bool APFloat::isSignificandAllOnes() const {
719 // Test if the significand excluding the integral bit is all ones. This allows
720 // us to test for binade boundaries.
721 const integerPart *Parts = significandParts();
722 const unsigned PartCount = partCount();
723 for (unsigned i = 0; i < PartCount - 1; i++)
727 // Set the unused high bits to all ones when we compare.
728 const unsigned NumHighBits =
729 PartCount*integerPartWidth - semantics->precision + 1;
730 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
731 "fill than integerPartWidth");
732 const integerPart HighBitFill =
733 ~integerPart(0) << (integerPartWidth - NumHighBits);
734 if (~(Parts[PartCount - 1] | HighBitFill))
740 bool APFloat::isSignificandAllZeros() const {
741 // Test if the significand excluding the integral bit is all zeros. This
742 // allows us to test for binade boundaries.
743 const integerPart *Parts = significandParts();
744 const unsigned PartCount = partCount();
746 for (unsigned i = 0; i < PartCount - 1; i++)
750 const unsigned NumHighBits =
751 PartCount*integerPartWidth - semantics->precision + 1;
752 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
753 "clear than integerPartWidth");
754 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
756 if (Parts[PartCount - 1] & HighBitMask)
763 APFloat::isLargest() const {
764 // The largest number by magnitude in our format will be the floating point
765 // number with maximum exponent and with significand that is all ones.
766 return isFiniteNonZero() && exponent == semantics->maxExponent
767 && isSignificandAllOnes();
771 APFloat::isInteger() const {
772 // This could be made more efficient; I'm going for obviously correct.
773 if (!isFinite()) return false;
774 APFloat truncated = *this;
775 truncated.roundToIntegral(rmTowardZero);
776 return compare(truncated) == cmpEqual;
780 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
783 if (semantics != rhs.semantics ||
784 category != rhs.category ||
787 if (category==fcZero || category==fcInfinity)
790 if (isFiniteNonZero() && exponent != rhs.exponent)
793 return std::equal(significandParts(), significandParts() + partCount(),
794 rhs.significandParts());
797 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
798 initialize(&ourSemantics);
802 exponent = ourSemantics.precision - 1;
803 significandParts()[0] = value;
804 normalize(rmNearestTiesToEven, lfExactlyZero);
807 APFloat::APFloat(const fltSemantics &ourSemantics) {
808 initialize(&ourSemantics);
813 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
814 // Allocates storage if necessary but does not initialize it.
815 initialize(&ourSemantics);
818 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
819 initialize(&ourSemantics);
820 convertFromString(text, rmNearestTiesToEven);
823 APFloat::APFloat(const APFloat &rhs) {
824 initialize(rhs.semantics);
828 APFloat::APFloat(APFloat &&rhs) : semantics(&Bogus) {
829 *this = std::move(rhs);
837 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
838 void APFloat::Profile(FoldingSetNodeID& ID) const {
839 ID.Add(bitcastToAPInt());
843 APFloat::partCount() const
845 return partCountForBits(semantics->precision + 1);
849 APFloat::semanticsPrecision(const fltSemantics &semantics)
851 return semantics.precision;
853 APFloat::ExponentType
854 APFloat::semanticsMaxExponent(const fltSemantics &semantics)
856 return semantics.maxExponent;
858 APFloat::ExponentType
859 APFloat::semanticsMinExponent(const fltSemantics &semantics)
861 return semantics.minExponent;
864 APFloat::semanticsSizeInBits(const fltSemantics &semantics)
866 return semantics.sizeInBits;
870 APFloat::significandParts() const
872 return const_cast<APFloat *>(this)->significandParts();
876 APFloat::significandParts()
879 return significand.parts;
881 return &significand.part;
885 APFloat::zeroSignificand()
887 APInt::tcSet(significandParts(), 0, partCount());
890 /* Increment an fcNormal floating point number's significand. */
892 APFloat::incrementSignificand()
896 carry = APInt::tcIncrement(significandParts(), partCount());
898 /* Our callers should never cause us to overflow. */
903 /* Add the significand of the RHS. Returns the carry flag. */
905 APFloat::addSignificand(const APFloat &rhs)
909 parts = significandParts();
911 assert(semantics == rhs.semantics);
912 assert(exponent == rhs.exponent);
914 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
917 /* Subtract the significand of the RHS with a borrow flag. Returns
920 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
924 parts = significandParts();
926 assert(semantics == rhs.semantics);
927 assert(exponent == rhs.exponent);
929 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
933 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
934 on to the full-precision result of the multiplication. Returns the
937 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
939 unsigned int omsb; // One, not zero, based MSB.
940 unsigned int partsCount, newPartsCount, precision;
941 integerPart *lhsSignificand;
942 integerPart scratch[4];
943 integerPart *fullSignificand;
944 lostFraction lost_fraction;
947 assert(semantics == rhs.semantics);
949 precision = semantics->precision;
951 // Allocate space for twice as many bits as the original significand, plus one
952 // extra bit for the addition to overflow into.
953 newPartsCount = partCountForBits(precision * 2 + 1);
955 if (newPartsCount > 4)
956 fullSignificand = new integerPart[newPartsCount];
958 fullSignificand = scratch;
960 lhsSignificand = significandParts();
961 partsCount = partCount();
963 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
964 rhs.significandParts(), partsCount, partsCount);
966 lost_fraction = lfExactlyZero;
967 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
968 exponent += rhs.exponent;
970 // Assume the operands involved in the multiplication are single-precision
971 // FP, and the two multiplicants are:
972 // *this = a23 . a22 ... a0 * 2^e1
973 // rhs = b23 . b22 ... b0 * 2^e2
974 // the result of multiplication is:
975 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
976 // Note that there are three significant bits at the left-hand side of the
977 // radix point: two for the multiplication, and an overflow bit for the
978 // addition (that will always be zero at this point). Move the radix point
979 // toward left by two bits, and adjust exponent accordingly.
982 if (addend && addend->isNonZero()) {
983 // The intermediate result of the multiplication has "2 * precision"
984 // signicant bit; adjust the addend to be consistent with mul result.
986 Significand savedSignificand = significand;
987 const fltSemantics *savedSemantics = semantics;
988 fltSemantics extendedSemantics;
990 unsigned int extendedPrecision;
992 // Normalize our MSB to one below the top bit to allow for overflow.
993 extendedPrecision = 2 * precision + 1;
994 if (omsb != extendedPrecision - 1) {
995 assert(extendedPrecision > omsb);
996 APInt::tcShiftLeft(fullSignificand, newPartsCount,
997 (extendedPrecision - 1) - omsb);
998 exponent -= (extendedPrecision - 1) - omsb;
1001 /* Create new semantics. */
1002 extendedSemantics = *semantics;
1003 extendedSemantics.precision = extendedPrecision;
1005 if (newPartsCount == 1)
1006 significand.part = fullSignificand[0];
1008 significand.parts = fullSignificand;
1009 semantics = &extendedSemantics;
1011 APFloat extendedAddend(*addend);
1012 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1013 assert(status == opOK);
1016 // Shift the significand of the addend right by one bit. This guarantees
1017 // that the high bit of the significand is zero (same as fullSignificand),
1018 // so the addition will overflow (if it does overflow at all) into the top bit.
1019 lost_fraction = extendedAddend.shiftSignificandRight(1);
1020 assert(lost_fraction == lfExactlyZero &&
1021 "Lost precision while shifting addend for fused-multiply-add.");
1023 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1025 /* Restore our state. */
1026 if (newPartsCount == 1)
1027 fullSignificand[0] = significand.part;
1028 significand = savedSignificand;
1029 semantics = savedSemantics;
1031 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1034 // Convert the result having "2 * precision" significant-bits back to the one
1035 // having "precision" significant-bits. First, move the radix point from
1036 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1037 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1038 exponent -= precision + 1;
1040 // In case MSB resides at the left-hand side of radix point, shift the
1041 // mantissa right by some amount to make sure the MSB reside right before
1042 // the radix point (i.e. "MSB . rest-significant-bits").
1044 // Note that the result is not normalized when "omsb < precision". So, the
1045 // caller needs to call APFloat::normalize() if normalized value is expected.
1046 if (omsb > precision) {
1047 unsigned int bits, significantParts;
1050 bits = omsb - precision;
1051 significantParts = partCountForBits(omsb);
1052 lf = shiftRight(fullSignificand, significantParts, bits);
1053 lost_fraction = combineLostFractions(lf, lost_fraction);
1057 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1059 if (newPartsCount > 4)
1060 delete [] fullSignificand;
1062 return lost_fraction;
1065 /* Multiply the significands of LHS and RHS to DST. */
1067 APFloat::divideSignificand(const APFloat &rhs)
1069 unsigned int bit, i, partsCount;
1070 const integerPart *rhsSignificand;
1071 integerPart *lhsSignificand, *dividend, *divisor;
1072 integerPart scratch[4];
1073 lostFraction lost_fraction;
1075 assert(semantics == rhs.semantics);
1077 lhsSignificand = significandParts();
1078 rhsSignificand = rhs.significandParts();
1079 partsCount = partCount();
1082 dividend = new integerPart[partsCount * 2];
1086 divisor = dividend + partsCount;
1088 /* Copy the dividend and divisor as they will be modified in-place. */
1089 for (i = 0; i < partsCount; i++) {
1090 dividend[i] = lhsSignificand[i];
1091 divisor[i] = rhsSignificand[i];
1092 lhsSignificand[i] = 0;
1095 exponent -= rhs.exponent;
1097 unsigned int precision = semantics->precision;
1099 /* Normalize the divisor. */
1100 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1103 APInt::tcShiftLeft(divisor, partsCount, bit);
1106 /* Normalize the dividend. */
1107 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1110 APInt::tcShiftLeft(dividend, partsCount, bit);
1113 /* Ensure the dividend >= divisor initially for the loop below.
1114 Incidentally, this means that the division loop below is
1115 guaranteed to set the integer bit to one. */
1116 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1118 APInt::tcShiftLeft(dividend, partsCount, 1);
1119 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1122 /* Long division. */
1123 for (bit = precision; bit; bit -= 1) {
1124 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1125 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1126 APInt::tcSetBit(lhsSignificand, bit - 1);
1129 APInt::tcShiftLeft(dividend, partsCount, 1);
1132 /* Figure out the lost fraction. */
1133 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1136 lost_fraction = lfMoreThanHalf;
1138 lost_fraction = lfExactlyHalf;
1139 else if (APInt::tcIsZero(dividend, partsCount))
1140 lost_fraction = lfExactlyZero;
1142 lost_fraction = lfLessThanHalf;
1147 return lost_fraction;
1151 APFloat::significandMSB() const
1153 return APInt::tcMSB(significandParts(), partCount());
1157 APFloat::significandLSB() const
1159 return APInt::tcLSB(significandParts(), partCount());
1162 /* Note that a zero result is NOT normalized to fcZero. */
1164 APFloat::shiftSignificandRight(unsigned int bits)
1166 /* Our exponent should not overflow. */
1167 assert((ExponentType) (exponent + bits) >= exponent);
1171 return shiftRight(significandParts(), partCount(), bits);
1174 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1176 APFloat::shiftSignificandLeft(unsigned int bits)
1178 assert(bits < semantics->precision);
1181 unsigned int partsCount = partCount();
1183 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1186 assert(!APInt::tcIsZero(significandParts(), partsCount));
1191 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1195 assert(semantics == rhs.semantics);
1196 assert(isFiniteNonZero());
1197 assert(rhs.isFiniteNonZero());
1199 compare = exponent - rhs.exponent;
1201 /* If exponents are equal, do an unsigned bignum comparison of the
1204 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1208 return cmpGreaterThan;
1209 else if (compare < 0)
1215 /* Handle overflow. Sign is preserved. We either become infinity or
1216 the largest finite number. */
1218 APFloat::handleOverflow(roundingMode rounding_mode)
1221 if (rounding_mode == rmNearestTiesToEven ||
1222 rounding_mode == rmNearestTiesToAway ||
1223 (rounding_mode == rmTowardPositive && !sign) ||
1224 (rounding_mode == rmTowardNegative && sign)) {
1225 category = fcInfinity;
1226 return (opStatus) (opOverflow | opInexact);
1229 /* Otherwise we become the largest finite number. */
1230 category = fcNormal;
1231 exponent = semantics->maxExponent;
1232 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1233 semantics->precision);
1238 /* Returns TRUE if, when truncating the current number, with BIT the
1239 new LSB, with the given lost fraction and rounding mode, the result
1240 would need to be rounded away from zero (i.e., by increasing the
1241 signficand). This routine must work for fcZero of both signs, and
1242 fcNormal numbers. */
1244 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1245 lostFraction lost_fraction,
1246 unsigned int bit) const
1248 /* NaNs and infinities should not have lost fractions. */
1249 assert(isFiniteNonZero() || category == fcZero);
1251 /* Current callers never pass this so we don't handle it. */
1252 assert(lost_fraction != lfExactlyZero);
1254 switch (rounding_mode) {
1255 case rmNearestTiesToAway:
1256 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1258 case rmNearestTiesToEven:
1259 if (lost_fraction == lfMoreThanHalf)
1262 /* Our zeroes don't have a significand to test. */
1263 if (lost_fraction == lfExactlyHalf && category != fcZero)
1264 return APInt::tcExtractBit(significandParts(), bit);
1271 case rmTowardPositive:
1274 case rmTowardNegative:
1277 llvm_unreachable("Invalid rounding mode found");
1281 APFloat::normalize(roundingMode rounding_mode,
1282 lostFraction lost_fraction)
1284 unsigned int omsb; /* One, not zero, based MSB. */
1287 if (!isFiniteNonZero())
1290 /* Before rounding normalize the exponent of fcNormal numbers. */
1291 omsb = significandMSB() + 1;
1294 /* OMSB is numbered from 1. We want to place it in the integer
1295 bit numbered PRECISION if possible, with a compensating change in
1297 exponentChange = omsb - semantics->precision;
1299 /* If the resulting exponent is too high, overflow according to
1300 the rounding mode. */
1301 if (exponent + exponentChange > semantics->maxExponent)
1302 return handleOverflow(rounding_mode);
1304 /* Subnormal numbers have exponent minExponent, and their MSB
1305 is forced based on that. */
1306 if (exponent + exponentChange < semantics->minExponent)
1307 exponentChange = semantics->minExponent - exponent;
1309 /* Shifting left is easy as we don't lose precision. */
1310 if (exponentChange < 0) {
1311 assert(lost_fraction == lfExactlyZero);
1313 shiftSignificandLeft(-exponentChange);
1318 if (exponentChange > 0) {
1321 /* Shift right and capture any new lost fraction. */
1322 lf = shiftSignificandRight(exponentChange);
1324 lost_fraction = combineLostFractions(lf, lost_fraction);
1326 /* Keep OMSB up-to-date. */
1327 if (omsb > (unsigned) exponentChange)
1328 omsb -= exponentChange;
1334 /* Now round the number according to rounding_mode given the lost
1337 /* As specified in IEEE 754, since we do not trap we do not report
1338 underflow for exact results. */
1339 if (lost_fraction == lfExactlyZero) {
1340 /* Canonicalize zeroes. */
1347 /* Increment the significand if we're rounding away from zero. */
1348 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1350 exponent = semantics->minExponent;
1352 incrementSignificand();
1353 omsb = significandMSB() + 1;
1355 /* Did the significand increment overflow? */
1356 if (omsb == (unsigned) semantics->precision + 1) {
1357 /* Renormalize by incrementing the exponent and shifting our
1358 significand right one. However if we already have the
1359 maximum exponent we overflow to infinity. */
1360 if (exponent == semantics->maxExponent) {
1361 category = fcInfinity;
1363 return (opStatus) (opOverflow | opInexact);
1366 shiftSignificandRight(1);
1372 /* The normal case - we were and are not denormal, and any
1373 significand increment above didn't overflow. */
1374 if (omsb == semantics->precision)
1377 /* We have a non-zero denormal. */
1378 assert(omsb < semantics->precision);
1380 /* Canonicalize zeroes. */
1384 /* The fcZero case is a denormal that underflowed to zero. */
1385 return (opStatus) (opUnderflow | opInexact);
1389 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1391 switch (PackCategoriesIntoKey(category, rhs.category)) {
1393 llvm_unreachable(nullptr);
1395 case PackCategoriesIntoKey(fcNaN, fcZero):
1396 case PackCategoriesIntoKey(fcNaN, fcNormal):
1397 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1398 case PackCategoriesIntoKey(fcNaN, fcNaN):
1399 case PackCategoriesIntoKey(fcNormal, fcZero):
1400 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1401 case PackCategoriesIntoKey(fcInfinity, fcZero):
1404 case PackCategoriesIntoKey(fcZero, fcNaN):
1405 case PackCategoriesIntoKey(fcNormal, fcNaN):
1406 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1407 // We need to be sure to flip the sign here for subtraction because we
1408 // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1409 sign = rhs.sign ^ subtract;
1411 copySignificand(rhs);
1414 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1415 case PackCategoriesIntoKey(fcZero, fcInfinity):
1416 category = fcInfinity;
1417 sign = rhs.sign ^ subtract;
1420 case PackCategoriesIntoKey(fcZero, fcNormal):
1422 sign = rhs.sign ^ subtract;
1425 case PackCategoriesIntoKey(fcZero, fcZero):
1426 /* Sign depends on rounding mode; handled by caller. */
1429 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1430 /* Differently signed infinities can only be validly
1432 if (((sign ^ rhs.sign)!=0) != subtract) {
1439 case PackCategoriesIntoKey(fcNormal, fcNormal):
1444 /* Add or subtract two normal numbers. */
1446 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1449 lostFraction lost_fraction;
1452 /* Determine if the operation on the absolute values is effectively
1453 an addition or subtraction. */
1454 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1456 /* Are we bigger exponent-wise than the RHS? */
1457 bits = exponent - rhs.exponent;
1459 /* Subtraction is more subtle than one might naively expect. */
1461 APFloat temp_rhs(rhs);
1465 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1466 lost_fraction = lfExactlyZero;
1467 } else if (bits > 0) {
1468 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1469 shiftSignificandLeft(1);
1472 lost_fraction = shiftSignificandRight(-bits - 1);
1473 temp_rhs.shiftSignificandLeft(1);
1478 carry = temp_rhs.subtractSignificand
1479 (*this, lost_fraction != lfExactlyZero);
1480 copySignificand(temp_rhs);
1483 carry = subtractSignificand
1484 (temp_rhs, lost_fraction != lfExactlyZero);
1487 /* Invert the lost fraction - it was on the RHS and
1489 if (lost_fraction == lfLessThanHalf)
1490 lost_fraction = lfMoreThanHalf;
1491 else if (lost_fraction == lfMoreThanHalf)
1492 lost_fraction = lfLessThanHalf;
1494 /* The code above is intended to ensure that no borrow is
1500 APFloat temp_rhs(rhs);
1502 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1503 carry = addSignificand(temp_rhs);
1505 lost_fraction = shiftSignificandRight(-bits);
1506 carry = addSignificand(rhs);
1509 /* We have a guard bit; generating a carry cannot happen. */
1514 return lost_fraction;
1518 APFloat::multiplySpecials(const APFloat &rhs)
1520 switch (PackCategoriesIntoKey(category, rhs.category)) {
1522 llvm_unreachable(nullptr);
1524 case PackCategoriesIntoKey(fcNaN, fcZero):
1525 case PackCategoriesIntoKey(fcNaN, fcNormal):
1526 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1527 case PackCategoriesIntoKey(fcNaN, fcNaN):
1531 case PackCategoriesIntoKey(fcZero, fcNaN):
1532 case PackCategoriesIntoKey(fcNormal, fcNaN):
1533 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1536 copySignificand(rhs);
1539 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1540 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1541 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1542 category = fcInfinity;
1545 case PackCategoriesIntoKey(fcZero, fcNormal):
1546 case PackCategoriesIntoKey(fcNormal, fcZero):
1547 case PackCategoriesIntoKey(fcZero, fcZero):
1551 case PackCategoriesIntoKey(fcZero, fcInfinity):
1552 case PackCategoriesIntoKey(fcInfinity, fcZero):
1556 case PackCategoriesIntoKey(fcNormal, fcNormal):
1562 APFloat::divideSpecials(const APFloat &rhs)
1564 switch (PackCategoriesIntoKey(category, rhs.category)) {
1566 llvm_unreachable(nullptr);
1568 case PackCategoriesIntoKey(fcZero, fcNaN):
1569 case PackCategoriesIntoKey(fcNormal, fcNaN):
1570 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1572 copySignificand(rhs);
1573 case PackCategoriesIntoKey(fcNaN, fcZero):
1574 case PackCategoriesIntoKey(fcNaN, fcNormal):
1575 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1576 case PackCategoriesIntoKey(fcNaN, fcNaN):
1578 case PackCategoriesIntoKey(fcInfinity, fcZero):
1579 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1580 case PackCategoriesIntoKey(fcZero, fcInfinity):
1581 case PackCategoriesIntoKey(fcZero, fcNormal):
1584 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1588 case PackCategoriesIntoKey(fcNormal, fcZero):
1589 category = fcInfinity;
1592 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1593 case PackCategoriesIntoKey(fcZero, fcZero):
1597 case PackCategoriesIntoKey(fcNormal, fcNormal):
1603 APFloat::modSpecials(const APFloat &rhs)
1605 switch (PackCategoriesIntoKey(category, rhs.category)) {
1607 llvm_unreachable(nullptr);
1609 case PackCategoriesIntoKey(fcNaN, fcZero):
1610 case PackCategoriesIntoKey(fcNaN, fcNormal):
1611 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1612 case PackCategoriesIntoKey(fcNaN, fcNaN):
1613 case PackCategoriesIntoKey(fcZero, fcInfinity):
1614 case PackCategoriesIntoKey(fcZero, fcNormal):
1615 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1618 case PackCategoriesIntoKey(fcZero, fcNaN):
1619 case PackCategoriesIntoKey(fcNormal, fcNaN):
1620 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1623 copySignificand(rhs);
1626 case PackCategoriesIntoKey(fcNormal, fcZero):
1627 case PackCategoriesIntoKey(fcInfinity, fcZero):
1628 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1629 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1630 case PackCategoriesIntoKey(fcZero, fcZero):
1634 case PackCategoriesIntoKey(fcNormal, fcNormal):
1641 APFloat::changeSign()
1643 /* Look mummy, this one's easy. */
1648 APFloat::clearSign()
1650 /* So is this one. */
1655 APFloat::copySign(const APFloat &rhs)
1661 /* Normalized addition or subtraction. */
1663 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1668 fs = addOrSubtractSpecials(rhs, subtract);
1670 /* This return code means it was not a simple case. */
1671 if (fs == opDivByZero) {
1672 lostFraction lost_fraction;
1674 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1675 fs = normalize(rounding_mode, lost_fraction);
1677 /* Can only be zero if we lost no fraction. */
1678 assert(category != fcZero || lost_fraction == lfExactlyZero);
1681 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1682 positive zero unless rounding to minus infinity, except that
1683 adding two like-signed zeroes gives that zero. */
1684 if (category == fcZero) {
1685 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1686 sign = (rounding_mode == rmTowardNegative);
1692 /* Normalized addition. */
1694 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1696 return addOrSubtract(rhs, rounding_mode, false);
1699 /* Normalized subtraction. */
1701 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1703 return addOrSubtract(rhs, rounding_mode, true);
1706 /* Normalized multiply. */
1708 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1713 fs = multiplySpecials(rhs);
1715 if (isFiniteNonZero()) {
1716 lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
1717 fs = normalize(rounding_mode, lost_fraction);
1718 if (lost_fraction != lfExactlyZero)
1719 fs = (opStatus) (fs | opInexact);
1725 /* Normalized divide. */
1727 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1732 fs = divideSpecials(rhs);
1734 if (isFiniteNonZero()) {
1735 lostFraction lost_fraction = divideSignificand(rhs);
1736 fs = normalize(rounding_mode, lost_fraction);
1737 if (lost_fraction != lfExactlyZero)
1738 fs = (opStatus) (fs | opInexact);
1744 /* Normalized remainder. This is not currently correct in all cases. */
1746 APFloat::remainder(const APFloat &rhs)
1750 unsigned int origSign = sign;
1752 fs = V.divide(rhs, rmNearestTiesToEven);
1753 if (fs == opDivByZero)
1756 int parts = partCount();
1757 integerPart *x = new integerPart[parts];
1759 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1760 rmNearestTiesToEven, &ignored);
1761 if (fs==opInvalidOp)
1764 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1765 rmNearestTiesToEven);
1766 assert(fs==opOK); // should always work
1768 fs = V.multiply(rhs, rmNearestTiesToEven);
1769 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1771 fs = subtract(V, rmNearestTiesToEven);
1772 assert(fs==opOK || fs==opInexact); // likewise
1775 sign = origSign; // IEEE754 requires this
1780 /* Normalized llvm frem (C fmod).
1781 This is not currently correct in all cases. */
1783 APFloat::mod(const APFloat &rhs)
1786 fs = modSpecials(rhs);
1788 if (isFiniteNonZero() && rhs.isFiniteNonZero()) {
1790 unsigned int origSign = sign;
1792 fs = V.divide(rhs, rmNearestTiesToEven);
1793 if (fs == opDivByZero)
1796 int parts = partCount();
1797 integerPart *x = new integerPart[parts];
1799 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1800 rmTowardZero, &ignored);
1801 if (fs==opInvalidOp)
1804 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1805 rmNearestTiesToEven);
1806 assert(fs==opOK); // should always work
1808 fs = V.multiply(rhs, rmNearestTiesToEven);
1809 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1811 fs = subtract(V, rmNearestTiesToEven);
1812 assert(fs==opOK || fs==opInexact); // likewise
1815 sign = origSign; // IEEE754 requires this
1821 /* Normalized fused-multiply-add. */
1823 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1824 const APFloat &addend,
1825 roundingMode rounding_mode)
1829 /* Post-multiplication sign, before addition. */
1830 sign ^= multiplicand.sign;
1832 /* If and only if all arguments are normal do we need to do an
1833 extended-precision calculation. */
1834 if (isFiniteNonZero() &&
1835 multiplicand.isFiniteNonZero() &&
1836 addend.isFinite()) {
1837 lostFraction lost_fraction;
1839 lost_fraction = multiplySignificand(multiplicand, &addend);
1840 fs = normalize(rounding_mode, lost_fraction);
1841 if (lost_fraction != lfExactlyZero)
1842 fs = (opStatus) (fs | opInexact);
1844 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1845 positive zero unless rounding to minus infinity, except that
1846 adding two like-signed zeroes gives that zero. */
1847 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1848 sign = (rounding_mode == rmTowardNegative);
1850 fs = multiplySpecials(multiplicand);
1852 /* FS can only be opOK or opInvalidOp. There is no more work
1853 to do in the latter case. The IEEE-754R standard says it is
1854 implementation-defined in this case whether, if ADDEND is a
1855 quiet NaN, we raise invalid op; this implementation does so.
1857 If we need to do the addition we can do so with normal
1860 fs = addOrSubtract(addend, rounding_mode, false);
1866 /* Rounding-mode corrrect round to integral value. */
1867 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1870 // If the exponent is large enough, we know that this value is already
1871 // integral, and the arithmetic below would potentially cause it to saturate
1872 // to +/-Inf. Bail out early instead.
1873 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1876 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1877 // precision of our format, and then subtract it back off again. The choice
1878 // of rounding modes for the addition/subtraction determines the rounding mode
1879 // for our integral rounding as well.
1880 // NOTE: When the input value is negative, we do subtraction followed by
1881 // addition instead.
1882 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1883 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1884 APFloat MagicConstant(*semantics);
1885 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1886 rmNearestTiesToEven);
1887 MagicConstant.copySign(*this);
1892 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1893 bool inputSign = isNegative();
1895 fs = add(MagicConstant, rounding_mode);
1896 if (fs != opOK && fs != opInexact)
1899 fs = subtract(MagicConstant, rounding_mode);
1901 // Restore the input sign.
1902 if (inputSign != isNegative())
1909 /* Comparison requires normalized numbers. */
1911 APFloat::compare(const APFloat &rhs) const
1915 assert(semantics == rhs.semantics);
1917 switch (PackCategoriesIntoKey(category, rhs.category)) {
1919 llvm_unreachable(nullptr);
1921 case PackCategoriesIntoKey(fcNaN, fcZero):
1922 case PackCategoriesIntoKey(fcNaN, fcNormal):
1923 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1924 case PackCategoriesIntoKey(fcNaN, fcNaN):
1925 case PackCategoriesIntoKey(fcZero, fcNaN):
1926 case PackCategoriesIntoKey(fcNormal, fcNaN):
1927 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1928 return cmpUnordered;
1930 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1931 case PackCategoriesIntoKey(fcInfinity, fcZero):
1932 case PackCategoriesIntoKey(fcNormal, fcZero):
1936 return cmpGreaterThan;
1938 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1939 case PackCategoriesIntoKey(fcZero, fcInfinity):
1940 case PackCategoriesIntoKey(fcZero, fcNormal):
1942 return cmpGreaterThan;
1946 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1947 if (sign == rhs.sign)
1952 return cmpGreaterThan;
1954 case PackCategoriesIntoKey(fcZero, fcZero):
1957 case PackCategoriesIntoKey(fcNormal, fcNormal):
1961 /* Two normal numbers. Do they have the same sign? */
1962 if (sign != rhs.sign) {
1964 result = cmpLessThan;
1966 result = cmpGreaterThan;
1968 /* Compare absolute values; invert result if negative. */
1969 result = compareAbsoluteValue(rhs);
1972 if (result == cmpLessThan)
1973 result = cmpGreaterThan;
1974 else if (result == cmpGreaterThan)
1975 result = cmpLessThan;
1982 /// APFloat::convert - convert a value of one floating point type to another.
1983 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1984 /// records whether the transformation lost information, i.e. whether
1985 /// converting the result back to the original type will produce the
1986 /// original value (this is almost the same as return value==fsOK, but there
1987 /// are edge cases where this is not so).
1990 APFloat::convert(const fltSemantics &toSemantics,
1991 roundingMode rounding_mode, bool *losesInfo)
1993 lostFraction lostFraction;
1994 unsigned int newPartCount, oldPartCount;
1997 const fltSemantics &fromSemantics = *semantics;
1999 lostFraction = lfExactlyZero;
2000 newPartCount = partCountForBits(toSemantics.precision + 1);
2001 oldPartCount = partCount();
2002 shift = toSemantics.precision - fromSemantics.precision;
2004 bool X86SpecialNan = false;
2005 if (&fromSemantics == &APFloat::x87DoubleExtended &&
2006 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
2007 (!(*significandParts() & 0x8000000000000000ULL) ||
2008 !(*significandParts() & 0x4000000000000000ULL))) {
2009 // x86 has some unusual NaNs which cannot be represented in any other
2010 // format; note them here.
2011 X86SpecialNan = true;
2014 // If this is a truncation of a denormal number, and the target semantics
2015 // has larger exponent range than the source semantics (this can happen
2016 // when truncating from PowerPC double-double to double format), the
2017 // right shift could lose result mantissa bits. Adjust exponent instead
2018 // of performing excessive shift.
2019 if (shift < 0 && isFiniteNonZero()) {
2020 int exponentChange = significandMSB() + 1 - fromSemantics.precision;
2021 if (exponent + exponentChange < toSemantics.minExponent)
2022 exponentChange = toSemantics.minExponent - exponent;
2023 if (exponentChange < shift)
2024 exponentChange = shift;
2025 if (exponentChange < 0) {
2026 shift -= exponentChange;
2027 exponent += exponentChange;
2031 // If this is a truncation, perform the shift before we narrow the storage.
2032 if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
2033 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2035 // Fix the storage so it can hold to new value.
2036 if (newPartCount > oldPartCount) {
2037 // The new type requires more storage; make it available.
2038 integerPart *newParts;
2039 newParts = new integerPart[newPartCount];
2040 APInt::tcSet(newParts, 0, newPartCount);
2041 if (isFiniteNonZero() || category==fcNaN)
2042 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2044 significand.parts = newParts;
2045 } else if (newPartCount == 1 && oldPartCount != 1) {
2046 // Switch to built-in storage for a single part.
2047 integerPart newPart = 0;
2048 if (isFiniteNonZero() || category==fcNaN)
2049 newPart = significandParts()[0];
2051 significand.part = newPart;
2054 // Now that we have the right storage, switch the semantics.
2055 semantics = &toSemantics;
2057 // If this is an extension, perform the shift now that the storage is
2059 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2060 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2062 if (isFiniteNonZero()) {
2063 fs = normalize(rounding_mode, lostFraction);
2064 *losesInfo = (fs != opOK);
2065 } else if (category == fcNaN) {
2066 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2068 // For x87 extended precision, we want to make a NaN, not a special NaN if
2069 // the input wasn't special either.
2070 if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended)
2071 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2073 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2074 // does not give you back the same bits. This is dubious, and we
2075 // don't currently do it. You're really supposed to get
2076 // an invalid operation signal at runtime, but nobody does that.
2086 /* Convert a floating point number to an integer according to the
2087 rounding mode. If the rounded integer value is out of range this
2088 returns an invalid operation exception and the contents of the
2089 destination parts are unspecified. If the rounded value is in
2090 range but the floating point number is not the exact integer, the C
2091 standard doesn't require an inexact exception to be raised. IEEE
2092 854 does require it so we do that.
2094 Note that for conversions to integer type the C standard requires
2095 round-to-zero to always be used. */
2097 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
2099 roundingMode rounding_mode,
2100 bool *isExact) const
2102 lostFraction lost_fraction;
2103 const integerPart *src;
2104 unsigned int dstPartsCount, truncatedBits;
2108 /* Handle the three special cases first. */
2109 if (category == fcInfinity || category == fcNaN)
2112 dstPartsCount = partCountForBits(width);
2114 if (category == fcZero) {
2115 APInt::tcSet(parts, 0, dstPartsCount);
2116 // Negative zero can't be represented as an int.
2121 src = significandParts();
2123 /* Step 1: place our absolute value, with any fraction truncated, in
2126 /* Our absolute value is less than one; truncate everything. */
2127 APInt::tcSet(parts, 0, dstPartsCount);
2128 /* For exponent -1 the integer bit represents .5, look at that.
2129 For smaller exponents leftmost truncated bit is 0. */
2130 truncatedBits = semantics->precision -1U - exponent;
2132 /* We want the most significant (exponent + 1) bits; the rest are
2134 unsigned int bits = exponent + 1U;
2136 /* Hopelessly large in magnitude? */
2140 if (bits < semantics->precision) {
2141 /* We truncate (semantics->precision - bits) bits. */
2142 truncatedBits = semantics->precision - bits;
2143 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
2145 /* We want at least as many bits as are available. */
2146 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2147 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2152 /* Step 2: work out any lost fraction, and increment the absolute
2153 value if we would round away from zero. */
2154 if (truncatedBits) {
2155 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2157 if (lost_fraction != lfExactlyZero &&
2158 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2159 if (APInt::tcIncrement(parts, dstPartsCount))
2160 return opInvalidOp; /* Overflow. */
2163 lost_fraction = lfExactlyZero;
2166 /* Step 3: check if we fit in the destination. */
2167 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2171 /* Negative numbers cannot be represented as unsigned. */
2175 /* It takes omsb bits to represent the unsigned integer value.
2176 We lose a bit for the sign, but care is needed as the
2177 maximally negative integer is a special case. */
2178 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2181 /* This case can happen because of rounding. */
2186 APInt::tcNegate (parts, dstPartsCount);
2188 if (omsb >= width + !isSigned)
2192 if (lost_fraction == lfExactlyZero) {
2199 /* Same as convertToSignExtendedInteger, except we provide
2200 deterministic values in case of an invalid operation exception,
2201 namely zero for NaNs and the minimal or maximal value respectively
2202 for underflow or overflow.
2203 The *isExact output tells whether the result is exact, in the sense
2204 that converting it back to the original floating point type produces
2205 the original value. This is almost equivalent to result==opOK,
2206 except for negative zeroes.
2209 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2211 roundingMode rounding_mode, bool *isExact) const
2215 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2218 if (fs == opInvalidOp) {
2219 unsigned int bits, dstPartsCount;
2221 dstPartsCount = partCountForBits(width);
2223 if (category == fcNaN)
2228 bits = width - isSigned;
2230 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2231 if (sign && isSigned)
2232 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2238 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2239 an APSInt, whose initial bit-width and signed-ness are used to determine the
2240 precision of the conversion.
2243 APFloat::convertToInteger(APSInt &result,
2244 roundingMode rounding_mode, bool *isExact) const
2246 unsigned bitWidth = result.getBitWidth();
2247 SmallVector<uint64_t, 4> parts(result.getNumWords());
2248 opStatus status = convertToInteger(
2249 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2250 // Keeps the original signed-ness.
2251 result = APInt(bitWidth, parts);
2255 /* Convert an unsigned integer SRC to a floating point number,
2256 rounding according to ROUNDING_MODE. The sign of the floating
2257 point number is not modified. */
2259 APFloat::convertFromUnsignedParts(const integerPart *src,
2260 unsigned int srcCount,
2261 roundingMode rounding_mode)
2263 unsigned int omsb, precision, dstCount;
2265 lostFraction lost_fraction;
2267 category = fcNormal;
2268 omsb = APInt::tcMSB(src, srcCount) + 1;
2269 dst = significandParts();
2270 dstCount = partCount();
2271 precision = semantics->precision;
2273 /* We want the most significant PRECISION bits of SRC. There may not
2274 be that many; extract what we can. */
2275 if (precision <= omsb) {
2276 exponent = omsb - 1;
2277 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2279 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2281 exponent = precision - 1;
2282 lost_fraction = lfExactlyZero;
2283 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2286 return normalize(rounding_mode, lost_fraction);
2290 APFloat::convertFromAPInt(const APInt &Val,
2292 roundingMode rounding_mode)
2294 unsigned int partCount = Val.getNumWords();
2298 if (isSigned && api.isNegative()) {
2303 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2306 /* Convert a two's complement integer SRC to a floating point number,
2307 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2308 integer is signed, in which case it must be sign-extended. */
2310 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2311 unsigned int srcCount,
2313 roundingMode rounding_mode)
2318 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2321 /* If we're signed and negative negate a copy. */
2323 copy = new integerPart[srcCount];
2324 APInt::tcAssign(copy, src, srcCount);
2325 APInt::tcNegate(copy, srcCount);
2326 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2330 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2336 /* FIXME: should this just take a const APInt reference? */
2338 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2339 unsigned int width, bool isSigned,
2340 roundingMode rounding_mode)
2342 unsigned int partCount = partCountForBits(width);
2343 APInt api = APInt(width, makeArrayRef(parts, partCount));
2346 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2351 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2355 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2357 lostFraction lost_fraction = lfExactlyZero;
2359 category = fcNormal;
2363 integerPart *significand = significandParts();
2364 unsigned partsCount = partCount();
2365 unsigned bitPos = partsCount * integerPartWidth;
2366 bool computedTrailingFraction = false;
2368 // Skip leading zeroes and any (hexa)decimal point.
2369 StringRef::iterator begin = s.begin();
2370 StringRef::iterator end = s.end();
2371 StringRef::iterator dot;
2372 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2373 StringRef::iterator firstSignificantDigit = p;
2376 integerPart hex_value;
2379 assert(dot == end && "String contains multiple dots");
2384 hex_value = hexDigitValue(*p);
2385 if (hex_value == -1U)
2390 // Store the number while we have space.
2393 hex_value <<= bitPos % integerPartWidth;
2394 significand[bitPos / integerPartWidth] |= hex_value;
2395 } else if (!computedTrailingFraction) {
2396 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2397 computedTrailingFraction = true;
2401 /* Hex floats require an exponent but not a hexadecimal point. */
2402 assert(p != end && "Hex strings require an exponent");
2403 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2404 assert(p != begin && "Significand has no digits");
2405 assert((dot == end || p - begin != 1) && "Significand has no digits");
2407 /* Ignore the exponent if we are zero. */
2408 if (p != firstSignificantDigit) {
2411 /* Implicit hexadecimal point? */
2415 /* Calculate the exponent adjustment implicit in the number of
2416 significant digits. */
2417 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2418 if (expAdjustment < 0)
2420 expAdjustment = expAdjustment * 4 - 1;
2422 /* Adjust for writing the significand starting at the most
2423 significant nibble. */
2424 expAdjustment += semantics->precision;
2425 expAdjustment -= partsCount * integerPartWidth;
2427 /* Adjust for the given exponent. */
2428 exponent = totalExponent(p + 1, end, expAdjustment);
2431 return normalize(rounding_mode, lost_fraction);
2435 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2436 unsigned sigPartCount, int exp,
2437 roundingMode rounding_mode)
2439 unsigned int parts, pow5PartCount;
2440 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2441 integerPart pow5Parts[maxPowerOfFiveParts];
2444 isNearest = (rounding_mode == rmNearestTiesToEven ||
2445 rounding_mode == rmNearestTiesToAway);
2447 parts = partCountForBits(semantics->precision + 11);
2449 /* Calculate pow(5, abs(exp)). */
2450 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2452 for (;; parts *= 2) {
2453 opStatus sigStatus, powStatus;
2454 unsigned int excessPrecision, truncatedBits;
2456 calcSemantics.precision = parts * integerPartWidth - 1;
2457 excessPrecision = calcSemantics.precision - semantics->precision;
2458 truncatedBits = excessPrecision;
2460 APFloat decSig = APFloat::getZero(calcSemantics, sign);
2461 APFloat pow5(calcSemantics);
2463 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2464 rmNearestTiesToEven);
2465 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2466 rmNearestTiesToEven);
2467 /* Add exp, as 10^n = 5^n * 2^n. */
2468 decSig.exponent += exp;
2470 lostFraction calcLostFraction;
2471 integerPart HUerr, HUdistance;
2472 unsigned int powHUerr;
2475 /* multiplySignificand leaves the precision-th bit set to 1. */
2476 calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
2477 powHUerr = powStatus != opOK;
2479 calcLostFraction = decSig.divideSignificand(pow5);
2480 /* Denormal numbers have less precision. */
2481 if (decSig.exponent < semantics->minExponent) {
2482 excessPrecision += (semantics->minExponent - decSig.exponent);
2483 truncatedBits = excessPrecision;
2484 if (excessPrecision > calcSemantics.precision)
2485 excessPrecision = calcSemantics.precision;
2487 /* Extra half-ulp lost in reciprocal of exponent. */
2488 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2491 /* Both multiplySignificand and divideSignificand return the
2492 result with the integer bit set. */
2493 assert(APInt::tcExtractBit
2494 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2496 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2498 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2499 excessPrecision, isNearest);
2501 /* Are we guaranteed to round correctly if we truncate? */
2502 if (HUdistance >= HUerr) {
2503 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2504 calcSemantics.precision - excessPrecision,
2506 /* Take the exponent of decSig. If we tcExtract-ed less bits
2507 above we must adjust our exponent to compensate for the
2508 implicit right shift. */
2509 exponent = (decSig.exponent + semantics->precision
2510 - (calcSemantics.precision - excessPrecision));
2511 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2514 return normalize(rounding_mode, calcLostFraction);
2520 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2525 /* Scan the text. */
2526 StringRef::iterator p = str.begin();
2527 interpretDecimal(p, str.end(), &D);
2529 /* Handle the quick cases. First the case of no significant digits,
2530 i.e. zero, and then exponents that are obviously too large or too
2531 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2532 definitely overflows if
2534 (exp - 1) * L >= maxExponent
2536 and definitely underflows to zero where
2538 (exp + 1) * L <= minExponent - precision
2540 With integer arithmetic the tightest bounds for L are
2542 93/28 < L < 196/59 [ numerator <= 256 ]
2543 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2546 // Test if we have a zero number allowing for strings with no null terminators
2547 // and zero decimals with non-zero exponents.
2549 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2550 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2551 // be at most one dot. On the other hand, if we have a zero with a non-zero
2552 // exponent, then we know that D.firstSigDigit will be non-numeric.
2553 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2557 /* Check whether the normalized exponent is high enough to overflow
2558 max during the log-rebasing in the max-exponent check below. */
2559 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2560 fs = handleOverflow(rounding_mode);
2562 /* If it wasn't, then it also wasn't high enough to overflow max
2563 during the log-rebasing in the min-exponent check. Check that it
2564 won't overflow min in either check, then perform the min-exponent
2566 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2567 (D.normalizedExponent + 1) * 28738 <=
2568 8651 * (semantics->minExponent - (int) semantics->precision)) {
2569 /* Underflow to zero and round. */
2570 category = fcNormal;
2572 fs = normalize(rounding_mode, lfLessThanHalf);
2574 /* We can finally safely perform the max-exponent check. */
2575 } else if ((D.normalizedExponent - 1) * 42039
2576 >= 12655 * semantics->maxExponent) {
2577 /* Overflow and round. */
2578 fs = handleOverflow(rounding_mode);
2580 integerPart *decSignificand;
2581 unsigned int partCount;
2583 /* A tight upper bound on number of bits required to hold an
2584 N-digit decimal integer is N * 196 / 59. Allocate enough space
2585 to hold the full significand, and an extra part required by
2587 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2588 partCount = partCountForBits(1 + 196 * partCount / 59);
2589 decSignificand = new integerPart[partCount + 1];
2592 /* Convert to binary efficiently - we do almost all multiplication
2593 in an integerPart. When this would overflow do we do a single
2594 bignum multiplication, and then revert again to multiplication
2595 in an integerPart. */
2597 integerPart decValue, val, multiplier;
2605 if (p == str.end()) {
2609 decValue = decDigitValue(*p++);
2610 assert(decValue < 10U && "Invalid character in significand");
2612 val = val * 10 + decValue;
2613 /* The maximum number that can be multiplied by ten with any
2614 digit added without overflowing an integerPart. */
2615 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2617 /* Multiply out the current part. */
2618 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2619 partCount, partCount + 1, false);
2621 /* If we used another part (likely but not guaranteed), increase
2623 if (decSignificand[partCount])
2625 } while (p <= D.lastSigDigit);
2627 category = fcNormal;
2628 fs = roundSignificandWithExponent(decSignificand, partCount,
2629 D.exponent, rounding_mode);
2631 delete [] decSignificand;
2638 APFloat::convertFromStringSpecials(StringRef str) {
2639 if (str.equals("inf") || str.equals("INFINITY")) {
2644 if (str.equals("-inf") || str.equals("-INFINITY")) {
2649 if (str.equals("nan") || str.equals("NaN")) {
2650 makeNaN(false, false);
2654 if (str.equals("-nan") || str.equals("-NaN")) {
2655 makeNaN(false, true);
2663 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2665 assert(!str.empty() && "Invalid string length");
2667 // Handle special cases.
2668 if (convertFromStringSpecials(str))
2671 /* Handle a leading minus sign. */
2672 StringRef::iterator p = str.begin();
2673 size_t slen = str.size();
2674 sign = *p == '-' ? 1 : 0;
2675 if (*p == '-' || *p == '+') {
2678 assert(slen && "String has no digits");
2681 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2682 assert(slen - 2 && "Invalid string");
2683 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2687 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2690 /* Write out a hexadecimal representation of the floating point value
2691 to DST, which must be of sufficient size, in the C99 form
2692 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2693 excluding the terminating NUL.
2695 If UPPERCASE, the output is in upper case, otherwise in lower case.
2697 HEXDIGITS digits appear altogether, rounding the value if
2698 necessary. If HEXDIGITS is 0, the minimal precision to display the
2699 number precisely is used instead. If nothing would appear after
2700 the decimal point it is suppressed.
2702 The decimal exponent is always printed and has at least one digit.
2703 Zero values display an exponent of zero. Infinities and NaNs
2704 appear as "infinity" or "nan" respectively.
2706 The above rules are as specified by C99. There is ambiguity about
2707 what the leading hexadecimal digit should be. This implementation
2708 uses whatever is necessary so that the exponent is displayed as
2709 stored. This implies the exponent will fall within the IEEE format
2710 range, and the leading hexadecimal digit will be 0 (for denormals),
2711 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2712 any other digits zero).
2715 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2716 bool upperCase, roundingMode rounding_mode) const
2726 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2727 dst += sizeof infinityL - 1;
2731 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2732 dst += sizeof NaNU - 1;
2737 *dst++ = upperCase ? 'X': 'x';
2739 if (hexDigits > 1) {
2741 memset (dst, '0', hexDigits - 1);
2742 dst += hexDigits - 1;
2744 *dst++ = upperCase ? 'P': 'p';
2749 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2755 return static_cast<unsigned int>(dst - p);
2758 /* Does the hard work of outputting the correctly rounded hexadecimal
2759 form of a normal floating point number with the specified number of
2760 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2761 digits necessary to print the value precisely is output. */
2763 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2765 roundingMode rounding_mode) const
2767 unsigned int count, valueBits, shift, partsCount, outputDigits;
2768 const char *hexDigitChars;
2769 const integerPart *significand;
2774 *dst++ = upperCase ? 'X': 'x';
2777 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2779 significand = significandParts();
2780 partsCount = partCount();
2782 /* +3 because the first digit only uses the single integer bit, so
2783 we have 3 virtual zero most-significant-bits. */
2784 valueBits = semantics->precision + 3;
2785 shift = integerPartWidth - valueBits % integerPartWidth;
2787 /* The natural number of digits required ignoring trailing
2788 insignificant zeroes. */
2789 outputDigits = (valueBits - significandLSB () + 3) / 4;
2791 /* hexDigits of zero means use the required number for the
2792 precision. Otherwise, see if we are truncating. If we are,
2793 find out if we need to round away from zero. */
2795 if (hexDigits < outputDigits) {
2796 /* We are dropping non-zero bits, so need to check how to round.
2797 "bits" is the number of dropped bits. */
2799 lostFraction fraction;
2801 bits = valueBits - hexDigits * 4;
2802 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2803 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2805 outputDigits = hexDigits;
2808 /* Write the digits consecutively, and start writing in the location
2809 of the hexadecimal point. We move the most significant digit
2810 left and add the hexadecimal point later. */
2813 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2815 while (outputDigits && count) {
2818 /* Put the most significant integerPartWidth bits in "part". */
2819 if (--count == partsCount)
2820 part = 0; /* An imaginary higher zero part. */
2822 part = significand[count] << shift;
2825 part |= significand[count - 1] >> (integerPartWidth - shift);
2827 /* Convert as much of "part" to hexdigits as we can. */
2828 unsigned int curDigits = integerPartWidth / 4;
2830 if (curDigits > outputDigits)
2831 curDigits = outputDigits;
2832 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2833 outputDigits -= curDigits;
2839 /* Note that hexDigitChars has a trailing '0'. */
2842 *q = hexDigitChars[hexDigitValue (*q) + 1];
2843 } while (*q == '0');
2846 /* Add trailing zeroes. */
2847 memset (dst, '0', outputDigits);
2848 dst += outputDigits;
2851 /* Move the most significant digit to before the point, and if there
2852 is something after the decimal point add it. This must come
2853 after rounding above. */
2860 /* Finally output the exponent. */
2861 *dst++ = upperCase ? 'P': 'p';
2863 return writeSignedDecimal (dst, exponent);
2866 hash_code llvm::hash_value(const APFloat &Arg) {
2867 if (!Arg.isFiniteNonZero())
2868 return hash_combine((uint8_t)Arg.category,
2869 // NaN has no sign, fix it at zero.
2870 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2871 Arg.semantics->precision);
2873 // Normal floats need their exponent and significand hashed.
2874 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2875 Arg.semantics->precision, Arg.exponent,
2877 Arg.significandParts(),
2878 Arg.significandParts() + Arg.partCount()));
2881 // Conversion from APFloat to/from host float/double. It may eventually be
2882 // possible to eliminate these and have everybody deal with APFloats, but that
2883 // will take a while. This approach will not easily extend to long double.
2884 // Current implementation requires integerPartWidth==64, which is correct at
2885 // the moment but could be made more general.
2887 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2888 // the actual IEEE respresentations. We compensate for that here.
2891 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2893 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2894 assert(partCount()==2);
2896 uint64_t myexponent, mysignificand;
2898 if (isFiniteNonZero()) {
2899 myexponent = exponent+16383; //bias
2900 mysignificand = significandParts()[0];
2901 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2902 myexponent = 0; // denormal
2903 } else if (category==fcZero) {
2906 } else if (category==fcInfinity) {
2907 myexponent = 0x7fff;
2908 mysignificand = 0x8000000000000000ULL;
2910 assert(category == fcNaN && "Unknown category");
2911 myexponent = 0x7fff;
2912 mysignificand = significandParts()[0];
2916 words[0] = mysignificand;
2917 words[1] = ((uint64_t)(sign & 1) << 15) |
2918 (myexponent & 0x7fffLL);
2919 return APInt(80, words);
2923 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2925 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2926 assert(partCount()==2);
2932 // Convert number to double. To avoid spurious underflows, we re-
2933 // normalize against the "double" minExponent first, and only *then*
2934 // truncate the mantissa. The result of that second conversion
2935 // may be inexact, but should never underflow.
2936 // Declare fltSemantics before APFloat that uses it (and
2937 // saves pointer to it) to ensure correct destruction order.
2938 fltSemantics extendedSemantics = *semantics;
2939 extendedSemantics.minExponent = IEEEdouble.minExponent;
2940 APFloat extended(*this);
2941 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2942 assert(fs == opOK && !losesInfo);
2945 APFloat u(extended);
2946 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2947 assert(fs == opOK || fs == opInexact);
2949 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2951 // If conversion was exact or resulted in a special case, we're done;
2952 // just set the second double to zero. Otherwise, re-convert back to
2953 // the extended format and compute the difference. This now should
2954 // convert exactly to double.
2955 if (u.isFiniteNonZero() && losesInfo) {
2956 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2957 assert(fs == opOK && !losesInfo);
2960 APFloat v(extended);
2961 v.subtract(u, rmNearestTiesToEven);
2962 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2963 assert(fs == opOK && !losesInfo);
2965 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2970 return APInt(128, words);
2974 APFloat::convertQuadrupleAPFloatToAPInt() const
2976 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2977 assert(partCount()==2);
2979 uint64_t myexponent, mysignificand, mysignificand2;
2981 if (isFiniteNonZero()) {
2982 myexponent = exponent+16383; //bias
2983 mysignificand = significandParts()[0];
2984 mysignificand2 = significandParts()[1];
2985 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2986 myexponent = 0; // denormal
2987 } else if (category==fcZero) {
2989 mysignificand = mysignificand2 = 0;
2990 } else if (category==fcInfinity) {
2991 myexponent = 0x7fff;
2992 mysignificand = mysignificand2 = 0;
2994 assert(category == fcNaN && "Unknown category!");
2995 myexponent = 0x7fff;
2996 mysignificand = significandParts()[0];
2997 mysignificand2 = significandParts()[1];
3001 words[0] = mysignificand;
3002 words[1] = ((uint64_t)(sign & 1) << 63) |
3003 ((myexponent & 0x7fff) << 48) |
3004 (mysignificand2 & 0xffffffffffffLL);
3006 return APInt(128, words);
3010 APFloat::convertDoubleAPFloatToAPInt() const
3012 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
3013 assert(partCount()==1);
3015 uint64_t myexponent, mysignificand;
3017 if (isFiniteNonZero()) {
3018 myexponent = exponent+1023; //bias
3019 mysignificand = *significandParts();
3020 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
3021 myexponent = 0; // denormal
3022 } else if (category==fcZero) {
3025 } else if (category==fcInfinity) {
3029 assert(category == fcNaN && "Unknown category!");
3031 mysignificand = *significandParts();
3034 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
3035 ((myexponent & 0x7ff) << 52) |
3036 (mysignificand & 0xfffffffffffffLL))));
3040 APFloat::convertFloatAPFloatToAPInt() const
3042 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
3043 assert(partCount()==1);
3045 uint32_t myexponent, mysignificand;
3047 if (isFiniteNonZero()) {
3048 myexponent = exponent+127; //bias
3049 mysignificand = (uint32_t)*significandParts();
3050 if (myexponent == 1 && !(mysignificand & 0x800000))
3051 myexponent = 0; // denormal
3052 } else if (category==fcZero) {
3055 } else if (category==fcInfinity) {
3059 assert(category == fcNaN && "Unknown category!");
3061 mysignificand = (uint32_t)*significandParts();
3064 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3065 (mysignificand & 0x7fffff)));
3069 APFloat::convertHalfAPFloatToAPInt() const
3071 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
3072 assert(partCount()==1);
3074 uint32_t myexponent, mysignificand;
3076 if (isFiniteNonZero()) {
3077 myexponent = exponent+15; //bias
3078 mysignificand = (uint32_t)*significandParts();
3079 if (myexponent == 1 && !(mysignificand & 0x400))
3080 myexponent = 0; // denormal
3081 } else if (category==fcZero) {
3084 } else if (category==fcInfinity) {
3088 assert(category == fcNaN && "Unknown category!");
3090 mysignificand = (uint32_t)*significandParts();
3093 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3094 (mysignificand & 0x3ff)));
3097 // This function creates an APInt that is just a bit map of the floating
3098 // point constant as it would appear in memory. It is not a conversion,
3099 // and treating the result as a normal integer is unlikely to be useful.
3102 APFloat::bitcastToAPInt() const
3104 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
3105 return convertHalfAPFloatToAPInt();
3107 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
3108 return convertFloatAPFloatToAPInt();
3110 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
3111 return convertDoubleAPFloatToAPInt();
3113 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
3114 return convertQuadrupleAPFloatToAPInt();
3116 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
3117 return convertPPCDoubleDoubleAPFloatToAPInt();
3119 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
3121 return convertF80LongDoubleAPFloatToAPInt();
3125 APFloat::convertToFloat() const
3127 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
3128 "Float semantics are not IEEEsingle");
3129 APInt api = bitcastToAPInt();
3130 return api.bitsToFloat();
3134 APFloat::convertToDouble() const
3136 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
3137 "Float semantics are not IEEEdouble");
3138 APInt api = bitcastToAPInt();
3139 return api.bitsToDouble();
3142 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3143 /// does not support these bit patterns:
3144 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3145 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3146 /// exponent = 0, integer bit 1 ("pseudodenormal")
3147 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3148 /// At the moment, the first two are treated as NaNs, the second two as Normal.
3150 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
3152 assert(api.getBitWidth()==80);
3153 uint64_t i1 = api.getRawData()[0];
3154 uint64_t i2 = api.getRawData()[1];
3155 uint64_t myexponent = (i2 & 0x7fff);
3156 uint64_t mysignificand = i1;
3158 initialize(&APFloat::x87DoubleExtended);
3159 assert(partCount()==2);
3161 sign = static_cast<unsigned int>(i2>>15);
3162 if (myexponent==0 && mysignificand==0) {
3163 // exponent, significand meaningless
3165 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3166 // exponent, significand meaningless
3167 category = fcInfinity;
3168 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3169 // exponent meaningless
3171 significandParts()[0] = mysignificand;
3172 significandParts()[1] = 0;
3174 category = fcNormal;
3175 exponent = myexponent - 16383;
3176 significandParts()[0] = mysignificand;
3177 significandParts()[1] = 0;
3178 if (myexponent==0) // denormal
3184 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3186 assert(api.getBitWidth()==128);
3187 uint64_t i1 = api.getRawData()[0];
3188 uint64_t i2 = api.getRawData()[1];
3192 // Get the first double and convert to our format.
3193 initFromDoubleAPInt(APInt(64, i1));
3194 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3195 assert(fs == opOK && !losesInfo);
3198 // Unless we have a special case, add in second double.
3199 if (isFiniteNonZero()) {
3200 APFloat v(IEEEdouble, APInt(64, i2));
3201 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3202 assert(fs == opOK && !losesInfo);
3205 add(v, rmNearestTiesToEven);
3210 APFloat::initFromQuadrupleAPInt(const APInt &api)
3212 assert(api.getBitWidth()==128);
3213 uint64_t i1 = api.getRawData()[0];
3214 uint64_t i2 = api.getRawData()[1];
3215 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3216 uint64_t mysignificand = i1;
3217 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3219 initialize(&APFloat::IEEEquad);
3220 assert(partCount()==2);
3222 sign = static_cast<unsigned int>(i2>>63);
3223 if (myexponent==0 &&
3224 (mysignificand==0 && mysignificand2==0)) {
3225 // exponent, significand meaningless
3227 } else if (myexponent==0x7fff &&
3228 (mysignificand==0 && mysignificand2==0)) {
3229 // exponent, significand meaningless
3230 category = fcInfinity;
3231 } else if (myexponent==0x7fff &&
3232 (mysignificand!=0 || mysignificand2 !=0)) {
3233 // exponent meaningless
3235 significandParts()[0] = mysignificand;
3236 significandParts()[1] = mysignificand2;
3238 category = fcNormal;
3239 exponent = myexponent - 16383;
3240 significandParts()[0] = mysignificand;
3241 significandParts()[1] = mysignificand2;
3242 if (myexponent==0) // denormal
3245 significandParts()[1] |= 0x1000000000000LL; // integer bit
3250 APFloat::initFromDoubleAPInt(const APInt &api)
3252 assert(api.getBitWidth()==64);
3253 uint64_t i = *api.getRawData();
3254 uint64_t myexponent = (i >> 52) & 0x7ff;
3255 uint64_t mysignificand = i & 0xfffffffffffffLL;
3257 initialize(&APFloat::IEEEdouble);
3258 assert(partCount()==1);
3260 sign = static_cast<unsigned int>(i>>63);
3261 if (myexponent==0 && mysignificand==0) {
3262 // exponent, significand meaningless
3264 } else if (myexponent==0x7ff && mysignificand==0) {
3265 // exponent, significand meaningless
3266 category = fcInfinity;
3267 } else if (myexponent==0x7ff && mysignificand!=0) {
3268 // exponent meaningless
3270 *significandParts() = mysignificand;
3272 category = fcNormal;
3273 exponent = myexponent - 1023;
3274 *significandParts() = mysignificand;
3275 if (myexponent==0) // denormal
3278 *significandParts() |= 0x10000000000000LL; // integer bit
3283 APFloat::initFromFloatAPInt(const APInt & api)
3285 assert(api.getBitWidth()==32);
3286 uint32_t i = (uint32_t)*api.getRawData();
3287 uint32_t myexponent = (i >> 23) & 0xff;
3288 uint32_t mysignificand = i & 0x7fffff;
3290 initialize(&APFloat::IEEEsingle);
3291 assert(partCount()==1);
3294 if (myexponent==0 && mysignificand==0) {
3295 // exponent, significand meaningless
3297 } else if (myexponent==0xff && mysignificand==0) {
3298 // exponent, significand meaningless
3299 category = fcInfinity;
3300 } else if (myexponent==0xff && mysignificand!=0) {
3301 // sign, exponent, significand meaningless
3303 *significandParts() = mysignificand;
3305 category = fcNormal;
3306 exponent = myexponent - 127; //bias
3307 *significandParts() = mysignificand;
3308 if (myexponent==0) // denormal
3311 *significandParts() |= 0x800000; // integer bit
3316 APFloat::initFromHalfAPInt(const APInt & api)
3318 assert(api.getBitWidth()==16);
3319 uint32_t i = (uint32_t)*api.getRawData();
3320 uint32_t myexponent = (i >> 10) & 0x1f;
3321 uint32_t mysignificand = i & 0x3ff;
3323 initialize(&APFloat::IEEEhalf);
3324 assert(partCount()==1);
3327 if (myexponent==0 && mysignificand==0) {
3328 // exponent, significand meaningless
3330 } else if (myexponent==0x1f && mysignificand==0) {
3331 // exponent, significand meaningless
3332 category = fcInfinity;
3333 } else if (myexponent==0x1f && mysignificand!=0) {
3334 // sign, exponent, significand meaningless
3336 *significandParts() = mysignificand;
3338 category = fcNormal;
3339 exponent = myexponent - 15; //bias
3340 *significandParts() = mysignificand;
3341 if (myexponent==0) // denormal
3344 *significandParts() |= 0x400; // integer bit
3348 /// Treat api as containing the bits of a floating point number. Currently
3349 /// we infer the floating point type from the size of the APInt. The
3350 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3351 /// when the size is anything else).
3353 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api)
3355 if (Sem == &IEEEhalf)
3356 return initFromHalfAPInt(api);
3357 if (Sem == &IEEEsingle)
3358 return initFromFloatAPInt(api);
3359 if (Sem == &IEEEdouble)
3360 return initFromDoubleAPInt(api);
3361 if (Sem == &x87DoubleExtended)
3362 return initFromF80LongDoubleAPInt(api);
3363 if (Sem == &IEEEquad)
3364 return initFromQuadrupleAPInt(api);
3365 if (Sem == &PPCDoubleDouble)
3366 return initFromPPCDoubleDoubleAPInt(api);
3368 llvm_unreachable(nullptr);
3372 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3376 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth));
3378 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth));
3380 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth));
3382 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth));
3385 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth));
3386 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
3388 llvm_unreachable("Unknown floating bit width");
3392 unsigned APFloat::getSizeInBits(const fltSemantics &Sem) {
3393 return Sem.sizeInBits;
3396 /// Make this number the largest magnitude normal number in the given
3398 void APFloat::makeLargest(bool Negative) {
3399 // We want (in interchange format):
3400 // sign = {Negative}
3402 // significand = 1..1
3403 category = fcNormal;
3405 exponent = semantics->maxExponent;
3407 // Use memset to set all but the highest integerPart to all ones.
3408 integerPart *significand = significandParts();
3409 unsigned PartCount = partCount();
3410 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3412 // Set the high integerPart especially setting all unused top bits for
3413 // internal consistency.
3414 const unsigned NumUnusedHighBits =
3415 PartCount*integerPartWidth - semantics->precision;
3416 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3417 ? (~integerPart(0) >> NumUnusedHighBits)
3421 /// Make this number the smallest magnitude denormal number in the given
3423 void APFloat::makeSmallest(bool Negative) {
3424 // We want (in interchange format):
3425 // sign = {Negative}
3427 // significand = 0..01
3428 category = fcNormal;
3430 exponent = semantics->minExponent;
3431 APInt::tcSet(significandParts(), 1, partCount());
3435 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3436 // We want (in interchange format):
3437 // sign = {Negative}
3439 // significand = 1..1
3440 APFloat Val(Sem, uninitialized);
3441 Val.makeLargest(Negative);
3445 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3446 // We want (in interchange format):
3447 // sign = {Negative}
3449 // significand = 0..01
3450 APFloat Val(Sem, uninitialized);
3451 Val.makeSmallest(Negative);
3455 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3456 APFloat Val(Sem, uninitialized);
3458 // We want (in interchange format):
3459 // sign = {Negative}
3461 // significand = 10..0
3463 Val.category = fcNormal;
3464 Val.zeroSignificand();
3465 Val.sign = Negative;
3466 Val.exponent = Sem.minExponent;
3467 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3468 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3473 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) {
3474 initFromAPInt(&Sem, API);
3477 APFloat::APFloat(float f) {
3478 initFromAPInt(&IEEEsingle, APInt::floatToBits(f));
3481 APFloat::APFloat(double d) {
3482 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d));
3486 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3487 Buffer.append(Str.begin(), Str.end());
3490 /// Removes data from the given significand until it is no more
3491 /// precise than is required for the desired precision.
3492 void AdjustToPrecision(APInt &significand,
3493 int &exp, unsigned FormatPrecision) {
3494 unsigned bits = significand.getActiveBits();
3496 // 196/59 is a very slight overestimate of lg_2(10).
3497 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3499 if (bits <= bitsRequired) return;
3501 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3502 if (!tensRemovable) return;
3504 exp += tensRemovable;
3506 APInt divisor(significand.getBitWidth(), 1);
3507 APInt powten(significand.getBitWidth(), 10);
3509 if (tensRemovable & 1)
3511 tensRemovable >>= 1;
3512 if (!tensRemovable) break;
3516 significand = significand.udiv(divisor);
3518 // Truncate the significand down to its active bit count.
3519 significand = significand.trunc(significand.getActiveBits());
3523 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3524 int &exp, unsigned FormatPrecision) {
3525 unsigned N = buffer.size();
3526 if (N <= FormatPrecision) return;
3528 // The most significant figures are the last ones in the buffer.
3529 unsigned FirstSignificant = N - FormatPrecision;
3532 // FIXME: this probably shouldn't use 'round half up'.
3534 // Rounding down is just a truncation, except we also want to drop
3535 // trailing zeros from the new result.
3536 if (buffer[FirstSignificant - 1] < '5') {
3537 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3540 exp += FirstSignificant;
3541 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3545 // Rounding up requires a decimal add-with-carry. If we continue
3546 // the carry, the newly-introduced zeros will just be truncated.
3547 for (unsigned I = FirstSignificant; I != N; ++I) {
3548 if (buffer[I] == '9') {
3556 // If we carried through, we have exactly one digit of precision.
3557 if (FirstSignificant == N) {
3558 exp += FirstSignificant;
3560 buffer.push_back('1');
3564 exp += FirstSignificant;
3565 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3569 void APFloat::toString(SmallVectorImpl<char> &Str,
3570 unsigned FormatPrecision,
3571 unsigned FormatMaxPadding) const {
3575 return append(Str, "-Inf");
3577 return append(Str, "+Inf");
3579 case fcNaN: return append(Str, "NaN");
3585 if (!FormatMaxPadding)
3586 append(Str, "0.0E+0");
3598 // Decompose the number into an APInt and an exponent.
3599 int exp = exponent - ((int) semantics->precision - 1);
3600 APInt significand(semantics->precision,
3601 makeArrayRef(significandParts(),
3602 partCountForBits(semantics->precision)));
3604 // Set FormatPrecision if zero. We want to do this before we
3605 // truncate trailing zeros, as those are part of the precision.
3606 if (!FormatPrecision) {
3607 // We use enough digits so the number can be round-tripped back to an
3608 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3609 // Accurately" by Steele and White.
3610 // FIXME: Using a formula based purely on the precision is conservative;
3611 // we can print fewer digits depending on the actual value being printed.
3613 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3614 FormatPrecision = 2 + semantics->precision * 59 / 196;
3617 // Ignore trailing binary zeros.
3618 int trailingZeros = significand.countTrailingZeros();
3619 exp += trailingZeros;
3620 significand = significand.lshr(trailingZeros);
3622 // Change the exponent from 2^e to 10^e.
3625 } else if (exp > 0) {
3627 significand = significand.zext(semantics->precision + exp);
3628 significand <<= exp;
3630 } else { /* exp < 0 */
3633 // We transform this using the identity:
3634 // (N)(2^-e) == (N)(5^e)(10^-e)
3635 // This means we have to multiply N (the significand) by 5^e.
3636 // To avoid overflow, we have to operate on numbers large
3637 // enough to store N * 5^e:
3638 // log2(N * 5^e) == log2(N) + e * log2(5)
3639 // <= semantics->precision + e * 137 / 59
3640 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3642 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3644 // Multiply significand by 5^e.
3645 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3646 significand = significand.zext(precision);
3647 APInt five_to_the_i(precision, 5);
3649 if (texp & 1) significand *= five_to_the_i;
3653 five_to_the_i *= five_to_the_i;
3657 AdjustToPrecision(significand, exp, FormatPrecision);
3659 SmallVector<char, 256> buffer;
3662 unsigned precision = significand.getBitWidth();
3663 APInt ten(precision, 10);
3664 APInt digit(precision, 0);
3666 bool inTrail = true;
3667 while (significand != 0) {
3668 // digit <- significand % 10
3669 // significand <- significand / 10
3670 APInt::udivrem(significand, ten, significand, digit);
3672 unsigned d = digit.getZExtValue();
3674 // Drop trailing zeros.
3675 if (inTrail && !d) exp++;
3677 buffer.push_back((char) ('0' + d));
3682 assert(!buffer.empty() && "no characters in buffer!");
3684 // Drop down to FormatPrecision.
3685 // TODO: don't do more precise calculations above than are required.
3686 AdjustToPrecision(buffer, exp, FormatPrecision);
3688 unsigned NDigits = buffer.size();
3690 // Check whether we should use scientific notation.
3691 bool FormatScientific;
3692 if (!FormatMaxPadding)
3693 FormatScientific = true;
3698 // But we shouldn't make the number look more precise than it is.
3699 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3700 NDigits + (unsigned) exp > FormatPrecision);
3702 // Power of the most significant digit.
3703 int MSD = exp + (int) (NDigits - 1);
3706 FormatScientific = false;
3708 // 765e-5 == 0.00765
3710 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3715 // Scientific formatting is pretty straightforward.
3716 if (FormatScientific) {
3717 exp += (NDigits - 1);
3719 Str.push_back(buffer[NDigits-1]);
3724 for (unsigned I = 1; I != NDigits; ++I)
3725 Str.push_back(buffer[NDigits-1-I]);
3728 Str.push_back(exp >= 0 ? '+' : '-');
3729 if (exp < 0) exp = -exp;
3730 SmallVector<char, 6> expbuf;
3732 expbuf.push_back((char) ('0' + (exp % 10)));
3735 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3736 Str.push_back(expbuf[E-1-I]);
3740 // Non-scientific, positive exponents.
3742 for (unsigned I = 0; I != NDigits; ++I)
3743 Str.push_back(buffer[NDigits-1-I]);
3744 for (unsigned I = 0; I != (unsigned) exp; ++I)
3749 // Non-scientific, negative exponents.
3751 // The number of digits to the left of the decimal point.
3752 int NWholeDigits = exp + (int) NDigits;
3755 if (NWholeDigits > 0) {
3756 for (; I != (unsigned) NWholeDigits; ++I)
3757 Str.push_back(buffer[NDigits-I-1]);
3760 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3764 for (unsigned Z = 1; Z != NZeros; ++Z)
3768 for (; I != NDigits; ++I)
3769 Str.push_back(buffer[NDigits-I-1]);
3772 bool APFloat::getExactInverse(APFloat *inv) const {
3773 // Special floats and denormals have no exact inverse.
3774 if (!isFiniteNonZero())
3777 // Check that the number is a power of two by making sure that only the
3778 // integer bit is set in the significand.
3779 if (significandLSB() != semantics->precision - 1)
3783 APFloat reciprocal(*semantics, 1ULL);
3784 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3787 // Avoid multiplication with a denormal, it is not safe on all platforms and
3788 // may be slower than a normal division.
3789 if (reciprocal.isDenormal())
3792 assert(reciprocal.isFiniteNonZero() &&
3793 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3801 bool APFloat::isSignaling() const {
3805 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3806 // first bit of the trailing significand being 0.
3807 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3810 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3812 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3813 /// appropriate sign switching before/after the computation.
3814 APFloat::opStatus APFloat::next(bool nextDown) {
3815 // If we are performing nextDown, swap sign so we have -x.
3819 // Compute nextUp(x)
3820 opStatus result = opOK;
3822 // Handle each float category separately.
3825 // nextUp(+inf) = +inf
3828 // nextUp(-inf) = -getLargest()
3832 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3833 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3834 // change the payload.
3835 if (isSignaling()) {
3836 result = opInvalidOp;
3837 // For consistency, propagate the sign of the sNaN to the qNaN.
3838 makeNaN(false, isNegative(), nullptr);
3842 // nextUp(pm 0) = +getSmallest()
3843 makeSmallest(false);
3846 // nextUp(-getSmallest()) = -0
3847 if (isSmallest() && isNegative()) {
3848 APInt::tcSet(significandParts(), 0, partCount());
3854 // nextUp(getLargest()) == INFINITY
3855 if (isLargest() && !isNegative()) {
3856 APInt::tcSet(significandParts(), 0, partCount());
3857 category = fcInfinity;
3858 exponent = semantics->maxExponent + 1;
3862 // nextUp(normal) == normal + inc.
3864 // If we are negative, we need to decrement the significand.
3866 // We only cross a binade boundary that requires adjusting the exponent
3868 // 1. exponent != semantics->minExponent. This implies we are not in the
3869 // smallest binade or are dealing with denormals.
3870 // 2. Our significand excluding the integral bit is all zeros.
3871 bool WillCrossBinadeBoundary =
3872 exponent != semantics->minExponent && isSignificandAllZeros();
3874 // Decrement the significand.
3876 // We always do this since:
3877 // 1. If we are dealing with a non-binade decrement, by definition we
3878 // just decrement the significand.
3879 // 2. If we are dealing with a normal -> normal binade decrement, since
3880 // we have an explicit integral bit the fact that all bits but the
3881 // integral bit are zero implies that subtracting one will yield a
3882 // significand with 0 integral bit and 1 in all other spots. Thus we
3883 // must just adjust the exponent and set the integral bit to 1.
3884 // 3. If we are dealing with a normal -> denormal binade decrement,
3885 // since we set the integral bit to 0 when we represent denormals, we
3886 // just decrement the significand.
3887 integerPart *Parts = significandParts();
3888 APInt::tcDecrement(Parts, partCount());
3890 if (WillCrossBinadeBoundary) {
3891 // Our result is a normal number. Do the following:
3892 // 1. Set the integral bit to 1.
3893 // 2. Decrement the exponent.
3894 APInt::tcSetBit(Parts, semantics->precision - 1);
3898 // If we are positive, we need to increment the significand.
3900 // We only cross a binade boundary that requires adjusting the exponent if
3901 // the input is not a denormal and all of said input's significand bits
3902 // are set. If all of said conditions are true: clear the significand, set
3903 // the integral bit to 1, and increment the exponent. If we have a
3904 // denormal always increment since moving denormals and the numbers in the
3905 // smallest normal binade have the same exponent in our representation.
3906 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3908 if (WillCrossBinadeBoundary) {
3909 integerPart *Parts = significandParts();
3910 APInt::tcSet(Parts, 0, partCount());
3911 APInt::tcSetBit(Parts, semantics->precision - 1);
3912 assert(exponent != semantics->maxExponent &&
3913 "We can not increment an exponent beyond the maxExponent allowed"
3914 " by the given floating point semantics.");
3917 incrementSignificand();
3923 // If we are performing nextDown, swap sign so we have -nextUp(-x)
3931 APFloat::makeInf(bool Negative) {
3932 category = fcInfinity;
3934 exponent = semantics->maxExponent + 1;
3935 APInt::tcSet(significandParts(), 0, partCount());
3939 APFloat::makeZero(bool Negative) {
3942 exponent = semantics->minExponent-1;
3943 APInt::tcSet(significandParts(), 0, partCount());
3946 APFloat llvm::scalbn(APFloat X, int Exp) {
3947 if (X.isInfinity() || X.isZero() || X.isNaN())
3950 auto MaxExp = X.getSemantics().maxExponent;
3951 auto MinExp = X.getSemantics().minExponent;
3952 if (Exp > (MaxExp - X.exponent))
3953 // Overflow saturates to infinity.
3954 return APFloat::getInf(X.getSemantics(), X.isNegative());
3955 if (Exp < (MinExp - X.exponent))
3956 // Underflow saturates to zero.
3957 return APFloat::getZero(X.getSemantics(), X.isNegative());