1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/MathExtras.h"
28 /// A macro used to combine two fcCategory enums into one key which can be used
29 /// in a switch statement to classify how the interaction of two APFloat's
30 /// categories affects an operation.
32 /// TODO: If clang source code is ever allowed to use constexpr in its own
33 /// codebase, change this into a static inline function.
34 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
36 /* Assumed in hexadecimal significand parsing, and conversion to
37 hexadecimal strings. */
38 static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
42 /* Represents floating point arithmetic semantics. */
44 /* The largest E such that 2^E is representable; this matches the
45 definition of IEEE 754. */
46 APFloat::ExponentType maxExponent;
48 /* The smallest E such that 2^E is a normalized number; this
49 matches the definition of IEEE 754. */
50 APFloat::ExponentType minExponent;
52 /* Number of bits in the significand. This includes the integer
54 unsigned int precision;
56 /* Number of bits actually used in the semantics. */
57 unsigned int sizeInBits;
60 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, 16 };
61 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, 32 };
62 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, 64 };
63 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, 128 };
64 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, 80 };
65 const fltSemantics APFloat::Bogus = { 0, 0, 0, 0 };
67 /* The PowerPC format consists of two doubles. It does not map cleanly
68 onto the usual format above. It is approximated using twice the
69 mantissa bits. Note that for exponents near the double minimum,
70 we no longer can represent the full 106 mantissa bits, so those
71 will be treated as denormal numbers.
73 FIXME: While this approximation is equivalent to what GCC uses for
74 compile-time arithmetic on PPC double-double numbers, it is not able
75 to represent all possible values held by a PPC double-double number,
76 for example: (long double) 1.0 + (long double) 0x1p-106
77 Should this be replaced by a full emulation of PPC double-double? */
78 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53, 128 };
80 /* A tight upper bound on number of parts required to hold the value
83 power * 815 / (351 * integerPartWidth) + 1
85 However, whilst the result may require only this many parts,
86 because we are multiplying two values to get it, the
87 multiplication may require an extra part with the excess part
88 being zero (consider the trivial case of 1 * 1, tcFullMultiply
89 requires two parts to hold the single-part result). So we add an
90 extra one to guarantee enough space whilst multiplying. */
91 const unsigned int maxExponent = 16383;
92 const unsigned int maxPrecision = 113;
93 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
94 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
95 / (351 * integerPartWidth));
98 /* A bunch of private, handy routines. */
100 static inline unsigned int
101 partCountForBits(unsigned int bits)
103 return ((bits) + integerPartWidth - 1) / integerPartWidth;
106 /* Returns 0U-9U. Return values >= 10U are not digits. */
107 static inline unsigned int
108 decDigitValue(unsigned int c)
113 /* Return the value of a decimal exponent of the form
116 If the exponent overflows, returns a large exponent with the
119 readExponent(StringRef::iterator begin, StringRef::iterator end)
122 unsigned int absExponent;
123 const unsigned int overlargeExponent = 24000; /* FIXME. */
124 StringRef::iterator p = begin;
126 assert(p != end && "Exponent has no digits");
128 isNegative = (*p == '-');
129 if (*p == '-' || *p == '+') {
131 assert(p != end && "Exponent has no digits");
134 absExponent = decDigitValue(*p++);
135 assert(absExponent < 10U && "Invalid character in exponent");
137 for (; p != end; ++p) {
140 value = decDigitValue(*p);
141 assert(value < 10U && "Invalid character in exponent");
143 value += absExponent * 10;
144 if (absExponent >= overlargeExponent) {
145 absExponent = overlargeExponent;
146 p = end; /* outwit assert below */
152 assert(p == end && "Invalid exponent in exponent");
155 return -(int) absExponent;
157 return (int) absExponent;
160 /* This is ugly and needs cleaning up, but I don't immediately see
161 how whilst remaining safe. */
163 totalExponent(StringRef::iterator p, StringRef::iterator end,
164 int exponentAdjustment)
166 int unsignedExponent;
167 bool negative, overflow;
170 assert(p != end && "Exponent has no digits");
172 negative = *p == '-';
173 if (*p == '-' || *p == '+') {
175 assert(p != end && "Exponent has no digits");
178 unsignedExponent = 0;
180 for (; p != end; ++p) {
183 value = decDigitValue(*p);
184 assert(value < 10U && "Invalid character in exponent");
186 unsignedExponent = unsignedExponent * 10 + value;
187 if (unsignedExponent > 32767) {
193 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
197 exponent = unsignedExponent;
199 exponent = -exponent;
200 exponent += exponentAdjustment;
201 if (exponent > 32767 || exponent < -32768)
206 exponent = negative ? -32768: 32767;
211 static StringRef::iterator
212 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
213 StringRef::iterator *dot)
215 StringRef::iterator p = begin;
217 while (p != end && *p == '0')
220 if (p != end && *p == '.') {
223 assert(end - begin != 1 && "Significand has no digits");
225 while (p != end && *p == '0')
232 /* Given a normal decimal floating point number of the form
236 where the decimal point and exponent are optional, fill out the
237 structure D. Exponent is appropriate if the significand is
238 treated as an integer, and normalizedExponent if the significand
239 is taken to have the decimal point after a single leading
242 If the value is zero, V->firstSigDigit points to a non-digit, and
243 the return exponent is zero.
246 const char *firstSigDigit;
247 const char *lastSigDigit;
249 int normalizedExponent;
253 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
256 StringRef::iterator dot = end;
257 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
259 D->firstSigDigit = p;
261 D->normalizedExponent = 0;
263 for (; p != end; ++p) {
265 assert(dot == end && "String contains multiple dots");
270 if (decDigitValue(*p) >= 10U)
275 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
276 assert(p != begin && "Significand has no digits");
277 assert((dot == end || p - begin != 1) && "Significand has no digits");
279 /* p points to the first non-digit in the string */
280 D->exponent = readExponent(p + 1, end);
282 /* Implied decimal point? */
287 /* If number is all zeroes accept any exponent. */
288 if (p != D->firstSigDigit) {
289 /* Drop insignificant trailing zeroes. */
294 while (p != begin && *p == '0');
295 while (p != begin && *p == '.');
298 /* Adjust the exponents for any decimal point. */
299 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
300 D->normalizedExponent = (D->exponent +
301 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
302 - (dot > D->firstSigDigit && dot < p)));
308 /* Return the trailing fraction of a hexadecimal number.
309 DIGITVALUE is the first hex digit of the fraction, P points to
312 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
313 unsigned int digitValue)
315 unsigned int hexDigit;
317 /* If the first trailing digit isn't 0 or 8 we can work out the
318 fraction immediately. */
320 return lfMoreThanHalf;
321 else if (digitValue < 8 && digitValue > 0)
322 return lfLessThanHalf;
324 // Otherwise we need to find the first non-zero digit.
325 while (p != end && (*p == '0' || *p == '.'))
328 assert(p != end && "Invalid trailing hexadecimal fraction!");
330 hexDigit = hexDigitValue(*p);
332 /* If we ran off the end it is exactly zero or one-half, otherwise
335 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
337 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
340 /* Return the fraction lost were a bignum truncated losing the least
341 significant BITS bits. */
343 lostFractionThroughTruncation(const integerPart *parts,
344 unsigned int partCount,
349 lsb = APInt::tcLSB(parts, partCount);
351 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
353 return lfExactlyZero;
355 return lfExactlyHalf;
356 if (bits <= partCount * integerPartWidth &&
357 APInt::tcExtractBit(parts, bits - 1))
358 return lfMoreThanHalf;
360 return lfLessThanHalf;
363 /* Shift DST right BITS bits noting lost fraction. */
365 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
367 lostFraction lost_fraction;
369 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
371 APInt::tcShiftRight(dst, parts, bits);
373 return lost_fraction;
376 /* Combine the effect of two lost fractions. */
378 combineLostFractions(lostFraction moreSignificant,
379 lostFraction lessSignificant)
381 if (lessSignificant != lfExactlyZero) {
382 if (moreSignificant == lfExactlyZero)
383 moreSignificant = lfLessThanHalf;
384 else if (moreSignificant == lfExactlyHalf)
385 moreSignificant = lfMoreThanHalf;
388 return moreSignificant;
391 /* The error from the true value, in half-ulps, on multiplying two
392 floating point numbers, which differ from the value they
393 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
394 than the returned value.
396 See "How to Read Floating Point Numbers Accurately" by William D
399 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
401 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
403 if (HUerr1 + HUerr2 == 0)
404 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
406 return inexactMultiply + 2 * (HUerr1 + HUerr2);
409 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
410 when the least significant BITS are truncated. BITS cannot be
413 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
415 unsigned int count, partBits;
416 integerPart part, boundary;
421 count = bits / integerPartWidth;
422 partBits = bits % integerPartWidth + 1;
424 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
427 boundary = (integerPart) 1 << (partBits - 1);
432 if (part - boundary <= boundary - part)
433 return part - boundary;
435 return boundary - part;
438 if (part == boundary) {
441 return ~(integerPart) 0; /* A lot. */
444 } else if (part == boundary - 1) {
447 return ~(integerPart) 0; /* A lot. */
452 return ~(integerPart) 0; /* A lot. */
455 /* Place pow(5, power) in DST, and return the number of parts used.
456 DST must be at least one part larger than size of the answer. */
458 powerOf5(integerPart *dst, unsigned int power)
460 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
462 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
463 pow5s[0] = 78125 * 5;
465 unsigned int partsCount[16] = { 1 };
466 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
468 assert(power <= maxExponent);
473 *p1 = firstEightPowers[power & 7];
479 for (unsigned int n = 0; power; power >>= 1, n++) {
484 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
486 pc = partsCount[n - 1];
487 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
489 if (pow5[pc - 1] == 0)
497 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
499 if (p2[result - 1] == 0)
502 /* Now result is in p1 with partsCount parts and p2 is scratch
504 tmp = p1, p1 = p2, p2 = tmp;
511 APInt::tcAssign(dst, p1, result);
516 /* Zero at the end to avoid modular arithmetic when adding one; used
517 when rounding up during hexadecimal output. */
518 static const char hexDigitsLower[] = "0123456789abcdef0";
519 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
520 static const char infinityL[] = "infinity";
521 static const char infinityU[] = "INFINITY";
522 static const char NaNL[] = "nan";
523 static const char NaNU[] = "NAN";
525 /* Write out an integerPart in hexadecimal, starting with the most
526 significant nibble. Write out exactly COUNT hexdigits, return
529 partAsHex (char *dst, integerPart part, unsigned int count,
530 const char *hexDigitChars)
532 unsigned int result = count;
534 assert(count != 0 && count <= integerPartWidth / 4);
536 part >>= (integerPartWidth - 4 * count);
538 dst[count] = hexDigitChars[part & 0xf];
545 /* Write out an unsigned decimal integer. */
547 writeUnsignedDecimal (char *dst, unsigned int n)
563 /* Write out a signed decimal integer. */
565 writeSignedDecimal (char *dst, int value)
569 dst = writeUnsignedDecimal(dst, -(unsigned) value);
571 dst = writeUnsignedDecimal(dst, value);
578 APFloat::initialize(const fltSemantics *ourSemantics)
582 semantics = ourSemantics;
585 significand.parts = new integerPart[count];
589 APFloat::freeSignificand()
592 delete [] significand.parts;
596 APFloat::assign(const APFloat &rhs)
598 assert(semantics == rhs.semantics);
601 category = rhs.category;
602 exponent = rhs.exponent;
603 if (isFiniteNonZero() || category == fcNaN)
604 copySignificand(rhs);
608 APFloat::copySignificand(const APFloat &rhs)
610 assert(isFiniteNonZero() || category == fcNaN);
611 assert(rhs.partCount() >= partCount());
613 APInt::tcAssign(significandParts(), rhs.significandParts(),
617 /* Make this number a NaN, with an arbitrary but deterministic value
618 for the significand. If double or longer, this is a signalling NaN,
619 which may not be ideal. If float, this is QNaN(0). */
620 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
625 integerPart *significand = significandParts();
626 unsigned numParts = partCount();
628 // Set the significand bits to the fill.
629 if (!fill || fill->getNumWords() < numParts)
630 APInt::tcSet(significand, 0, numParts);
632 APInt::tcAssign(significand, fill->getRawData(),
633 std::min(fill->getNumWords(), numParts));
635 // Zero out the excess bits of the significand.
636 unsigned bitsToPreserve = semantics->precision - 1;
637 unsigned part = bitsToPreserve / 64;
638 bitsToPreserve %= 64;
639 significand[part] &= ((1ULL << bitsToPreserve) - 1);
640 for (part++; part != numParts; ++part)
641 significand[part] = 0;
644 unsigned QNaNBit = semantics->precision - 2;
647 // We always have to clear the QNaN bit to make it an SNaN.
648 APInt::tcClearBit(significand, QNaNBit);
650 // If there are no bits set in the payload, we have to set
651 // *something* to make it a NaN instead of an infinity;
652 // conventionally, this is the next bit down from the QNaN bit.
653 if (APInt::tcIsZero(significand, numParts))
654 APInt::tcSetBit(significand, QNaNBit - 1);
656 // We always have to set the QNaN bit to make it a QNaN.
657 APInt::tcSetBit(significand, QNaNBit);
660 // For x87 extended precision, we want to make a NaN, not a
661 // pseudo-NaN. Maybe we should expose the ability to make
663 if (semantics == &APFloat::x87DoubleExtended)
664 APInt::tcSetBit(significand, QNaNBit + 1);
667 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
669 APFloat value(Sem, uninitialized);
670 value.makeNaN(SNaN, Negative, fill);
675 APFloat::operator=(const APFloat &rhs)
678 if (semantics != rhs.semantics) {
680 initialize(rhs.semantics);
689 APFloat::operator=(APFloat &&rhs) {
692 semantics = rhs.semantics;
693 significand = rhs.significand;
694 exponent = rhs.exponent;
695 category = rhs.category;
698 rhs.semantics = &Bogus;
703 APFloat::isDenormal() const {
704 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
705 (APInt::tcExtractBit(significandParts(),
706 semantics->precision - 1) == 0);
710 APFloat::isSmallest() const {
711 // The smallest number by magnitude in our format will be the smallest
712 // denormal, i.e. the floating point number with exponent being minimum
713 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
714 return isFiniteNonZero() && exponent == semantics->minExponent &&
715 significandMSB() == 0;
718 bool APFloat::isSignificandAllOnes() const {
719 // Test if the significand excluding the integral bit is all ones. This allows
720 // us to test for binade boundaries.
721 const integerPart *Parts = significandParts();
722 const unsigned PartCount = partCount();
723 for (unsigned i = 0; i < PartCount - 1; i++)
727 // Set the unused high bits to all ones when we compare.
728 const unsigned NumHighBits =
729 PartCount*integerPartWidth - semantics->precision + 1;
730 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
731 "fill than integerPartWidth");
732 const integerPart HighBitFill =
733 ~integerPart(0) << (integerPartWidth - NumHighBits);
734 if (~(Parts[PartCount - 1] | HighBitFill))
740 bool APFloat::isSignificandAllZeros() const {
741 // Test if the significand excluding the integral bit is all zeros. This
742 // allows us to test for binade boundaries.
743 const integerPart *Parts = significandParts();
744 const unsigned PartCount = partCount();
746 for (unsigned i = 0; i < PartCount - 1; i++)
750 const unsigned NumHighBits =
751 PartCount*integerPartWidth - semantics->precision + 1;
752 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
753 "clear than integerPartWidth");
754 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
756 if (Parts[PartCount - 1] & HighBitMask)
763 APFloat::isLargest() const {
764 // The largest number by magnitude in our format will be the floating point
765 // number with maximum exponent and with significand that is all ones.
766 return isFiniteNonZero() && exponent == semantics->maxExponent
767 && isSignificandAllOnes();
771 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
774 if (semantics != rhs.semantics ||
775 category != rhs.category ||
778 if (category==fcZero || category==fcInfinity)
781 if (isFiniteNonZero() && exponent != rhs.exponent)
784 return std::equal(significandParts(), significandParts() + partCount(),
785 rhs.significandParts());
788 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
789 initialize(&ourSemantics);
793 exponent = ourSemantics.precision - 1;
794 significandParts()[0] = value;
795 normalize(rmNearestTiesToEven, lfExactlyZero);
798 APFloat::APFloat(const fltSemantics &ourSemantics) {
799 initialize(&ourSemantics);
804 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
805 // Allocates storage if necessary but does not initialize it.
806 initialize(&ourSemantics);
809 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
810 initialize(&ourSemantics);
811 convertFromString(text, rmNearestTiesToEven);
814 APFloat::APFloat(const APFloat &rhs) {
815 initialize(rhs.semantics);
819 APFloat::APFloat(APFloat &&rhs) : semantics(&Bogus) {
820 *this = std::move(rhs);
828 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
829 void APFloat::Profile(FoldingSetNodeID& ID) const {
830 ID.Add(bitcastToAPInt());
834 APFloat::partCount() const
836 return partCountForBits(semantics->precision + 1);
840 APFloat::semanticsPrecision(const fltSemantics &semantics)
842 return semantics.precision;
844 APFloat::ExponentType
845 APFloat::semanticsMaxExponent(const fltSemantics &semantics)
847 return semantics.maxExponent;
849 APFloat::ExponentType
850 APFloat::semanticsMinExponent(const fltSemantics &semantics)
852 return semantics.minExponent;
855 APFloat::semanticsSizeInBits(const fltSemantics &semantics)
857 return semantics.sizeInBits;
861 APFloat::significandParts() const
863 return const_cast<APFloat *>(this)->significandParts();
867 APFloat::significandParts()
870 return significand.parts;
872 return &significand.part;
876 APFloat::zeroSignificand()
878 APInt::tcSet(significandParts(), 0, partCount());
881 /* Increment an fcNormal floating point number's significand. */
883 APFloat::incrementSignificand()
887 carry = APInt::tcIncrement(significandParts(), partCount());
889 /* Our callers should never cause us to overflow. */
894 /* Add the significand of the RHS. Returns the carry flag. */
896 APFloat::addSignificand(const APFloat &rhs)
900 parts = significandParts();
902 assert(semantics == rhs.semantics);
903 assert(exponent == rhs.exponent);
905 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
908 /* Subtract the significand of the RHS with a borrow flag. Returns
911 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
915 parts = significandParts();
917 assert(semantics == rhs.semantics);
918 assert(exponent == rhs.exponent);
920 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
924 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
925 on to the full-precision result of the multiplication. Returns the
928 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
930 unsigned int omsb; // One, not zero, based MSB.
931 unsigned int partsCount, newPartsCount, precision;
932 integerPart *lhsSignificand;
933 integerPart scratch[4];
934 integerPart *fullSignificand;
935 lostFraction lost_fraction;
938 assert(semantics == rhs.semantics);
940 precision = semantics->precision;
942 // Allocate space for twice as many bits as the original significand, plus one
943 // extra bit for the addition to overflow into.
944 newPartsCount = partCountForBits(precision * 2 + 1);
946 if (newPartsCount > 4)
947 fullSignificand = new integerPart[newPartsCount];
949 fullSignificand = scratch;
951 lhsSignificand = significandParts();
952 partsCount = partCount();
954 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
955 rhs.significandParts(), partsCount, partsCount);
957 lost_fraction = lfExactlyZero;
958 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
959 exponent += rhs.exponent;
961 // Assume the operands involved in the multiplication are single-precision
962 // FP, and the two multiplicants are:
963 // *this = a23 . a22 ... a0 * 2^e1
964 // rhs = b23 . b22 ... b0 * 2^e2
965 // the result of multiplication is:
966 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
967 // Note that there are three significant bits at the left-hand side of the
968 // radix point: two for the multiplication, and an overflow bit for the
969 // addition (that will always be zero at this point). Move the radix point
970 // toward left by two bits, and adjust exponent accordingly.
973 if (addend && addend->isNonZero()) {
974 // The intermediate result of the multiplication has "2 * precision"
975 // signicant bit; adjust the addend to be consistent with mul result.
977 Significand savedSignificand = significand;
978 const fltSemantics *savedSemantics = semantics;
979 fltSemantics extendedSemantics;
981 unsigned int extendedPrecision;
983 // Normalize our MSB to one below the top bit to allow for overflow.
984 extendedPrecision = 2 * precision + 1;
985 if (omsb != extendedPrecision - 1) {
986 assert(extendedPrecision > omsb);
987 APInt::tcShiftLeft(fullSignificand, newPartsCount,
988 (extendedPrecision - 1) - omsb);
989 exponent -= (extendedPrecision - 1) - omsb;
992 /* Create new semantics. */
993 extendedSemantics = *semantics;
994 extendedSemantics.precision = extendedPrecision;
996 if (newPartsCount == 1)
997 significand.part = fullSignificand[0];
999 significand.parts = fullSignificand;
1000 semantics = &extendedSemantics;
1002 APFloat extendedAddend(*addend);
1003 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1004 assert(status == opOK);
1007 // Shift the significand of the addend right by one bit. This guarantees
1008 // that the high bit of the significand is zero (same as fullSignificand),
1009 // so the addition will overflow (if it does overflow at all) into the top bit.
1010 lost_fraction = extendedAddend.shiftSignificandRight(1);
1011 assert(lost_fraction == lfExactlyZero &&
1012 "Lost precision while shifting addend for fused-multiply-add.");
1014 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1016 /* Restore our state. */
1017 if (newPartsCount == 1)
1018 fullSignificand[0] = significand.part;
1019 significand = savedSignificand;
1020 semantics = savedSemantics;
1022 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1025 // Convert the result having "2 * precision" significant-bits back to the one
1026 // having "precision" significant-bits. First, move the radix point from
1027 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1028 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1029 exponent -= precision + 1;
1031 // In case MSB resides at the left-hand side of radix point, shift the
1032 // mantissa right by some amount to make sure the MSB reside right before
1033 // the radix point (i.e. "MSB . rest-significant-bits").
1035 // Note that the result is not normalized when "omsb < precision". So, the
1036 // caller needs to call APFloat::normalize() if normalized value is expected.
1037 if (omsb > precision) {
1038 unsigned int bits, significantParts;
1041 bits = omsb - precision;
1042 significantParts = partCountForBits(omsb);
1043 lf = shiftRight(fullSignificand, significantParts, bits);
1044 lost_fraction = combineLostFractions(lf, lost_fraction);
1048 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1050 if (newPartsCount > 4)
1051 delete [] fullSignificand;
1053 return lost_fraction;
1056 /* Multiply the significands of LHS and RHS to DST. */
1058 APFloat::divideSignificand(const APFloat &rhs)
1060 unsigned int bit, i, partsCount;
1061 const integerPart *rhsSignificand;
1062 integerPart *lhsSignificand, *dividend, *divisor;
1063 integerPart scratch[4];
1064 lostFraction lost_fraction;
1066 assert(semantics == rhs.semantics);
1068 lhsSignificand = significandParts();
1069 rhsSignificand = rhs.significandParts();
1070 partsCount = partCount();
1073 dividend = new integerPart[partsCount * 2];
1077 divisor = dividend + partsCount;
1079 /* Copy the dividend and divisor as they will be modified in-place. */
1080 for (i = 0; i < partsCount; i++) {
1081 dividend[i] = lhsSignificand[i];
1082 divisor[i] = rhsSignificand[i];
1083 lhsSignificand[i] = 0;
1086 exponent -= rhs.exponent;
1088 unsigned int precision = semantics->precision;
1090 /* Normalize the divisor. */
1091 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1094 APInt::tcShiftLeft(divisor, partsCount, bit);
1097 /* Normalize the dividend. */
1098 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1101 APInt::tcShiftLeft(dividend, partsCount, bit);
1104 /* Ensure the dividend >= divisor initially for the loop below.
1105 Incidentally, this means that the division loop below is
1106 guaranteed to set the integer bit to one. */
1107 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1109 APInt::tcShiftLeft(dividend, partsCount, 1);
1110 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1113 /* Long division. */
1114 for (bit = precision; bit; bit -= 1) {
1115 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1116 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1117 APInt::tcSetBit(lhsSignificand, bit - 1);
1120 APInt::tcShiftLeft(dividend, partsCount, 1);
1123 /* Figure out the lost fraction. */
1124 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1127 lost_fraction = lfMoreThanHalf;
1129 lost_fraction = lfExactlyHalf;
1130 else if (APInt::tcIsZero(dividend, partsCount))
1131 lost_fraction = lfExactlyZero;
1133 lost_fraction = lfLessThanHalf;
1138 return lost_fraction;
1142 APFloat::significandMSB() const
1144 return APInt::tcMSB(significandParts(), partCount());
1148 APFloat::significandLSB() const
1150 return APInt::tcLSB(significandParts(), partCount());
1153 /* Note that a zero result is NOT normalized to fcZero. */
1155 APFloat::shiftSignificandRight(unsigned int bits)
1157 /* Our exponent should not overflow. */
1158 assert((ExponentType) (exponent + bits) >= exponent);
1162 return shiftRight(significandParts(), partCount(), bits);
1165 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1167 APFloat::shiftSignificandLeft(unsigned int bits)
1169 assert(bits < semantics->precision);
1172 unsigned int partsCount = partCount();
1174 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1177 assert(!APInt::tcIsZero(significandParts(), partsCount));
1182 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1186 assert(semantics == rhs.semantics);
1187 assert(isFiniteNonZero());
1188 assert(rhs.isFiniteNonZero());
1190 compare = exponent - rhs.exponent;
1192 /* If exponents are equal, do an unsigned bignum comparison of the
1195 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1199 return cmpGreaterThan;
1200 else if (compare < 0)
1206 /* Handle overflow. Sign is preserved. We either become infinity or
1207 the largest finite number. */
1209 APFloat::handleOverflow(roundingMode rounding_mode)
1212 if (rounding_mode == rmNearestTiesToEven ||
1213 rounding_mode == rmNearestTiesToAway ||
1214 (rounding_mode == rmTowardPositive && !sign) ||
1215 (rounding_mode == rmTowardNegative && sign)) {
1216 category = fcInfinity;
1217 return (opStatus) (opOverflow | opInexact);
1220 /* Otherwise we become the largest finite number. */
1221 category = fcNormal;
1222 exponent = semantics->maxExponent;
1223 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1224 semantics->precision);
1229 /* Returns TRUE if, when truncating the current number, with BIT the
1230 new LSB, with the given lost fraction and rounding mode, the result
1231 would need to be rounded away from zero (i.e., by increasing the
1232 signficand). This routine must work for fcZero of both signs, and
1233 fcNormal numbers. */
1235 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1236 lostFraction lost_fraction,
1237 unsigned int bit) const
1239 /* NaNs and infinities should not have lost fractions. */
1240 assert(isFiniteNonZero() || category == fcZero);
1242 /* Current callers never pass this so we don't handle it. */
1243 assert(lost_fraction != lfExactlyZero);
1245 switch (rounding_mode) {
1246 case rmNearestTiesToAway:
1247 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1249 case rmNearestTiesToEven:
1250 if (lost_fraction == lfMoreThanHalf)
1253 /* Our zeroes don't have a significand to test. */
1254 if (lost_fraction == lfExactlyHalf && category != fcZero)
1255 return APInt::tcExtractBit(significandParts(), bit);
1262 case rmTowardPositive:
1265 case rmTowardNegative:
1268 llvm_unreachable("Invalid rounding mode found");
1272 APFloat::normalize(roundingMode rounding_mode,
1273 lostFraction lost_fraction)
1275 unsigned int omsb; /* One, not zero, based MSB. */
1278 if (!isFiniteNonZero())
1281 /* Before rounding normalize the exponent of fcNormal numbers. */
1282 omsb = significandMSB() + 1;
1285 /* OMSB is numbered from 1. We want to place it in the integer
1286 bit numbered PRECISION if possible, with a compensating change in
1288 exponentChange = omsb - semantics->precision;
1290 /* If the resulting exponent is too high, overflow according to
1291 the rounding mode. */
1292 if (exponent + exponentChange > semantics->maxExponent)
1293 return handleOverflow(rounding_mode);
1295 /* Subnormal numbers have exponent minExponent, and their MSB
1296 is forced based on that. */
1297 if (exponent + exponentChange < semantics->minExponent)
1298 exponentChange = semantics->minExponent - exponent;
1300 /* Shifting left is easy as we don't lose precision. */
1301 if (exponentChange < 0) {
1302 assert(lost_fraction == lfExactlyZero);
1304 shiftSignificandLeft(-exponentChange);
1309 if (exponentChange > 0) {
1312 /* Shift right and capture any new lost fraction. */
1313 lf = shiftSignificandRight(exponentChange);
1315 lost_fraction = combineLostFractions(lf, lost_fraction);
1317 /* Keep OMSB up-to-date. */
1318 if (omsb > (unsigned) exponentChange)
1319 omsb -= exponentChange;
1325 /* Now round the number according to rounding_mode given the lost
1328 /* As specified in IEEE 754, since we do not trap we do not report
1329 underflow for exact results. */
1330 if (lost_fraction == lfExactlyZero) {
1331 /* Canonicalize zeroes. */
1338 /* Increment the significand if we're rounding away from zero. */
1339 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1341 exponent = semantics->minExponent;
1343 incrementSignificand();
1344 omsb = significandMSB() + 1;
1346 /* Did the significand increment overflow? */
1347 if (omsb == (unsigned) semantics->precision + 1) {
1348 /* Renormalize by incrementing the exponent and shifting our
1349 significand right one. However if we already have the
1350 maximum exponent we overflow to infinity. */
1351 if (exponent == semantics->maxExponent) {
1352 category = fcInfinity;
1354 return (opStatus) (opOverflow | opInexact);
1357 shiftSignificandRight(1);
1363 /* The normal case - we were and are not denormal, and any
1364 significand increment above didn't overflow. */
1365 if (omsb == semantics->precision)
1368 /* We have a non-zero denormal. */
1369 assert(omsb < semantics->precision);
1371 /* Canonicalize zeroes. */
1375 /* The fcZero case is a denormal that underflowed to zero. */
1376 return (opStatus) (opUnderflow | opInexact);
1380 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1382 switch (PackCategoriesIntoKey(category, rhs.category)) {
1384 llvm_unreachable(nullptr);
1386 case PackCategoriesIntoKey(fcNaN, fcZero):
1387 case PackCategoriesIntoKey(fcNaN, fcNormal):
1388 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1389 case PackCategoriesIntoKey(fcNaN, fcNaN):
1390 case PackCategoriesIntoKey(fcNormal, fcZero):
1391 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1392 case PackCategoriesIntoKey(fcInfinity, fcZero):
1395 case PackCategoriesIntoKey(fcZero, fcNaN):
1396 case PackCategoriesIntoKey(fcNormal, fcNaN):
1397 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1398 // We need to be sure to flip the sign here for subtraction because we
1399 // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1400 sign = rhs.sign ^ subtract;
1402 copySignificand(rhs);
1405 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1406 case PackCategoriesIntoKey(fcZero, fcInfinity):
1407 category = fcInfinity;
1408 sign = rhs.sign ^ subtract;
1411 case PackCategoriesIntoKey(fcZero, fcNormal):
1413 sign = rhs.sign ^ subtract;
1416 case PackCategoriesIntoKey(fcZero, fcZero):
1417 /* Sign depends on rounding mode; handled by caller. */
1420 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1421 /* Differently signed infinities can only be validly
1423 if (((sign ^ rhs.sign)!=0) != subtract) {
1430 case PackCategoriesIntoKey(fcNormal, fcNormal):
1435 /* Add or subtract two normal numbers. */
1437 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1440 lostFraction lost_fraction;
1443 /* Determine if the operation on the absolute values is effectively
1444 an addition or subtraction. */
1445 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1447 /* Are we bigger exponent-wise than the RHS? */
1448 bits = exponent - rhs.exponent;
1450 /* Subtraction is more subtle than one might naively expect. */
1452 APFloat temp_rhs(rhs);
1456 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1457 lost_fraction = lfExactlyZero;
1458 } else if (bits > 0) {
1459 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1460 shiftSignificandLeft(1);
1463 lost_fraction = shiftSignificandRight(-bits - 1);
1464 temp_rhs.shiftSignificandLeft(1);
1469 carry = temp_rhs.subtractSignificand
1470 (*this, lost_fraction != lfExactlyZero);
1471 copySignificand(temp_rhs);
1474 carry = subtractSignificand
1475 (temp_rhs, lost_fraction != lfExactlyZero);
1478 /* Invert the lost fraction - it was on the RHS and
1480 if (lost_fraction == lfLessThanHalf)
1481 lost_fraction = lfMoreThanHalf;
1482 else if (lost_fraction == lfMoreThanHalf)
1483 lost_fraction = lfLessThanHalf;
1485 /* The code above is intended to ensure that no borrow is
1491 APFloat temp_rhs(rhs);
1493 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1494 carry = addSignificand(temp_rhs);
1496 lost_fraction = shiftSignificandRight(-bits);
1497 carry = addSignificand(rhs);
1500 /* We have a guard bit; generating a carry cannot happen. */
1505 return lost_fraction;
1509 APFloat::multiplySpecials(const APFloat &rhs)
1511 switch (PackCategoriesIntoKey(category, rhs.category)) {
1513 llvm_unreachable(nullptr);
1515 case PackCategoriesIntoKey(fcNaN, fcZero):
1516 case PackCategoriesIntoKey(fcNaN, fcNormal):
1517 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1518 case PackCategoriesIntoKey(fcNaN, fcNaN):
1522 case PackCategoriesIntoKey(fcZero, fcNaN):
1523 case PackCategoriesIntoKey(fcNormal, fcNaN):
1524 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1527 copySignificand(rhs);
1530 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1531 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1532 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1533 category = fcInfinity;
1536 case PackCategoriesIntoKey(fcZero, fcNormal):
1537 case PackCategoriesIntoKey(fcNormal, fcZero):
1538 case PackCategoriesIntoKey(fcZero, fcZero):
1542 case PackCategoriesIntoKey(fcZero, fcInfinity):
1543 case PackCategoriesIntoKey(fcInfinity, fcZero):
1547 case PackCategoriesIntoKey(fcNormal, fcNormal):
1553 APFloat::divideSpecials(const APFloat &rhs)
1555 switch (PackCategoriesIntoKey(category, rhs.category)) {
1557 llvm_unreachable(nullptr);
1559 case PackCategoriesIntoKey(fcZero, fcNaN):
1560 case PackCategoriesIntoKey(fcNormal, fcNaN):
1561 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1563 copySignificand(rhs);
1564 case PackCategoriesIntoKey(fcNaN, fcZero):
1565 case PackCategoriesIntoKey(fcNaN, fcNormal):
1566 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1567 case PackCategoriesIntoKey(fcNaN, fcNaN):
1569 case PackCategoriesIntoKey(fcInfinity, fcZero):
1570 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1571 case PackCategoriesIntoKey(fcZero, fcInfinity):
1572 case PackCategoriesIntoKey(fcZero, fcNormal):
1575 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1579 case PackCategoriesIntoKey(fcNormal, fcZero):
1580 category = fcInfinity;
1583 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1584 case PackCategoriesIntoKey(fcZero, fcZero):
1588 case PackCategoriesIntoKey(fcNormal, fcNormal):
1594 APFloat::modSpecials(const APFloat &rhs)
1596 switch (PackCategoriesIntoKey(category, rhs.category)) {
1598 llvm_unreachable(nullptr);
1600 case PackCategoriesIntoKey(fcNaN, fcZero):
1601 case PackCategoriesIntoKey(fcNaN, fcNormal):
1602 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1603 case PackCategoriesIntoKey(fcNaN, fcNaN):
1604 case PackCategoriesIntoKey(fcZero, fcInfinity):
1605 case PackCategoriesIntoKey(fcZero, fcNormal):
1606 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1609 case PackCategoriesIntoKey(fcZero, fcNaN):
1610 case PackCategoriesIntoKey(fcNormal, fcNaN):
1611 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1614 copySignificand(rhs);
1617 case PackCategoriesIntoKey(fcNormal, fcZero):
1618 case PackCategoriesIntoKey(fcInfinity, fcZero):
1619 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1620 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1621 case PackCategoriesIntoKey(fcZero, fcZero):
1625 case PackCategoriesIntoKey(fcNormal, fcNormal):
1632 APFloat::changeSign()
1634 /* Look mummy, this one's easy. */
1639 APFloat::clearSign()
1641 /* So is this one. */
1646 APFloat::copySign(const APFloat &rhs)
1652 /* Normalized addition or subtraction. */
1654 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1659 fs = addOrSubtractSpecials(rhs, subtract);
1661 /* This return code means it was not a simple case. */
1662 if (fs == opDivByZero) {
1663 lostFraction lost_fraction;
1665 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1666 fs = normalize(rounding_mode, lost_fraction);
1668 /* Can only be zero if we lost no fraction. */
1669 assert(category != fcZero || lost_fraction == lfExactlyZero);
1672 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1673 positive zero unless rounding to minus infinity, except that
1674 adding two like-signed zeroes gives that zero. */
1675 if (category == fcZero) {
1676 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1677 sign = (rounding_mode == rmTowardNegative);
1683 /* Normalized addition. */
1685 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1687 return addOrSubtract(rhs, rounding_mode, false);
1690 /* Normalized subtraction. */
1692 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1694 return addOrSubtract(rhs, rounding_mode, true);
1697 /* Normalized multiply. */
1699 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1704 fs = multiplySpecials(rhs);
1706 if (isFiniteNonZero()) {
1707 lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
1708 fs = normalize(rounding_mode, lost_fraction);
1709 if (lost_fraction != lfExactlyZero)
1710 fs = (opStatus) (fs | opInexact);
1716 /* Normalized divide. */
1718 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1723 fs = divideSpecials(rhs);
1725 if (isFiniteNonZero()) {
1726 lostFraction lost_fraction = divideSignificand(rhs);
1727 fs = normalize(rounding_mode, lost_fraction);
1728 if (lost_fraction != lfExactlyZero)
1729 fs = (opStatus) (fs | opInexact);
1735 /* Normalized remainder. This is not currently correct in all cases. */
1737 APFloat::remainder(const APFloat &rhs)
1741 unsigned int origSign = sign;
1743 fs = V.divide(rhs, rmNearestTiesToEven);
1744 if (fs == opDivByZero)
1747 int parts = partCount();
1748 integerPart *x = new integerPart[parts];
1750 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1751 rmNearestTiesToEven, &ignored);
1752 if (fs==opInvalidOp)
1755 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1756 rmNearestTiesToEven);
1757 assert(fs==opOK); // should always work
1759 fs = V.multiply(rhs, rmNearestTiesToEven);
1760 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1762 fs = subtract(V, rmNearestTiesToEven);
1763 assert(fs==opOK || fs==opInexact); // likewise
1766 sign = origSign; // IEEE754 requires this
1771 /* Normalized llvm frem (C fmod).
1772 This is not currently correct in all cases. */
1774 APFloat::mod(const APFloat &rhs)
1777 fs = modSpecials(rhs);
1779 if (isFiniteNonZero() && rhs.isFiniteNonZero()) {
1781 unsigned int origSign = sign;
1783 fs = V.divide(rhs, rmNearestTiesToEven);
1784 if (fs == opDivByZero)
1787 int parts = partCount();
1788 integerPart *x = new integerPart[parts];
1790 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1791 rmTowardZero, &ignored);
1792 if (fs==opInvalidOp)
1795 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1796 rmNearestTiesToEven);
1797 assert(fs==opOK); // should always work
1799 fs = V.multiply(rhs, rmNearestTiesToEven);
1800 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1802 fs = subtract(V, rmNearestTiesToEven);
1803 assert(fs==opOK || fs==opInexact); // likewise
1806 sign = origSign; // IEEE754 requires this
1812 /* Normalized fused-multiply-add. */
1814 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1815 const APFloat &addend,
1816 roundingMode rounding_mode)
1820 /* Post-multiplication sign, before addition. */
1821 sign ^= multiplicand.sign;
1823 /* If and only if all arguments are normal do we need to do an
1824 extended-precision calculation. */
1825 if (isFiniteNonZero() &&
1826 multiplicand.isFiniteNonZero() &&
1827 addend.isFinite()) {
1828 lostFraction lost_fraction;
1830 lost_fraction = multiplySignificand(multiplicand, &addend);
1831 fs = normalize(rounding_mode, lost_fraction);
1832 if (lost_fraction != lfExactlyZero)
1833 fs = (opStatus) (fs | opInexact);
1835 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1836 positive zero unless rounding to minus infinity, except that
1837 adding two like-signed zeroes gives that zero. */
1838 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1839 sign = (rounding_mode == rmTowardNegative);
1841 fs = multiplySpecials(multiplicand);
1843 /* FS can only be opOK or opInvalidOp. There is no more work
1844 to do in the latter case. The IEEE-754R standard says it is
1845 implementation-defined in this case whether, if ADDEND is a
1846 quiet NaN, we raise invalid op; this implementation does so.
1848 If we need to do the addition we can do so with normal
1851 fs = addOrSubtract(addend, rounding_mode, false);
1857 /* Rounding-mode corrrect round to integral value. */
1858 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1861 // If the exponent is large enough, we know that this value is already
1862 // integral, and the arithmetic below would potentially cause it to saturate
1863 // to +/-Inf. Bail out early instead.
1864 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1867 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1868 // precision of our format, and then subtract it back off again. The choice
1869 // of rounding modes for the addition/subtraction determines the rounding mode
1870 // for our integral rounding as well.
1871 // NOTE: When the input value is negative, we do subtraction followed by
1872 // addition instead.
1873 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1874 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1875 APFloat MagicConstant(*semantics);
1876 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1877 rmNearestTiesToEven);
1878 MagicConstant.copySign(*this);
1883 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1884 bool inputSign = isNegative();
1886 fs = add(MagicConstant, rounding_mode);
1887 if (fs != opOK && fs != opInexact)
1890 fs = subtract(MagicConstant, rounding_mode);
1892 // Restore the input sign.
1893 if (inputSign != isNegative())
1900 /* Comparison requires normalized numbers. */
1902 APFloat::compare(const APFloat &rhs) const
1906 assert(semantics == rhs.semantics);
1908 switch (PackCategoriesIntoKey(category, rhs.category)) {
1910 llvm_unreachable(nullptr);
1912 case PackCategoriesIntoKey(fcNaN, fcZero):
1913 case PackCategoriesIntoKey(fcNaN, fcNormal):
1914 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1915 case PackCategoriesIntoKey(fcNaN, fcNaN):
1916 case PackCategoriesIntoKey(fcZero, fcNaN):
1917 case PackCategoriesIntoKey(fcNormal, fcNaN):
1918 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1919 return cmpUnordered;
1921 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1922 case PackCategoriesIntoKey(fcInfinity, fcZero):
1923 case PackCategoriesIntoKey(fcNormal, fcZero):
1927 return cmpGreaterThan;
1929 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1930 case PackCategoriesIntoKey(fcZero, fcInfinity):
1931 case PackCategoriesIntoKey(fcZero, fcNormal):
1933 return cmpGreaterThan;
1937 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1938 if (sign == rhs.sign)
1943 return cmpGreaterThan;
1945 case PackCategoriesIntoKey(fcZero, fcZero):
1948 case PackCategoriesIntoKey(fcNormal, fcNormal):
1952 /* Two normal numbers. Do they have the same sign? */
1953 if (sign != rhs.sign) {
1955 result = cmpLessThan;
1957 result = cmpGreaterThan;
1959 /* Compare absolute values; invert result if negative. */
1960 result = compareAbsoluteValue(rhs);
1963 if (result == cmpLessThan)
1964 result = cmpGreaterThan;
1965 else if (result == cmpGreaterThan)
1966 result = cmpLessThan;
1973 /// APFloat::convert - convert a value of one floating point type to another.
1974 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1975 /// records whether the transformation lost information, i.e. whether
1976 /// converting the result back to the original type will produce the
1977 /// original value (this is almost the same as return value==fsOK, but there
1978 /// are edge cases where this is not so).
1981 APFloat::convert(const fltSemantics &toSemantics,
1982 roundingMode rounding_mode, bool *losesInfo)
1984 lostFraction lostFraction;
1985 unsigned int newPartCount, oldPartCount;
1988 const fltSemantics &fromSemantics = *semantics;
1990 lostFraction = lfExactlyZero;
1991 newPartCount = partCountForBits(toSemantics.precision + 1);
1992 oldPartCount = partCount();
1993 shift = toSemantics.precision - fromSemantics.precision;
1995 bool X86SpecialNan = false;
1996 if (&fromSemantics == &APFloat::x87DoubleExtended &&
1997 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1998 (!(*significandParts() & 0x8000000000000000ULL) ||
1999 !(*significandParts() & 0x4000000000000000ULL))) {
2000 // x86 has some unusual NaNs which cannot be represented in any other
2001 // format; note them here.
2002 X86SpecialNan = true;
2005 // If this is a truncation of a denormal number, and the target semantics
2006 // has larger exponent range than the source semantics (this can happen
2007 // when truncating from PowerPC double-double to double format), the
2008 // right shift could lose result mantissa bits. Adjust exponent instead
2009 // of performing excessive shift.
2010 if (shift < 0 && isFiniteNonZero()) {
2011 int exponentChange = significandMSB() + 1 - fromSemantics.precision;
2012 if (exponent + exponentChange < toSemantics.minExponent)
2013 exponentChange = toSemantics.minExponent - exponent;
2014 if (exponentChange < shift)
2015 exponentChange = shift;
2016 if (exponentChange < 0) {
2017 shift -= exponentChange;
2018 exponent += exponentChange;
2022 // If this is a truncation, perform the shift before we narrow the storage.
2023 if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
2024 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2026 // Fix the storage so it can hold to new value.
2027 if (newPartCount > oldPartCount) {
2028 // The new type requires more storage; make it available.
2029 integerPart *newParts;
2030 newParts = new integerPart[newPartCount];
2031 APInt::tcSet(newParts, 0, newPartCount);
2032 if (isFiniteNonZero() || category==fcNaN)
2033 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2035 significand.parts = newParts;
2036 } else if (newPartCount == 1 && oldPartCount != 1) {
2037 // Switch to built-in storage for a single part.
2038 integerPart newPart = 0;
2039 if (isFiniteNonZero() || category==fcNaN)
2040 newPart = significandParts()[0];
2042 significand.part = newPart;
2045 // Now that we have the right storage, switch the semantics.
2046 semantics = &toSemantics;
2048 // If this is an extension, perform the shift now that the storage is
2050 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2051 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2053 if (isFiniteNonZero()) {
2054 fs = normalize(rounding_mode, lostFraction);
2055 *losesInfo = (fs != opOK);
2056 } else if (category == fcNaN) {
2057 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2059 // For x87 extended precision, we want to make a NaN, not a special NaN if
2060 // the input wasn't special either.
2061 if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended)
2062 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2064 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2065 // does not give you back the same bits. This is dubious, and we
2066 // don't currently do it. You're really supposed to get
2067 // an invalid operation signal at runtime, but nobody does that.
2077 /* Convert a floating point number to an integer according to the
2078 rounding mode. If the rounded integer value is out of range this
2079 returns an invalid operation exception and the contents of the
2080 destination parts are unspecified. If the rounded value is in
2081 range but the floating point number is not the exact integer, the C
2082 standard doesn't require an inexact exception to be raised. IEEE
2083 854 does require it so we do that.
2085 Note that for conversions to integer type the C standard requires
2086 round-to-zero to always be used. */
2088 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
2090 roundingMode rounding_mode,
2091 bool *isExact) const
2093 lostFraction lost_fraction;
2094 const integerPart *src;
2095 unsigned int dstPartsCount, truncatedBits;
2099 /* Handle the three special cases first. */
2100 if (category == fcInfinity || category == fcNaN)
2103 dstPartsCount = partCountForBits(width);
2105 if (category == fcZero) {
2106 APInt::tcSet(parts, 0, dstPartsCount);
2107 // Negative zero can't be represented as an int.
2112 src = significandParts();
2114 /* Step 1: place our absolute value, with any fraction truncated, in
2117 /* Our absolute value is less than one; truncate everything. */
2118 APInt::tcSet(parts, 0, dstPartsCount);
2119 /* For exponent -1 the integer bit represents .5, look at that.
2120 For smaller exponents leftmost truncated bit is 0. */
2121 truncatedBits = semantics->precision -1U - exponent;
2123 /* We want the most significant (exponent + 1) bits; the rest are
2125 unsigned int bits = exponent + 1U;
2127 /* Hopelessly large in magnitude? */
2131 if (bits < semantics->precision) {
2132 /* We truncate (semantics->precision - bits) bits. */
2133 truncatedBits = semantics->precision - bits;
2134 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
2136 /* We want at least as many bits as are available. */
2137 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2138 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2143 /* Step 2: work out any lost fraction, and increment the absolute
2144 value if we would round away from zero. */
2145 if (truncatedBits) {
2146 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2148 if (lost_fraction != lfExactlyZero &&
2149 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2150 if (APInt::tcIncrement(parts, dstPartsCount))
2151 return opInvalidOp; /* Overflow. */
2154 lost_fraction = lfExactlyZero;
2157 /* Step 3: check if we fit in the destination. */
2158 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2162 /* Negative numbers cannot be represented as unsigned. */
2166 /* It takes omsb bits to represent the unsigned integer value.
2167 We lose a bit for the sign, but care is needed as the
2168 maximally negative integer is a special case. */
2169 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2172 /* This case can happen because of rounding. */
2177 APInt::tcNegate (parts, dstPartsCount);
2179 if (omsb >= width + !isSigned)
2183 if (lost_fraction == lfExactlyZero) {
2190 /* Same as convertToSignExtendedInteger, except we provide
2191 deterministic values in case of an invalid operation exception,
2192 namely zero for NaNs and the minimal or maximal value respectively
2193 for underflow or overflow.
2194 The *isExact output tells whether the result is exact, in the sense
2195 that converting it back to the original floating point type produces
2196 the original value. This is almost equivalent to result==opOK,
2197 except for negative zeroes.
2200 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2202 roundingMode rounding_mode, bool *isExact) const
2206 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2209 if (fs == opInvalidOp) {
2210 unsigned int bits, dstPartsCount;
2212 dstPartsCount = partCountForBits(width);
2214 if (category == fcNaN)
2219 bits = width - isSigned;
2221 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2222 if (sign && isSigned)
2223 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2229 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2230 an APSInt, whose initial bit-width and signed-ness are used to determine the
2231 precision of the conversion.
2234 APFloat::convertToInteger(APSInt &result,
2235 roundingMode rounding_mode, bool *isExact) const
2237 unsigned bitWidth = result.getBitWidth();
2238 SmallVector<uint64_t, 4> parts(result.getNumWords());
2239 opStatus status = convertToInteger(
2240 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2241 // Keeps the original signed-ness.
2242 result = APInt(bitWidth, parts);
2246 /* Convert an unsigned integer SRC to a floating point number,
2247 rounding according to ROUNDING_MODE. The sign of the floating
2248 point number is not modified. */
2250 APFloat::convertFromUnsignedParts(const integerPart *src,
2251 unsigned int srcCount,
2252 roundingMode rounding_mode)
2254 unsigned int omsb, precision, dstCount;
2256 lostFraction lost_fraction;
2258 category = fcNormal;
2259 omsb = APInt::tcMSB(src, srcCount) + 1;
2260 dst = significandParts();
2261 dstCount = partCount();
2262 precision = semantics->precision;
2264 /* We want the most significant PRECISION bits of SRC. There may not
2265 be that many; extract what we can. */
2266 if (precision <= omsb) {
2267 exponent = omsb - 1;
2268 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2270 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2272 exponent = precision - 1;
2273 lost_fraction = lfExactlyZero;
2274 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2277 return normalize(rounding_mode, lost_fraction);
2281 APFloat::convertFromAPInt(const APInt &Val,
2283 roundingMode rounding_mode)
2285 unsigned int partCount = Val.getNumWords();
2289 if (isSigned && api.isNegative()) {
2294 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2297 /* Convert a two's complement integer SRC to a floating point number,
2298 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2299 integer is signed, in which case it must be sign-extended. */
2301 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2302 unsigned int srcCount,
2304 roundingMode rounding_mode)
2309 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2312 /* If we're signed and negative negate a copy. */
2314 copy = new integerPart[srcCount];
2315 APInt::tcAssign(copy, src, srcCount);
2316 APInt::tcNegate(copy, srcCount);
2317 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2321 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2327 /* FIXME: should this just take a const APInt reference? */
2329 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2330 unsigned int width, bool isSigned,
2331 roundingMode rounding_mode)
2333 unsigned int partCount = partCountForBits(width);
2334 APInt api = APInt(width, makeArrayRef(parts, partCount));
2337 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2342 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2346 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2348 lostFraction lost_fraction = lfExactlyZero;
2350 category = fcNormal;
2354 integerPart *significand = significandParts();
2355 unsigned partsCount = partCount();
2356 unsigned bitPos = partsCount * integerPartWidth;
2357 bool computedTrailingFraction = false;
2359 // Skip leading zeroes and any (hexa)decimal point.
2360 StringRef::iterator begin = s.begin();
2361 StringRef::iterator end = s.end();
2362 StringRef::iterator dot;
2363 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2364 StringRef::iterator firstSignificantDigit = p;
2367 integerPart hex_value;
2370 assert(dot == end && "String contains multiple dots");
2375 hex_value = hexDigitValue(*p);
2376 if (hex_value == -1U)
2381 // Store the number while we have space.
2384 hex_value <<= bitPos % integerPartWidth;
2385 significand[bitPos / integerPartWidth] |= hex_value;
2386 } else if (!computedTrailingFraction) {
2387 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2388 computedTrailingFraction = true;
2392 /* Hex floats require an exponent but not a hexadecimal point. */
2393 assert(p != end && "Hex strings require an exponent");
2394 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2395 assert(p != begin && "Significand has no digits");
2396 assert((dot == end || p - begin != 1) && "Significand has no digits");
2398 /* Ignore the exponent if we are zero. */
2399 if (p != firstSignificantDigit) {
2402 /* Implicit hexadecimal point? */
2406 /* Calculate the exponent adjustment implicit in the number of
2407 significant digits. */
2408 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2409 if (expAdjustment < 0)
2411 expAdjustment = expAdjustment * 4 - 1;
2413 /* Adjust for writing the significand starting at the most
2414 significant nibble. */
2415 expAdjustment += semantics->precision;
2416 expAdjustment -= partsCount * integerPartWidth;
2418 /* Adjust for the given exponent. */
2419 exponent = totalExponent(p + 1, end, expAdjustment);
2422 return normalize(rounding_mode, lost_fraction);
2426 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2427 unsigned sigPartCount, int exp,
2428 roundingMode rounding_mode)
2430 unsigned int parts, pow5PartCount;
2431 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2432 integerPart pow5Parts[maxPowerOfFiveParts];
2435 isNearest = (rounding_mode == rmNearestTiesToEven ||
2436 rounding_mode == rmNearestTiesToAway);
2438 parts = partCountForBits(semantics->precision + 11);
2440 /* Calculate pow(5, abs(exp)). */
2441 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2443 for (;; parts *= 2) {
2444 opStatus sigStatus, powStatus;
2445 unsigned int excessPrecision, truncatedBits;
2447 calcSemantics.precision = parts * integerPartWidth - 1;
2448 excessPrecision = calcSemantics.precision - semantics->precision;
2449 truncatedBits = excessPrecision;
2451 APFloat decSig = APFloat::getZero(calcSemantics, sign);
2452 APFloat pow5(calcSemantics);
2454 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2455 rmNearestTiesToEven);
2456 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2457 rmNearestTiesToEven);
2458 /* Add exp, as 10^n = 5^n * 2^n. */
2459 decSig.exponent += exp;
2461 lostFraction calcLostFraction;
2462 integerPart HUerr, HUdistance;
2463 unsigned int powHUerr;
2466 /* multiplySignificand leaves the precision-th bit set to 1. */
2467 calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
2468 powHUerr = powStatus != opOK;
2470 calcLostFraction = decSig.divideSignificand(pow5);
2471 /* Denormal numbers have less precision. */
2472 if (decSig.exponent < semantics->minExponent) {
2473 excessPrecision += (semantics->minExponent - decSig.exponent);
2474 truncatedBits = excessPrecision;
2475 if (excessPrecision > calcSemantics.precision)
2476 excessPrecision = calcSemantics.precision;
2478 /* Extra half-ulp lost in reciprocal of exponent. */
2479 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2482 /* Both multiplySignificand and divideSignificand return the
2483 result with the integer bit set. */
2484 assert(APInt::tcExtractBit
2485 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2487 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2489 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2490 excessPrecision, isNearest);
2492 /* Are we guaranteed to round correctly if we truncate? */
2493 if (HUdistance >= HUerr) {
2494 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2495 calcSemantics.precision - excessPrecision,
2497 /* Take the exponent of decSig. If we tcExtract-ed less bits
2498 above we must adjust our exponent to compensate for the
2499 implicit right shift. */
2500 exponent = (decSig.exponent + semantics->precision
2501 - (calcSemantics.precision - excessPrecision));
2502 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2505 return normalize(rounding_mode, calcLostFraction);
2511 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2516 /* Scan the text. */
2517 StringRef::iterator p = str.begin();
2518 interpretDecimal(p, str.end(), &D);
2520 /* Handle the quick cases. First the case of no significant digits,
2521 i.e. zero, and then exponents that are obviously too large or too
2522 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2523 definitely overflows if
2525 (exp - 1) * L >= maxExponent
2527 and definitely underflows to zero where
2529 (exp + 1) * L <= minExponent - precision
2531 With integer arithmetic the tightest bounds for L are
2533 93/28 < L < 196/59 [ numerator <= 256 ]
2534 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2537 // Test if we have a zero number allowing for strings with no null terminators
2538 // and zero decimals with non-zero exponents.
2540 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2541 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2542 // be at most one dot. On the other hand, if we have a zero with a non-zero
2543 // exponent, then we know that D.firstSigDigit will be non-numeric.
2544 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2548 /* Check whether the normalized exponent is high enough to overflow
2549 max during the log-rebasing in the max-exponent check below. */
2550 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2551 fs = handleOverflow(rounding_mode);
2553 /* If it wasn't, then it also wasn't high enough to overflow max
2554 during the log-rebasing in the min-exponent check. Check that it
2555 won't overflow min in either check, then perform the min-exponent
2557 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2558 (D.normalizedExponent + 1) * 28738 <=
2559 8651 * (semantics->minExponent - (int) semantics->precision)) {
2560 /* Underflow to zero and round. */
2561 category = fcNormal;
2563 fs = normalize(rounding_mode, lfLessThanHalf);
2565 /* We can finally safely perform the max-exponent check. */
2566 } else if ((D.normalizedExponent - 1) * 42039
2567 >= 12655 * semantics->maxExponent) {
2568 /* Overflow and round. */
2569 fs = handleOverflow(rounding_mode);
2571 integerPart *decSignificand;
2572 unsigned int partCount;
2574 /* A tight upper bound on number of bits required to hold an
2575 N-digit decimal integer is N * 196 / 59. Allocate enough space
2576 to hold the full significand, and an extra part required by
2578 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2579 partCount = partCountForBits(1 + 196 * partCount / 59);
2580 decSignificand = new integerPart[partCount + 1];
2583 /* Convert to binary efficiently - we do almost all multiplication
2584 in an integerPart. When this would overflow do we do a single
2585 bignum multiplication, and then revert again to multiplication
2586 in an integerPart. */
2588 integerPart decValue, val, multiplier;
2596 if (p == str.end()) {
2600 decValue = decDigitValue(*p++);
2601 assert(decValue < 10U && "Invalid character in significand");
2603 val = val * 10 + decValue;
2604 /* The maximum number that can be multiplied by ten with any
2605 digit added without overflowing an integerPart. */
2606 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2608 /* Multiply out the current part. */
2609 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2610 partCount, partCount + 1, false);
2612 /* If we used another part (likely but not guaranteed), increase
2614 if (decSignificand[partCount])
2616 } while (p <= D.lastSigDigit);
2618 category = fcNormal;
2619 fs = roundSignificandWithExponent(decSignificand, partCount,
2620 D.exponent, rounding_mode);
2622 delete [] decSignificand;
2629 APFloat::convertFromStringSpecials(StringRef str) {
2630 if (str.equals("inf") || str.equals("INFINITY")) {
2635 if (str.equals("-inf") || str.equals("-INFINITY")) {
2640 if (str.equals("nan") || str.equals("NaN")) {
2641 makeNaN(false, false);
2645 if (str.equals("-nan") || str.equals("-NaN")) {
2646 makeNaN(false, true);
2654 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2656 assert(!str.empty() && "Invalid string length");
2658 // Handle special cases.
2659 if (convertFromStringSpecials(str))
2662 /* Handle a leading minus sign. */
2663 StringRef::iterator p = str.begin();
2664 size_t slen = str.size();
2665 sign = *p == '-' ? 1 : 0;
2666 if (*p == '-' || *p == '+') {
2669 assert(slen && "String has no digits");
2672 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2673 assert(slen - 2 && "Invalid string");
2674 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2678 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2681 /* Write out a hexadecimal representation of the floating point value
2682 to DST, which must be of sufficient size, in the C99 form
2683 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2684 excluding the terminating NUL.
2686 If UPPERCASE, the output is in upper case, otherwise in lower case.
2688 HEXDIGITS digits appear altogether, rounding the value if
2689 necessary. If HEXDIGITS is 0, the minimal precision to display the
2690 number precisely is used instead. If nothing would appear after
2691 the decimal point it is suppressed.
2693 The decimal exponent is always printed and has at least one digit.
2694 Zero values display an exponent of zero. Infinities and NaNs
2695 appear as "infinity" or "nan" respectively.
2697 The above rules are as specified by C99. There is ambiguity about
2698 what the leading hexadecimal digit should be. This implementation
2699 uses whatever is necessary so that the exponent is displayed as
2700 stored. This implies the exponent will fall within the IEEE format
2701 range, and the leading hexadecimal digit will be 0 (for denormals),
2702 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2703 any other digits zero).
2706 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2707 bool upperCase, roundingMode rounding_mode) const
2717 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2718 dst += sizeof infinityL - 1;
2722 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2723 dst += sizeof NaNU - 1;
2728 *dst++ = upperCase ? 'X': 'x';
2730 if (hexDigits > 1) {
2732 memset (dst, '0', hexDigits - 1);
2733 dst += hexDigits - 1;
2735 *dst++ = upperCase ? 'P': 'p';
2740 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2746 return static_cast<unsigned int>(dst - p);
2749 /* Does the hard work of outputting the correctly rounded hexadecimal
2750 form of a normal floating point number with the specified number of
2751 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2752 digits necessary to print the value precisely is output. */
2754 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2756 roundingMode rounding_mode) const
2758 unsigned int count, valueBits, shift, partsCount, outputDigits;
2759 const char *hexDigitChars;
2760 const integerPart *significand;
2765 *dst++ = upperCase ? 'X': 'x';
2768 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2770 significand = significandParts();
2771 partsCount = partCount();
2773 /* +3 because the first digit only uses the single integer bit, so
2774 we have 3 virtual zero most-significant-bits. */
2775 valueBits = semantics->precision + 3;
2776 shift = integerPartWidth - valueBits % integerPartWidth;
2778 /* The natural number of digits required ignoring trailing
2779 insignificant zeroes. */
2780 outputDigits = (valueBits - significandLSB () + 3) / 4;
2782 /* hexDigits of zero means use the required number for the
2783 precision. Otherwise, see if we are truncating. If we are,
2784 find out if we need to round away from zero. */
2786 if (hexDigits < outputDigits) {
2787 /* We are dropping non-zero bits, so need to check how to round.
2788 "bits" is the number of dropped bits. */
2790 lostFraction fraction;
2792 bits = valueBits - hexDigits * 4;
2793 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2794 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2796 outputDigits = hexDigits;
2799 /* Write the digits consecutively, and start writing in the location
2800 of the hexadecimal point. We move the most significant digit
2801 left and add the hexadecimal point later. */
2804 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2806 while (outputDigits && count) {
2809 /* Put the most significant integerPartWidth bits in "part". */
2810 if (--count == partsCount)
2811 part = 0; /* An imaginary higher zero part. */
2813 part = significand[count] << shift;
2816 part |= significand[count - 1] >> (integerPartWidth - shift);
2818 /* Convert as much of "part" to hexdigits as we can. */
2819 unsigned int curDigits = integerPartWidth / 4;
2821 if (curDigits > outputDigits)
2822 curDigits = outputDigits;
2823 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2824 outputDigits -= curDigits;
2830 /* Note that hexDigitChars has a trailing '0'. */
2833 *q = hexDigitChars[hexDigitValue (*q) + 1];
2834 } while (*q == '0');
2837 /* Add trailing zeroes. */
2838 memset (dst, '0', outputDigits);
2839 dst += outputDigits;
2842 /* Move the most significant digit to before the point, and if there
2843 is something after the decimal point add it. This must come
2844 after rounding above. */
2851 /* Finally output the exponent. */
2852 *dst++ = upperCase ? 'P': 'p';
2854 return writeSignedDecimal (dst, exponent);
2857 hash_code llvm::hash_value(const APFloat &Arg) {
2858 if (!Arg.isFiniteNonZero())
2859 return hash_combine((uint8_t)Arg.category,
2860 // NaN has no sign, fix it at zero.
2861 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2862 Arg.semantics->precision);
2864 // Normal floats need their exponent and significand hashed.
2865 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2866 Arg.semantics->precision, Arg.exponent,
2868 Arg.significandParts(),
2869 Arg.significandParts() + Arg.partCount()));
2872 // Conversion from APFloat to/from host float/double. It may eventually be
2873 // possible to eliminate these and have everybody deal with APFloats, but that
2874 // will take a while. This approach will not easily extend to long double.
2875 // Current implementation requires integerPartWidth==64, which is correct at
2876 // the moment but could be made more general.
2878 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2879 // the actual IEEE respresentations. We compensate for that here.
2882 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2884 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2885 assert(partCount()==2);
2887 uint64_t myexponent, mysignificand;
2889 if (isFiniteNonZero()) {
2890 myexponent = exponent+16383; //bias
2891 mysignificand = significandParts()[0];
2892 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2893 myexponent = 0; // denormal
2894 } else if (category==fcZero) {
2897 } else if (category==fcInfinity) {
2898 myexponent = 0x7fff;
2899 mysignificand = 0x8000000000000000ULL;
2901 assert(category == fcNaN && "Unknown category");
2902 myexponent = 0x7fff;
2903 mysignificand = significandParts()[0];
2907 words[0] = mysignificand;
2908 words[1] = ((uint64_t)(sign & 1) << 15) |
2909 (myexponent & 0x7fffLL);
2910 return APInt(80, words);
2914 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2916 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2917 assert(partCount()==2);
2923 // Convert number to double. To avoid spurious underflows, we re-
2924 // normalize against the "double" minExponent first, and only *then*
2925 // truncate the mantissa. The result of that second conversion
2926 // may be inexact, but should never underflow.
2927 // Declare fltSemantics before APFloat that uses it (and
2928 // saves pointer to it) to ensure correct destruction order.
2929 fltSemantics extendedSemantics = *semantics;
2930 extendedSemantics.minExponent = IEEEdouble.minExponent;
2931 APFloat extended(*this);
2932 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2933 assert(fs == opOK && !losesInfo);
2936 APFloat u(extended);
2937 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2938 assert(fs == opOK || fs == opInexact);
2940 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2942 // If conversion was exact or resulted in a special case, we're done;
2943 // just set the second double to zero. Otherwise, re-convert back to
2944 // the extended format and compute the difference. This now should
2945 // convert exactly to double.
2946 if (u.isFiniteNonZero() && losesInfo) {
2947 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2948 assert(fs == opOK && !losesInfo);
2951 APFloat v(extended);
2952 v.subtract(u, rmNearestTiesToEven);
2953 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2954 assert(fs == opOK && !losesInfo);
2956 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2961 return APInt(128, words);
2965 APFloat::convertQuadrupleAPFloatToAPInt() const
2967 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2968 assert(partCount()==2);
2970 uint64_t myexponent, mysignificand, mysignificand2;
2972 if (isFiniteNonZero()) {
2973 myexponent = exponent+16383; //bias
2974 mysignificand = significandParts()[0];
2975 mysignificand2 = significandParts()[1];
2976 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2977 myexponent = 0; // denormal
2978 } else if (category==fcZero) {
2980 mysignificand = mysignificand2 = 0;
2981 } else if (category==fcInfinity) {
2982 myexponent = 0x7fff;
2983 mysignificand = mysignificand2 = 0;
2985 assert(category == fcNaN && "Unknown category!");
2986 myexponent = 0x7fff;
2987 mysignificand = significandParts()[0];
2988 mysignificand2 = significandParts()[1];
2992 words[0] = mysignificand;
2993 words[1] = ((uint64_t)(sign & 1) << 63) |
2994 ((myexponent & 0x7fff) << 48) |
2995 (mysignificand2 & 0xffffffffffffLL);
2997 return APInt(128, words);
3001 APFloat::convertDoubleAPFloatToAPInt() const
3003 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
3004 assert(partCount()==1);
3006 uint64_t myexponent, mysignificand;
3008 if (isFiniteNonZero()) {
3009 myexponent = exponent+1023; //bias
3010 mysignificand = *significandParts();
3011 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
3012 myexponent = 0; // denormal
3013 } else if (category==fcZero) {
3016 } else if (category==fcInfinity) {
3020 assert(category == fcNaN && "Unknown category!");
3022 mysignificand = *significandParts();
3025 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
3026 ((myexponent & 0x7ff) << 52) |
3027 (mysignificand & 0xfffffffffffffLL))));
3031 APFloat::convertFloatAPFloatToAPInt() const
3033 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
3034 assert(partCount()==1);
3036 uint32_t myexponent, mysignificand;
3038 if (isFiniteNonZero()) {
3039 myexponent = exponent+127; //bias
3040 mysignificand = (uint32_t)*significandParts();
3041 if (myexponent == 1 && !(mysignificand & 0x800000))
3042 myexponent = 0; // denormal
3043 } else if (category==fcZero) {
3046 } else if (category==fcInfinity) {
3050 assert(category == fcNaN && "Unknown category!");
3052 mysignificand = (uint32_t)*significandParts();
3055 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3056 (mysignificand & 0x7fffff)));
3060 APFloat::convertHalfAPFloatToAPInt() const
3062 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
3063 assert(partCount()==1);
3065 uint32_t myexponent, mysignificand;
3067 if (isFiniteNonZero()) {
3068 myexponent = exponent+15; //bias
3069 mysignificand = (uint32_t)*significandParts();
3070 if (myexponent == 1 && !(mysignificand & 0x400))
3071 myexponent = 0; // denormal
3072 } else if (category==fcZero) {
3075 } else if (category==fcInfinity) {
3079 assert(category == fcNaN && "Unknown category!");
3081 mysignificand = (uint32_t)*significandParts();
3084 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3085 (mysignificand & 0x3ff)));
3088 // This function creates an APInt that is just a bit map of the floating
3089 // point constant as it would appear in memory. It is not a conversion,
3090 // and treating the result as a normal integer is unlikely to be useful.
3093 APFloat::bitcastToAPInt() const
3095 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
3096 return convertHalfAPFloatToAPInt();
3098 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
3099 return convertFloatAPFloatToAPInt();
3101 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
3102 return convertDoubleAPFloatToAPInt();
3104 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
3105 return convertQuadrupleAPFloatToAPInt();
3107 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
3108 return convertPPCDoubleDoubleAPFloatToAPInt();
3110 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
3112 return convertF80LongDoubleAPFloatToAPInt();
3116 APFloat::convertToFloat() const
3118 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
3119 "Float semantics are not IEEEsingle");
3120 APInt api = bitcastToAPInt();
3121 return api.bitsToFloat();
3125 APFloat::convertToDouble() const
3127 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
3128 "Float semantics are not IEEEdouble");
3129 APInt api = bitcastToAPInt();
3130 return api.bitsToDouble();
3133 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3134 /// does not support these bit patterns:
3135 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3136 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3137 /// exponent = 0, integer bit 1 ("pseudodenormal")
3138 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3139 /// At the moment, the first two are treated as NaNs, the second two as Normal.
3141 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
3143 assert(api.getBitWidth()==80);
3144 uint64_t i1 = api.getRawData()[0];
3145 uint64_t i2 = api.getRawData()[1];
3146 uint64_t myexponent = (i2 & 0x7fff);
3147 uint64_t mysignificand = i1;
3149 initialize(&APFloat::x87DoubleExtended);
3150 assert(partCount()==2);
3152 sign = static_cast<unsigned int>(i2>>15);
3153 if (myexponent==0 && mysignificand==0) {
3154 // exponent, significand meaningless
3156 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3157 // exponent, significand meaningless
3158 category = fcInfinity;
3159 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3160 // exponent meaningless
3162 significandParts()[0] = mysignificand;
3163 significandParts()[1] = 0;
3165 category = fcNormal;
3166 exponent = myexponent - 16383;
3167 significandParts()[0] = mysignificand;
3168 significandParts()[1] = 0;
3169 if (myexponent==0) // denormal
3175 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3177 assert(api.getBitWidth()==128);
3178 uint64_t i1 = api.getRawData()[0];
3179 uint64_t i2 = api.getRawData()[1];
3183 // Get the first double and convert to our format.
3184 initFromDoubleAPInt(APInt(64, i1));
3185 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3186 assert(fs == opOK && !losesInfo);
3189 // Unless we have a special case, add in second double.
3190 if (isFiniteNonZero()) {
3191 APFloat v(IEEEdouble, APInt(64, i2));
3192 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3193 assert(fs == opOK && !losesInfo);
3196 add(v, rmNearestTiesToEven);
3201 APFloat::initFromQuadrupleAPInt(const APInt &api)
3203 assert(api.getBitWidth()==128);
3204 uint64_t i1 = api.getRawData()[0];
3205 uint64_t i2 = api.getRawData()[1];
3206 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3207 uint64_t mysignificand = i1;
3208 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3210 initialize(&APFloat::IEEEquad);
3211 assert(partCount()==2);
3213 sign = static_cast<unsigned int>(i2>>63);
3214 if (myexponent==0 &&
3215 (mysignificand==0 && mysignificand2==0)) {
3216 // exponent, significand meaningless
3218 } else if (myexponent==0x7fff &&
3219 (mysignificand==0 && mysignificand2==0)) {
3220 // exponent, significand meaningless
3221 category = fcInfinity;
3222 } else if (myexponent==0x7fff &&
3223 (mysignificand!=0 || mysignificand2 !=0)) {
3224 // exponent meaningless
3226 significandParts()[0] = mysignificand;
3227 significandParts()[1] = mysignificand2;
3229 category = fcNormal;
3230 exponent = myexponent - 16383;
3231 significandParts()[0] = mysignificand;
3232 significandParts()[1] = mysignificand2;
3233 if (myexponent==0) // denormal
3236 significandParts()[1] |= 0x1000000000000LL; // integer bit
3241 APFloat::initFromDoubleAPInt(const APInt &api)
3243 assert(api.getBitWidth()==64);
3244 uint64_t i = *api.getRawData();
3245 uint64_t myexponent = (i >> 52) & 0x7ff;
3246 uint64_t mysignificand = i & 0xfffffffffffffLL;
3248 initialize(&APFloat::IEEEdouble);
3249 assert(partCount()==1);
3251 sign = static_cast<unsigned int>(i>>63);
3252 if (myexponent==0 && mysignificand==0) {
3253 // exponent, significand meaningless
3255 } else if (myexponent==0x7ff && mysignificand==0) {
3256 // exponent, significand meaningless
3257 category = fcInfinity;
3258 } else if (myexponent==0x7ff && mysignificand!=0) {
3259 // exponent meaningless
3261 *significandParts() = mysignificand;
3263 category = fcNormal;
3264 exponent = myexponent - 1023;
3265 *significandParts() = mysignificand;
3266 if (myexponent==0) // denormal
3269 *significandParts() |= 0x10000000000000LL; // integer bit
3274 APFloat::initFromFloatAPInt(const APInt & api)
3276 assert(api.getBitWidth()==32);
3277 uint32_t i = (uint32_t)*api.getRawData();
3278 uint32_t myexponent = (i >> 23) & 0xff;
3279 uint32_t mysignificand = i & 0x7fffff;
3281 initialize(&APFloat::IEEEsingle);
3282 assert(partCount()==1);
3285 if (myexponent==0 && mysignificand==0) {
3286 // exponent, significand meaningless
3288 } else if (myexponent==0xff && mysignificand==0) {
3289 // exponent, significand meaningless
3290 category = fcInfinity;
3291 } else if (myexponent==0xff && mysignificand!=0) {
3292 // sign, exponent, significand meaningless
3294 *significandParts() = mysignificand;
3296 category = fcNormal;
3297 exponent = myexponent - 127; //bias
3298 *significandParts() = mysignificand;
3299 if (myexponent==0) // denormal
3302 *significandParts() |= 0x800000; // integer bit
3307 APFloat::initFromHalfAPInt(const APInt & api)
3309 assert(api.getBitWidth()==16);
3310 uint32_t i = (uint32_t)*api.getRawData();
3311 uint32_t myexponent = (i >> 10) & 0x1f;
3312 uint32_t mysignificand = i & 0x3ff;
3314 initialize(&APFloat::IEEEhalf);
3315 assert(partCount()==1);
3318 if (myexponent==0 && mysignificand==0) {
3319 // exponent, significand meaningless
3321 } else if (myexponent==0x1f && mysignificand==0) {
3322 // exponent, significand meaningless
3323 category = fcInfinity;
3324 } else if (myexponent==0x1f && mysignificand!=0) {
3325 // sign, exponent, significand meaningless
3327 *significandParts() = mysignificand;
3329 category = fcNormal;
3330 exponent = myexponent - 15; //bias
3331 *significandParts() = mysignificand;
3332 if (myexponent==0) // denormal
3335 *significandParts() |= 0x400; // integer bit
3339 /// Treat api as containing the bits of a floating point number. Currently
3340 /// we infer the floating point type from the size of the APInt. The
3341 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3342 /// when the size is anything else).
3344 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api)
3346 if (Sem == &IEEEhalf)
3347 return initFromHalfAPInt(api);
3348 if (Sem == &IEEEsingle)
3349 return initFromFloatAPInt(api);
3350 if (Sem == &IEEEdouble)
3351 return initFromDoubleAPInt(api);
3352 if (Sem == &x87DoubleExtended)
3353 return initFromF80LongDoubleAPInt(api);
3354 if (Sem == &IEEEquad)
3355 return initFromQuadrupleAPInt(api);
3356 if (Sem == &PPCDoubleDouble)
3357 return initFromPPCDoubleDoubleAPInt(api);
3359 llvm_unreachable(nullptr);
3363 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3367 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth));
3369 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth));
3371 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth));
3373 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth));
3376 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth));
3377 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
3379 llvm_unreachable("Unknown floating bit width");
3383 unsigned APFloat::getSizeInBits(const fltSemantics &Sem) {
3384 return Sem.sizeInBits;
3387 /// Make this number the largest magnitude normal number in the given
3389 void APFloat::makeLargest(bool Negative) {
3390 // We want (in interchange format):
3391 // sign = {Negative}
3393 // significand = 1..1
3394 category = fcNormal;
3396 exponent = semantics->maxExponent;
3398 // Use memset to set all but the highest integerPart to all ones.
3399 integerPart *significand = significandParts();
3400 unsigned PartCount = partCount();
3401 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3403 // Set the high integerPart especially setting all unused top bits for
3404 // internal consistency.
3405 const unsigned NumUnusedHighBits =
3406 PartCount*integerPartWidth - semantics->precision;
3407 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3408 ? (~integerPart(0) >> NumUnusedHighBits)
3412 /// Make this number the smallest magnitude denormal number in the given
3414 void APFloat::makeSmallest(bool Negative) {
3415 // We want (in interchange format):
3416 // sign = {Negative}
3418 // significand = 0..01
3419 category = fcNormal;
3421 exponent = semantics->minExponent;
3422 APInt::tcSet(significandParts(), 1, partCount());
3426 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3427 // We want (in interchange format):
3428 // sign = {Negative}
3430 // significand = 1..1
3431 APFloat Val(Sem, uninitialized);
3432 Val.makeLargest(Negative);
3436 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3437 // We want (in interchange format):
3438 // sign = {Negative}
3440 // significand = 0..01
3441 APFloat Val(Sem, uninitialized);
3442 Val.makeSmallest(Negative);
3446 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3447 APFloat Val(Sem, uninitialized);
3449 // We want (in interchange format):
3450 // sign = {Negative}
3452 // significand = 10..0
3454 Val.category = fcNormal;
3455 Val.zeroSignificand();
3456 Val.sign = Negative;
3457 Val.exponent = Sem.minExponent;
3458 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3459 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3464 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) {
3465 initFromAPInt(&Sem, API);
3468 APFloat::APFloat(float f) {
3469 initFromAPInt(&IEEEsingle, APInt::floatToBits(f));
3472 APFloat::APFloat(double d) {
3473 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d));
3477 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3478 Buffer.append(Str.begin(), Str.end());
3481 /// Removes data from the given significand until it is no more
3482 /// precise than is required for the desired precision.
3483 void AdjustToPrecision(APInt &significand,
3484 int &exp, unsigned FormatPrecision) {
3485 unsigned bits = significand.getActiveBits();
3487 // 196/59 is a very slight overestimate of lg_2(10).
3488 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3490 if (bits <= bitsRequired) return;
3492 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3493 if (!tensRemovable) return;
3495 exp += tensRemovable;
3497 APInt divisor(significand.getBitWidth(), 1);
3498 APInt powten(significand.getBitWidth(), 10);
3500 if (tensRemovable & 1)
3502 tensRemovable >>= 1;
3503 if (!tensRemovable) break;
3507 significand = significand.udiv(divisor);
3509 // Truncate the significand down to its active bit count.
3510 significand = significand.trunc(significand.getActiveBits());
3514 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3515 int &exp, unsigned FormatPrecision) {
3516 unsigned N = buffer.size();
3517 if (N <= FormatPrecision) return;
3519 // The most significant figures are the last ones in the buffer.
3520 unsigned FirstSignificant = N - FormatPrecision;
3523 // FIXME: this probably shouldn't use 'round half up'.
3525 // Rounding down is just a truncation, except we also want to drop
3526 // trailing zeros from the new result.
3527 if (buffer[FirstSignificant - 1] < '5') {
3528 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3531 exp += FirstSignificant;
3532 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3536 // Rounding up requires a decimal add-with-carry. If we continue
3537 // the carry, the newly-introduced zeros will just be truncated.
3538 for (unsigned I = FirstSignificant; I != N; ++I) {
3539 if (buffer[I] == '9') {
3547 // If we carried through, we have exactly one digit of precision.
3548 if (FirstSignificant == N) {
3549 exp += FirstSignificant;
3551 buffer.push_back('1');
3555 exp += FirstSignificant;
3556 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3560 void APFloat::toString(SmallVectorImpl<char> &Str,
3561 unsigned FormatPrecision,
3562 unsigned FormatMaxPadding) const {
3566 return append(Str, "-Inf");
3568 return append(Str, "+Inf");
3570 case fcNaN: return append(Str, "NaN");
3576 if (!FormatMaxPadding)
3577 append(Str, "0.0E+0");
3589 // Decompose the number into an APInt and an exponent.
3590 int exp = exponent - ((int) semantics->precision - 1);
3591 APInt significand(semantics->precision,
3592 makeArrayRef(significandParts(),
3593 partCountForBits(semantics->precision)));
3595 // Set FormatPrecision if zero. We want to do this before we
3596 // truncate trailing zeros, as those are part of the precision.
3597 if (!FormatPrecision) {
3598 // We use enough digits so the number can be round-tripped back to an
3599 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3600 // Accurately" by Steele and White.
3601 // FIXME: Using a formula based purely on the precision is conservative;
3602 // we can print fewer digits depending on the actual value being printed.
3604 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3605 FormatPrecision = 2 + semantics->precision * 59 / 196;
3608 // Ignore trailing binary zeros.
3609 int trailingZeros = significand.countTrailingZeros();
3610 exp += trailingZeros;
3611 significand = significand.lshr(trailingZeros);
3613 // Change the exponent from 2^e to 10^e.
3616 } else if (exp > 0) {
3618 significand = significand.zext(semantics->precision + exp);
3619 significand <<= exp;
3621 } else { /* exp < 0 */
3624 // We transform this using the identity:
3625 // (N)(2^-e) == (N)(5^e)(10^-e)
3626 // This means we have to multiply N (the significand) by 5^e.
3627 // To avoid overflow, we have to operate on numbers large
3628 // enough to store N * 5^e:
3629 // log2(N * 5^e) == log2(N) + e * log2(5)
3630 // <= semantics->precision + e * 137 / 59
3631 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3633 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3635 // Multiply significand by 5^e.
3636 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3637 significand = significand.zext(precision);
3638 APInt five_to_the_i(precision, 5);
3640 if (texp & 1) significand *= five_to_the_i;
3644 five_to_the_i *= five_to_the_i;
3648 AdjustToPrecision(significand, exp, FormatPrecision);
3650 SmallVector<char, 256> buffer;
3653 unsigned precision = significand.getBitWidth();
3654 APInt ten(precision, 10);
3655 APInt digit(precision, 0);
3657 bool inTrail = true;
3658 while (significand != 0) {
3659 // digit <- significand % 10
3660 // significand <- significand / 10
3661 APInt::udivrem(significand, ten, significand, digit);
3663 unsigned d = digit.getZExtValue();
3665 // Drop trailing zeros.
3666 if (inTrail && !d) exp++;
3668 buffer.push_back((char) ('0' + d));
3673 assert(!buffer.empty() && "no characters in buffer!");
3675 // Drop down to FormatPrecision.
3676 // TODO: don't do more precise calculations above than are required.
3677 AdjustToPrecision(buffer, exp, FormatPrecision);
3679 unsigned NDigits = buffer.size();
3681 // Check whether we should use scientific notation.
3682 bool FormatScientific;
3683 if (!FormatMaxPadding)
3684 FormatScientific = true;
3689 // But we shouldn't make the number look more precise than it is.
3690 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3691 NDigits + (unsigned) exp > FormatPrecision);
3693 // Power of the most significant digit.
3694 int MSD = exp + (int) (NDigits - 1);
3697 FormatScientific = false;
3699 // 765e-5 == 0.00765
3701 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3706 // Scientific formatting is pretty straightforward.
3707 if (FormatScientific) {
3708 exp += (NDigits - 1);
3710 Str.push_back(buffer[NDigits-1]);
3715 for (unsigned I = 1; I != NDigits; ++I)
3716 Str.push_back(buffer[NDigits-1-I]);
3719 Str.push_back(exp >= 0 ? '+' : '-');
3720 if (exp < 0) exp = -exp;
3721 SmallVector<char, 6> expbuf;
3723 expbuf.push_back((char) ('0' + (exp % 10)));
3726 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3727 Str.push_back(expbuf[E-1-I]);
3731 // Non-scientific, positive exponents.
3733 for (unsigned I = 0; I != NDigits; ++I)
3734 Str.push_back(buffer[NDigits-1-I]);
3735 for (unsigned I = 0; I != (unsigned) exp; ++I)
3740 // Non-scientific, negative exponents.
3742 // The number of digits to the left of the decimal point.
3743 int NWholeDigits = exp + (int) NDigits;
3746 if (NWholeDigits > 0) {
3747 for (; I != (unsigned) NWholeDigits; ++I)
3748 Str.push_back(buffer[NDigits-I-1]);
3751 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3755 for (unsigned Z = 1; Z != NZeros; ++Z)
3759 for (; I != NDigits; ++I)
3760 Str.push_back(buffer[NDigits-I-1]);
3763 bool APFloat::getExactInverse(APFloat *inv) const {
3764 // Special floats and denormals have no exact inverse.
3765 if (!isFiniteNonZero())
3768 // Check that the number is a power of two by making sure that only the
3769 // integer bit is set in the significand.
3770 if (significandLSB() != semantics->precision - 1)
3774 APFloat reciprocal(*semantics, 1ULL);
3775 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3778 // Avoid multiplication with a denormal, it is not safe on all platforms and
3779 // may be slower than a normal division.
3780 if (reciprocal.isDenormal())
3783 assert(reciprocal.isFiniteNonZero() &&
3784 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3792 bool APFloat::isSignaling() const {
3796 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3797 // first bit of the trailing significand being 0.
3798 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3801 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3803 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3804 /// appropriate sign switching before/after the computation.
3805 APFloat::opStatus APFloat::next(bool nextDown) {
3806 // If we are performing nextDown, swap sign so we have -x.
3810 // Compute nextUp(x)
3811 opStatus result = opOK;
3813 // Handle each float category separately.
3816 // nextUp(+inf) = +inf
3819 // nextUp(-inf) = -getLargest()
3823 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3824 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3825 // change the payload.
3826 if (isSignaling()) {
3827 result = opInvalidOp;
3828 // For consistency, propagate the sign of the sNaN to the qNaN.
3829 makeNaN(false, isNegative(), nullptr);
3833 // nextUp(pm 0) = +getSmallest()
3834 makeSmallest(false);
3837 // nextUp(-getSmallest()) = -0
3838 if (isSmallest() && isNegative()) {
3839 APInt::tcSet(significandParts(), 0, partCount());
3845 // nextUp(getLargest()) == INFINITY
3846 if (isLargest() && !isNegative()) {
3847 APInt::tcSet(significandParts(), 0, partCount());
3848 category = fcInfinity;
3849 exponent = semantics->maxExponent + 1;
3853 // nextUp(normal) == normal + inc.
3855 // If we are negative, we need to decrement the significand.
3857 // We only cross a binade boundary that requires adjusting the exponent
3859 // 1. exponent != semantics->minExponent. This implies we are not in the
3860 // smallest binade or are dealing with denormals.
3861 // 2. Our significand excluding the integral bit is all zeros.
3862 bool WillCrossBinadeBoundary =
3863 exponent != semantics->minExponent && isSignificandAllZeros();
3865 // Decrement the significand.
3867 // We always do this since:
3868 // 1. If we are dealing with a non-binade decrement, by definition we
3869 // just decrement the significand.
3870 // 2. If we are dealing with a normal -> normal binade decrement, since
3871 // we have an explicit integral bit the fact that all bits but the
3872 // integral bit are zero implies that subtracting one will yield a
3873 // significand with 0 integral bit and 1 in all other spots. Thus we
3874 // must just adjust the exponent and set the integral bit to 1.
3875 // 3. If we are dealing with a normal -> denormal binade decrement,
3876 // since we set the integral bit to 0 when we represent denormals, we
3877 // just decrement the significand.
3878 integerPart *Parts = significandParts();
3879 APInt::tcDecrement(Parts, partCount());
3881 if (WillCrossBinadeBoundary) {
3882 // Our result is a normal number. Do the following:
3883 // 1. Set the integral bit to 1.
3884 // 2. Decrement the exponent.
3885 APInt::tcSetBit(Parts, semantics->precision - 1);
3889 // If we are positive, we need to increment the significand.
3891 // We only cross a binade boundary that requires adjusting the exponent if
3892 // the input is not a denormal and all of said input's significand bits
3893 // are set. If all of said conditions are true: clear the significand, set
3894 // the integral bit to 1, and increment the exponent. If we have a
3895 // denormal always increment since moving denormals and the numbers in the
3896 // smallest normal binade have the same exponent in our representation.
3897 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3899 if (WillCrossBinadeBoundary) {
3900 integerPart *Parts = significandParts();
3901 APInt::tcSet(Parts, 0, partCount());
3902 APInt::tcSetBit(Parts, semantics->precision - 1);
3903 assert(exponent != semantics->maxExponent &&
3904 "We can not increment an exponent beyond the maxExponent allowed"
3905 " by the given floating point semantics.");
3908 incrementSignificand();
3914 // If we are performing nextDown, swap sign so we have -nextUp(-x)
3922 APFloat::makeInf(bool Negative) {
3923 category = fcInfinity;
3925 exponent = semantics->maxExponent + 1;
3926 APInt::tcSet(significandParts(), 0, partCount());
3930 APFloat::makeZero(bool Negative) {
3933 exponent = semantics->minExponent-1;
3934 APInt::tcSet(significandParts(), 0, partCount());
3937 APFloat llvm::scalbn(APFloat X, int Exp) {
3938 if (X.isInfinity() || X.isZero() || X.isNaN())
3941 auto MaxExp = X.getSemantics().maxExponent;
3942 auto MinExp = X.getSemantics().minExponent;
3943 if (Exp > (MaxExp - X.exponent))
3944 // Overflow saturates to infinity.
3945 return APFloat::getInf(X.getSemantics(), X.isNegative());
3946 if (Exp < (MinExp - X.exponent))
3947 // Underflow saturates to zero.
3948 return APFloat::getZero(X.getSemantics(), X.isNegative());