1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/MathExtras.h"
28 /// A macro used to combine two fcCategory enums into one key which can be used
29 /// in a switch statement to classify how the interaction of two APFloat's
30 /// categories affects an operation.
32 /// TODO: If clang source code is ever allowed to use constexpr in its own
33 /// codebase, change this into a static inline function.
34 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
36 /* Assumed in hexadecimal significand parsing, and conversion to
37 hexadecimal strings. */
38 static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
42 /* Represents floating point arithmetic semantics. */
44 /* The largest E such that 2^E is representable; this matches the
45 definition of IEEE 754. */
46 APFloat::ExponentType maxExponent;
48 /* The smallest E such that 2^E is a normalized number; this
49 matches the definition of IEEE 754. */
50 APFloat::ExponentType minExponent;
52 /* Number of bits in the significand. This includes the integer
54 unsigned int precision;
57 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 };
58 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 };
59 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 };
60 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 };
61 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 };
62 const fltSemantics APFloat::Bogus = { 0, 0, 0 };
64 /* The PowerPC format consists of two doubles. It does not map cleanly
65 onto the usual format above. It is approximated using twice the
66 mantissa bits. Note that for exponents near the double minimum,
67 we no longer can represent the full 106 mantissa bits, so those
68 will be treated as denormal numbers.
70 FIXME: While this approximation is equivalent to what GCC uses for
71 compile-time arithmetic on PPC double-double numbers, it is not able
72 to represent all possible values held by a PPC double-double number,
73 for example: (long double) 1.0 + (long double) 0x1p-106
74 Should this be replaced by a full emulation of PPC double-double? */
75 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 };
77 /* A tight upper bound on number of parts required to hold the value
80 power * 815 / (351 * integerPartWidth) + 1
82 However, whilst the result may require only this many parts,
83 because we are multiplying two values to get it, the
84 multiplication may require an extra part with the excess part
85 being zero (consider the trivial case of 1 * 1, tcFullMultiply
86 requires two parts to hold the single-part result). So we add an
87 extra one to guarantee enough space whilst multiplying. */
88 const unsigned int maxExponent = 16383;
89 const unsigned int maxPrecision = 113;
90 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
91 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
92 / (351 * integerPartWidth));
95 /* A bunch of private, handy routines. */
97 static inline unsigned int
98 partCountForBits(unsigned int bits)
100 return ((bits) + integerPartWidth - 1) / integerPartWidth;
103 /* Returns 0U-9U. Return values >= 10U are not digits. */
104 static inline unsigned int
105 decDigitValue(unsigned int c)
110 /* Return the value of a decimal exponent of the form
113 If the exponent overflows, returns a large exponent with the
116 readExponent(StringRef::iterator begin, StringRef::iterator end)
119 unsigned int absExponent;
120 const unsigned int overlargeExponent = 24000; /* FIXME. */
121 StringRef::iterator p = begin;
123 assert(p != end && "Exponent has no digits");
125 isNegative = (*p == '-');
126 if (*p == '-' || *p == '+') {
128 assert(p != end && "Exponent has no digits");
131 absExponent = decDigitValue(*p++);
132 assert(absExponent < 10U && "Invalid character in exponent");
134 for (; p != end; ++p) {
137 value = decDigitValue(*p);
138 assert(value < 10U && "Invalid character in exponent");
140 value += absExponent * 10;
141 if (absExponent >= overlargeExponent) {
142 absExponent = overlargeExponent;
143 p = end; /* outwit assert below */
149 assert(p == end && "Invalid exponent in exponent");
152 return -(int) absExponent;
154 return (int) absExponent;
157 /* This is ugly and needs cleaning up, but I don't immediately see
158 how whilst remaining safe. */
160 totalExponent(StringRef::iterator p, StringRef::iterator end,
161 int exponentAdjustment)
163 int unsignedExponent;
164 bool negative, overflow;
167 assert(p != end && "Exponent has no digits");
169 negative = *p == '-';
170 if (*p == '-' || *p == '+') {
172 assert(p != end && "Exponent has no digits");
175 unsignedExponent = 0;
177 for (; p != end; ++p) {
180 value = decDigitValue(*p);
181 assert(value < 10U && "Invalid character in exponent");
183 unsignedExponent = unsignedExponent * 10 + value;
184 if (unsignedExponent > 32767) {
190 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
194 exponent = unsignedExponent;
196 exponent = -exponent;
197 exponent += exponentAdjustment;
198 if (exponent > 32767 || exponent < -32768)
203 exponent = negative ? -32768: 32767;
208 static StringRef::iterator
209 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
210 StringRef::iterator *dot)
212 StringRef::iterator p = begin;
214 while (p != end && *p == '0')
217 if (p != end && *p == '.') {
220 assert(end - begin != 1 && "Significand has no digits");
222 while (p != end && *p == '0')
229 /* Given a normal decimal floating point number of the form
233 where the decimal point and exponent are optional, fill out the
234 structure D. Exponent is appropriate if the significand is
235 treated as an integer, and normalizedExponent if the significand
236 is taken to have the decimal point after a single leading
239 If the value is zero, V->firstSigDigit points to a non-digit, and
240 the return exponent is zero.
243 const char *firstSigDigit;
244 const char *lastSigDigit;
246 int normalizedExponent;
250 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
253 StringRef::iterator dot = end;
254 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
256 D->firstSigDigit = p;
258 D->normalizedExponent = 0;
260 for (; p != end; ++p) {
262 assert(dot == end && "String contains multiple dots");
267 if (decDigitValue(*p) >= 10U)
272 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
273 assert(p != begin && "Significand has no digits");
274 assert((dot == end || p - begin != 1) && "Significand has no digits");
276 /* p points to the first non-digit in the string */
277 D->exponent = readExponent(p + 1, end);
279 /* Implied decimal point? */
284 /* If number is all zeroes accept any exponent. */
285 if (p != D->firstSigDigit) {
286 /* Drop insignificant trailing zeroes. */
291 while (p != begin && *p == '0');
292 while (p != begin && *p == '.');
295 /* Adjust the exponents for any decimal point. */
296 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
297 D->normalizedExponent = (D->exponent +
298 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
299 - (dot > D->firstSigDigit && dot < p)));
305 /* Return the trailing fraction of a hexadecimal number.
306 DIGITVALUE is the first hex digit of the fraction, P points to
309 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
310 unsigned int digitValue)
312 unsigned int hexDigit;
314 /* If the first trailing digit isn't 0 or 8 we can work out the
315 fraction immediately. */
317 return lfMoreThanHalf;
318 else if (digitValue < 8 && digitValue > 0)
319 return lfLessThanHalf;
321 // Otherwise we need to find the first non-zero digit.
322 while (p != end && (*p == '0' || *p == '.'))
325 assert(p != end && "Invalid trailing hexadecimal fraction!");
327 hexDigit = hexDigitValue(*p);
329 /* If we ran off the end it is exactly zero or one-half, otherwise
332 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
334 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
337 /* Return the fraction lost were a bignum truncated losing the least
338 significant BITS bits. */
340 lostFractionThroughTruncation(const integerPart *parts,
341 unsigned int partCount,
346 lsb = APInt::tcLSB(parts, partCount);
348 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
350 return lfExactlyZero;
352 return lfExactlyHalf;
353 if (bits <= partCount * integerPartWidth &&
354 APInt::tcExtractBit(parts, bits - 1))
355 return lfMoreThanHalf;
357 return lfLessThanHalf;
360 /* Shift DST right BITS bits noting lost fraction. */
362 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
364 lostFraction lost_fraction;
366 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
368 APInt::tcShiftRight(dst, parts, bits);
370 return lost_fraction;
373 /* Combine the effect of two lost fractions. */
375 combineLostFractions(lostFraction moreSignificant,
376 lostFraction lessSignificant)
378 if (lessSignificant != lfExactlyZero) {
379 if (moreSignificant == lfExactlyZero)
380 moreSignificant = lfLessThanHalf;
381 else if (moreSignificant == lfExactlyHalf)
382 moreSignificant = lfMoreThanHalf;
385 return moreSignificant;
388 /* The error from the true value, in half-ulps, on multiplying two
389 floating point numbers, which differ from the value they
390 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
391 than the returned value.
393 See "How to Read Floating Point Numbers Accurately" by William D
396 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
398 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
400 if (HUerr1 + HUerr2 == 0)
401 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
403 return inexactMultiply + 2 * (HUerr1 + HUerr2);
406 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
407 when the least significant BITS are truncated. BITS cannot be
410 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
412 unsigned int count, partBits;
413 integerPart part, boundary;
418 count = bits / integerPartWidth;
419 partBits = bits % integerPartWidth + 1;
421 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
424 boundary = (integerPart) 1 << (partBits - 1);
429 if (part - boundary <= boundary - part)
430 return part - boundary;
432 return boundary - part;
435 if (part == boundary) {
438 return ~(integerPart) 0; /* A lot. */
441 } else if (part == boundary - 1) {
444 return ~(integerPart) 0; /* A lot. */
449 return ~(integerPart) 0; /* A lot. */
452 /* Place pow(5, power) in DST, and return the number of parts used.
453 DST must be at least one part larger than size of the answer. */
455 powerOf5(integerPart *dst, unsigned int power)
457 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
459 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
460 pow5s[0] = 78125 * 5;
462 unsigned int partsCount[16] = { 1 };
463 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
465 assert(power <= maxExponent);
470 *p1 = firstEightPowers[power & 7];
476 for (unsigned int n = 0; power; power >>= 1, n++) {
481 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
483 pc = partsCount[n - 1];
484 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
486 if (pow5[pc - 1] == 0)
494 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
496 if (p2[result - 1] == 0)
499 /* Now result is in p1 with partsCount parts and p2 is scratch
501 tmp = p1, p1 = p2, p2 = tmp;
508 APInt::tcAssign(dst, p1, result);
513 /* Zero at the end to avoid modular arithmetic when adding one; used
514 when rounding up during hexadecimal output. */
515 static const char hexDigitsLower[] = "0123456789abcdef0";
516 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
517 static const char infinityL[] = "infinity";
518 static const char infinityU[] = "INFINITY";
519 static const char NaNL[] = "nan";
520 static const char NaNU[] = "NAN";
522 /* Write out an integerPart in hexadecimal, starting with the most
523 significant nibble. Write out exactly COUNT hexdigits, return
526 partAsHex (char *dst, integerPart part, unsigned int count,
527 const char *hexDigitChars)
529 unsigned int result = count;
531 assert(count != 0 && count <= integerPartWidth / 4);
533 part >>= (integerPartWidth - 4 * count);
535 dst[count] = hexDigitChars[part & 0xf];
542 /* Write out an unsigned decimal integer. */
544 writeUnsignedDecimal (char *dst, unsigned int n)
560 /* Write out a signed decimal integer. */
562 writeSignedDecimal (char *dst, int value)
566 dst = writeUnsignedDecimal(dst, -(unsigned) value);
568 dst = writeUnsignedDecimal(dst, value);
575 APFloat::initialize(const fltSemantics *ourSemantics)
579 semantics = ourSemantics;
582 significand.parts = new integerPart[count];
586 APFloat::freeSignificand()
589 delete [] significand.parts;
593 APFloat::assign(const APFloat &rhs)
595 assert(semantics == rhs.semantics);
598 category = rhs.category;
599 exponent = rhs.exponent;
600 if (isFiniteNonZero() || category == fcNaN)
601 copySignificand(rhs);
605 APFloat::copySignificand(const APFloat &rhs)
607 assert(isFiniteNonZero() || category == fcNaN);
608 assert(rhs.partCount() >= partCount());
610 APInt::tcAssign(significandParts(), rhs.significandParts(),
614 /* Make this number a NaN, with an arbitrary but deterministic value
615 for the significand. If double or longer, this is a signalling NaN,
616 which may not be ideal. If float, this is QNaN(0). */
617 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
622 integerPart *significand = significandParts();
623 unsigned numParts = partCount();
625 // Set the significand bits to the fill.
626 if (!fill || fill->getNumWords() < numParts)
627 APInt::tcSet(significand, 0, numParts);
629 APInt::tcAssign(significand, fill->getRawData(),
630 std::min(fill->getNumWords(), numParts));
632 // Zero out the excess bits of the significand.
633 unsigned bitsToPreserve = semantics->precision - 1;
634 unsigned part = bitsToPreserve / 64;
635 bitsToPreserve %= 64;
636 significand[part] &= ((1ULL << bitsToPreserve) - 1);
637 for (part++; part != numParts; ++part)
638 significand[part] = 0;
641 unsigned QNaNBit = semantics->precision - 2;
644 // We always have to clear the QNaN bit to make it an SNaN.
645 APInt::tcClearBit(significand, QNaNBit);
647 // If there are no bits set in the payload, we have to set
648 // *something* to make it a NaN instead of an infinity;
649 // conventionally, this is the next bit down from the QNaN bit.
650 if (APInt::tcIsZero(significand, numParts))
651 APInt::tcSetBit(significand, QNaNBit - 1);
653 // We always have to set the QNaN bit to make it a QNaN.
654 APInt::tcSetBit(significand, QNaNBit);
657 // For x87 extended precision, we want to make a NaN, not a
658 // pseudo-NaN. Maybe we should expose the ability to make
660 if (semantics == &APFloat::x87DoubleExtended)
661 APInt::tcSetBit(significand, QNaNBit + 1);
664 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
666 APFloat value(Sem, uninitialized);
667 value.makeNaN(SNaN, Negative, fill);
672 APFloat::operator=(const APFloat &rhs)
675 if (semantics != rhs.semantics) {
677 initialize(rhs.semantics);
686 APFloat::operator=(APFloat &&rhs) {
689 semantics = rhs.semantics;
690 significand = rhs.significand;
691 exponent = rhs.exponent;
692 category = rhs.category;
695 rhs.semantics = &Bogus;
700 APFloat::isDenormal() const {
701 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
702 (APInt::tcExtractBit(significandParts(),
703 semantics->precision - 1) == 0);
707 APFloat::isSmallest() const {
708 // The smallest number by magnitude in our format will be the smallest
709 // denormal, i.e. the floating point number with exponent being minimum
710 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
711 return isFiniteNonZero() && exponent == semantics->minExponent &&
712 significandMSB() == 0;
715 bool APFloat::isSignificandAllOnes() const {
716 // Test if the significand excluding the integral bit is all ones. This allows
717 // us to test for binade boundaries.
718 const integerPart *Parts = significandParts();
719 const unsigned PartCount = partCount();
720 for (unsigned i = 0; i < PartCount - 1; i++)
724 // Set the unused high bits to all ones when we compare.
725 const unsigned NumHighBits =
726 PartCount*integerPartWidth - semantics->precision + 1;
727 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
728 "fill than integerPartWidth");
729 const integerPart HighBitFill =
730 ~integerPart(0) << (integerPartWidth - NumHighBits);
731 if (~(Parts[PartCount - 1] | HighBitFill))
737 bool APFloat::isSignificandAllZeros() const {
738 // Test if the significand excluding the integral bit is all zeros. This
739 // allows us to test for binade boundaries.
740 const integerPart *Parts = significandParts();
741 const unsigned PartCount = partCount();
743 for (unsigned i = 0; i < PartCount - 1; i++)
747 const unsigned NumHighBits =
748 PartCount*integerPartWidth - semantics->precision + 1;
749 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
750 "clear than integerPartWidth");
751 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
753 if (Parts[PartCount - 1] & HighBitMask)
760 APFloat::isLargest() const {
761 // The largest number by magnitude in our format will be the floating point
762 // number with maximum exponent and with significand that is all ones.
763 return isFiniteNonZero() && exponent == semantics->maxExponent
764 && isSignificandAllOnes();
768 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
771 if (semantics != rhs.semantics ||
772 category != rhs.category ||
775 if (category==fcZero || category==fcInfinity)
777 else if (isFiniteNonZero() && exponent!=rhs.exponent)
781 const integerPart* p=significandParts();
782 const integerPart* q=rhs.significandParts();
783 for (; i>0; i--, p++, q++) {
791 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
792 initialize(&ourSemantics);
796 exponent = ourSemantics.precision - 1;
797 significandParts()[0] = value;
798 normalize(rmNearestTiesToEven, lfExactlyZero);
801 APFloat::APFloat(const fltSemantics &ourSemantics) {
802 initialize(&ourSemantics);
807 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
808 // Allocates storage if necessary but does not initialize it.
809 initialize(&ourSemantics);
812 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
813 initialize(&ourSemantics);
814 convertFromString(text, rmNearestTiesToEven);
817 APFloat::APFloat(const APFloat &rhs) {
818 initialize(rhs.semantics);
822 APFloat::APFloat(APFloat &&rhs) : semantics(&Bogus) {
823 *this = std::move(rhs);
831 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
832 void APFloat::Profile(FoldingSetNodeID& ID) const {
833 ID.Add(bitcastToAPInt());
837 APFloat::partCount() const
839 return partCountForBits(semantics->precision + 1);
843 APFloat::semanticsPrecision(const fltSemantics &semantics)
845 return semantics.precision;
849 APFloat::significandParts() const
851 return const_cast<APFloat *>(this)->significandParts();
855 APFloat::significandParts()
858 return significand.parts;
860 return &significand.part;
864 APFloat::zeroSignificand()
866 APInt::tcSet(significandParts(), 0, partCount());
869 /* Increment an fcNormal floating point number's significand. */
871 APFloat::incrementSignificand()
875 carry = APInt::tcIncrement(significandParts(), partCount());
877 /* Our callers should never cause us to overflow. */
882 /* Add the significand of the RHS. Returns the carry flag. */
884 APFloat::addSignificand(const APFloat &rhs)
888 parts = significandParts();
890 assert(semantics == rhs.semantics);
891 assert(exponent == rhs.exponent);
893 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
896 /* Subtract the significand of the RHS with a borrow flag. Returns
899 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
903 parts = significandParts();
905 assert(semantics == rhs.semantics);
906 assert(exponent == rhs.exponent);
908 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
912 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
913 on to the full-precision result of the multiplication. Returns the
916 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
918 unsigned int omsb; // One, not zero, based MSB.
919 unsigned int partsCount, newPartsCount, precision;
920 integerPart *lhsSignificand;
921 integerPart scratch[4];
922 integerPart *fullSignificand;
923 lostFraction lost_fraction;
926 assert(semantics == rhs.semantics);
928 precision = semantics->precision;
930 // Allocate space for twice as many bits as the original significand, plus one
931 // extra bit for the addition to overflow into.
932 newPartsCount = partCountForBits(precision * 2 + 1);
934 if (newPartsCount > 4)
935 fullSignificand = new integerPart[newPartsCount];
937 fullSignificand = scratch;
939 lhsSignificand = significandParts();
940 partsCount = partCount();
942 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
943 rhs.significandParts(), partsCount, partsCount);
945 lost_fraction = lfExactlyZero;
946 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
947 exponent += rhs.exponent;
949 // Assume the operands involved in the multiplication are single-precision
950 // FP, and the two multiplicants are:
951 // *this = a23 . a22 ... a0 * 2^e1
952 // rhs = b23 . b22 ... b0 * 2^e2
953 // the result of multiplication is:
954 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
955 // Note that there are three significant bits at the left-hand side of the
956 // radix point: two for the multiplication, and an overflow bit for the
957 // addition (that will always be zero at this point). Move the radix point
958 // toward left by two bits, and adjust exponent accordingly.
961 if (addend && addend->isNonZero()) {
962 // The intermediate result of the multiplication has "2 * precision"
963 // signicant bit; adjust the addend to be consistent with mul result.
965 Significand savedSignificand = significand;
966 const fltSemantics *savedSemantics = semantics;
967 fltSemantics extendedSemantics;
969 unsigned int extendedPrecision;
971 // Normalize our MSB to one below the top bit to allow for overflow.
972 extendedPrecision = 2 * precision + 1;
973 if (omsb != extendedPrecision - 1) {
974 assert(extendedPrecision > omsb);
975 APInt::tcShiftLeft(fullSignificand, newPartsCount,
976 (extendedPrecision - 1) - omsb);
977 exponent -= (extendedPrecision - 1) - omsb;
980 /* Create new semantics. */
981 extendedSemantics = *semantics;
982 extendedSemantics.precision = extendedPrecision;
984 if (newPartsCount == 1)
985 significand.part = fullSignificand[0];
987 significand.parts = fullSignificand;
988 semantics = &extendedSemantics;
990 APFloat extendedAddend(*addend);
991 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
992 assert(status == opOK);
995 // Shift the significand of the addend right by one bit. This guarantees
996 // that the high bit of the significand is zero (same as fullSignificand),
997 // so the addition will overflow (if it does overflow at all) into the top bit.
998 lost_fraction = extendedAddend.shiftSignificandRight(1);
999 assert(lost_fraction == lfExactlyZero &&
1000 "Lost precision while shifting addend for fused-multiply-add.");
1002 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1004 /* Restore our state. */
1005 if (newPartsCount == 1)
1006 fullSignificand[0] = significand.part;
1007 significand = savedSignificand;
1008 semantics = savedSemantics;
1010 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1013 // Convert the result having "2 * precision" significant-bits back to the one
1014 // having "precision" significant-bits. First, move the radix point from
1015 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1016 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1017 exponent -= precision + 1;
1019 // In case MSB resides at the left-hand side of radix point, shift the
1020 // mantissa right by some amount to make sure the MSB reside right before
1021 // the radix point (i.e. "MSB . rest-significant-bits").
1023 // Note that the result is not normalized when "omsb < precision". So, the
1024 // caller needs to call APFloat::normalize() if normalized value is expected.
1025 if (omsb > precision) {
1026 unsigned int bits, significantParts;
1029 bits = omsb - precision;
1030 significantParts = partCountForBits(omsb);
1031 lf = shiftRight(fullSignificand, significantParts, bits);
1032 lost_fraction = combineLostFractions(lf, lost_fraction);
1036 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1038 if (newPartsCount > 4)
1039 delete [] fullSignificand;
1041 return lost_fraction;
1044 /* Multiply the significands of LHS and RHS to DST. */
1046 APFloat::divideSignificand(const APFloat &rhs)
1048 unsigned int bit, i, partsCount;
1049 const integerPart *rhsSignificand;
1050 integerPart *lhsSignificand, *dividend, *divisor;
1051 integerPart scratch[4];
1052 lostFraction lost_fraction;
1054 assert(semantics == rhs.semantics);
1056 lhsSignificand = significandParts();
1057 rhsSignificand = rhs.significandParts();
1058 partsCount = partCount();
1061 dividend = new integerPart[partsCount * 2];
1065 divisor = dividend + partsCount;
1067 /* Copy the dividend and divisor as they will be modified in-place. */
1068 for (i = 0; i < partsCount; i++) {
1069 dividend[i] = lhsSignificand[i];
1070 divisor[i] = rhsSignificand[i];
1071 lhsSignificand[i] = 0;
1074 exponent -= rhs.exponent;
1076 unsigned int precision = semantics->precision;
1078 /* Normalize the divisor. */
1079 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1082 APInt::tcShiftLeft(divisor, partsCount, bit);
1085 /* Normalize the dividend. */
1086 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1089 APInt::tcShiftLeft(dividend, partsCount, bit);
1092 /* Ensure the dividend >= divisor initially for the loop below.
1093 Incidentally, this means that the division loop below is
1094 guaranteed to set the integer bit to one. */
1095 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1097 APInt::tcShiftLeft(dividend, partsCount, 1);
1098 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1101 /* Long division. */
1102 for (bit = precision; bit; bit -= 1) {
1103 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1104 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1105 APInt::tcSetBit(lhsSignificand, bit - 1);
1108 APInt::tcShiftLeft(dividend, partsCount, 1);
1111 /* Figure out the lost fraction. */
1112 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1115 lost_fraction = lfMoreThanHalf;
1117 lost_fraction = lfExactlyHalf;
1118 else if (APInt::tcIsZero(dividend, partsCount))
1119 lost_fraction = lfExactlyZero;
1121 lost_fraction = lfLessThanHalf;
1126 return lost_fraction;
1130 APFloat::significandMSB() const
1132 return APInt::tcMSB(significandParts(), partCount());
1136 APFloat::significandLSB() const
1138 return APInt::tcLSB(significandParts(), partCount());
1141 /* Note that a zero result is NOT normalized to fcZero. */
1143 APFloat::shiftSignificandRight(unsigned int bits)
1145 /* Our exponent should not overflow. */
1146 assert((ExponentType) (exponent + bits) >= exponent);
1150 return shiftRight(significandParts(), partCount(), bits);
1153 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1155 APFloat::shiftSignificandLeft(unsigned int bits)
1157 assert(bits < semantics->precision);
1160 unsigned int partsCount = partCount();
1162 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1165 assert(!APInt::tcIsZero(significandParts(), partsCount));
1170 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1174 assert(semantics == rhs.semantics);
1175 assert(isFiniteNonZero());
1176 assert(rhs.isFiniteNonZero());
1178 compare = exponent - rhs.exponent;
1180 /* If exponents are equal, do an unsigned bignum comparison of the
1183 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1187 return cmpGreaterThan;
1188 else if (compare < 0)
1194 /* Handle overflow. Sign is preserved. We either become infinity or
1195 the largest finite number. */
1197 APFloat::handleOverflow(roundingMode rounding_mode)
1200 if (rounding_mode == rmNearestTiesToEven ||
1201 rounding_mode == rmNearestTiesToAway ||
1202 (rounding_mode == rmTowardPositive && !sign) ||
1203 (rounding_mode == rmTowardNegative && sign)) {
1204 category = fcInfinity;
1205 return (opStatus) (opOverflow | opInexact);
1208 /* Otherwise we become the largest finite number. */
1209 category = fcNormal;
1210 exponent = semantics->maxExponent;
1211 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1212 semantics->precision);
1217 /* Returns TRUE if, when truncating the current number, with BIT the
1218 new LSB, with the given lost fraction and rounding mode, the result
1219 would need to be rounded away from zero (i.e., by increasing the
1220 signficand). This routine must work for fcZero of both signs, and
1221 fcNormal numbers. */
1223 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1224 lostFraction lost_fraction,
1225 unsigned int bit) const
1227 /* NaNs and infinities should not have lost fractions. */
1228 assert(isFiniteNonZero() || category == fcZero);
1230 /* Current callers never pass this so we don't handle it. */
1231 assert(lost_fraction != lfExactlyZero);
1233 switch (rounding_mode) {
1234 case rmNearestTiesToAway:
1235 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1237 case rmNearestTiesToEven:
1238 if (lost_fraction == lfMoreThanHalf)
1241 /* Our zeroes don't have a significand to test. */
1242 if (lost_fraction == lfExactlyHalf && category != fcZero)
1243 return APInt::tcExtractBit(significandParts(), bit);
1250 case rmTowardPositive:
1251 return sign == false;
1253 case rmTowardNegative:
1254 return sign == true;
1256 llvm_unreachable("Invalid rounding mode found");
1260 APFloat::normalize(roundingMode rounding_mode,
1261 lostFraction lost_fraction)
1263 unsigned int omsb; /* One, not zero, based MSB. */
1266 if (!isFiniteNonZero())
1269 /* Before rounding normalize the exponent of fcNormal numbers. */
1270 omsb = significandMSB() + 1;
1273 /* OMSB is numbered from 1. We want to place it in the integer
1274 bit numbered PRECISION if possible, with a compensating change in
1276 exponentChange = omsb - semantics->precision;
1278 /* If the resulting exponent is too high, overflow according to
1279 the rounding mode. */
1280 if (exponent + exponentChange > semantics->maxExponent)
1281 return handleOverflow(rounding_mode);
1283 /* Subnormal numbers have exponent minExponent, and their MSB
1284 is forced based on that. */
1285 if (exponent + exponentChange < semantics->minExponent)
1286 exponentChange = semantics->minExponent - exponent;
1288 /* Shifting left is easy as we don't lose precision. */
1289 if (exponentChange < 0) {
1290 assert(lost_fraction == lfExactlyZero);
1292 shiftSignificandLeft(-exponentChange);
1297 if (exponentChange > 0) {
1300 /* Shift right and capture any new lost fraction. */
1301 lf = shiftSignificandRight(exponentChange);
1303 lost_fraction = combineLostFractions(lf, lost_fraction);
1305 /* Keep OMSB up-to-date. */
1306 if (omsb > (unsigned) exponentChange)
1307 omsb -= exponentChange;
1313 /* Now round the number according to rounding_mode given the lost
1316 /* As specified in IEEE 754, since we do not trap we do not report
1317 underflow for exact results. */
1318 if (lost_fraction == lfExactlyZero) {
1319 /* Canonicalize zeroes. */
1326 /* Increment the significand if we're rounding away from zero. */
1327 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1329 exponent = semantics->minExponent;
1331 incrementSignificand();
1332 omsb = significandMSB() + 1;
1334 /* Did the significand increment overflow? */
1335 if (omsb == (unsigned) semantics->precision + 1) {
1336 /* Renormalize by incrementing the exponent and shifting our
1337 significand right one. However if we already have the
1338 maximum exponent we overflow to infinity. */
1339 if (exponent == semantics->maxExponent) {
1340 category = fcInfinity;
1342 return (opStatus) (opOverflow | opInexact);
1345 shiftSignificandRight(1);
1351 /* The normal case - we were and are not denormal, and any
1352 significand increment above didn't overflow. */
1353 if (omsb == semantics->precision)
1356 /* We have a non-zero denormal. */
1357 assert(omsb < semantics->precision);
1359 /* Canonicalize zeroes. */
1363 /* The fcZero case is a denormal that underflowed to zero. */
1364 return (opStatus) (opUnderflow | opInexact);
1368 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1370 switch (PackCategoriesIntoKey(category, rhs.category)) {
1372 llvm_unreachable(nullptr);
1374 case PackCategoriesIntoKey(fcNaN, fcZero):
1375 case PackCategoriesIntoKey(fcNaN, fcNormal):
1376 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1377 case PackCategoriesIntoKey(fcNaN, fcNaN):
1378 case PackCategoriesIntoKey(fcNormal, fcZero):
1379 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1380 case PackCategoriesIntoKey(fcInfinity, fcZero):
1383 case PackCategoriesIntoKey(fcZero, fcNaN):
1384 case PackCategoriesIntoKey(fcNormal, fcNaN):
1385 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1386 // We need to be sure to flip the sign here for subtraction because we
1387 // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1388 sign = rhs.sign ^ subtract;
1390 copySignificand(rhs);
1393 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1394 case PackCategoriesIntoKey(fcZero, fcInfinity):
1395 category = fcInfinity;
1396 sign = rhs.sign ^ subtract;
1399 case PackCategoriesIntoKey(fcZero, fcNormal):
1401 sign = rhs.sign ^ subtract;
1404 case PackCategoriesIntoKey(fcZero, fcZero):
1405 /* Sign depends on rounding mode; handled by caller. */
1408 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1409 /* Differently signed infinities can only be validly
1411 if (((sign ^ rhs.sign)!=0) != subtract) {
1418 case PackCategoriesIntoKey(fcNormal, fcNormal):
1423 /* Add or subtract two normal numbers. */
1425 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1428 lostFraction lost_fraction;
1431 /* Determine if the operation on the absolute values is effectively
1432 an addition or subtraction. */
1433 subtract ^= (sign ^ rhs.sign) ? true : false;
1435 /* Are we bigger exponent-wise than the RHS? */
1436 bits = exponent - rhs.exponent;
1438 /* Subtraction is more subtle than one might naively expect. */
1440 APFloat temp_rhs(rhs);
1444 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1445 lost_fraction = lfExactlyZero;
1446 } else if (bits > 0) {
1447 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1448 shiftSignificandLeft(1);
1451 lost_fraction = shiftSignificandRight(-bits - 1);
1452 temp_rhs.shiftSignificandLeft(1);
1457 carry = temp_rhs.subtractSignificand
1458 (*this, lost_fraction != lfExactlyZero);
1459 copySignificand(temp_rhs);
1462 carry = subtractSignificand
1463 (temp_rhs, lost_fraction != lfExactlyZero);
1466 /* Invert the lost fraction - it was on the RHS and
1468 if (lost_fraction == lfLessThanHalf)
1469 lost_fraction = lfMoreThanHalf;
1470 else if (lost_fraction == lfMoreThanHalf)
1471 lost_fraction = lfLessThanHalf;
1473 /* The code above is intended to ensure that no borrow is
1479 APFloat temp_rhs(rhs);
1481 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1482 carry = addSignificand(temp_rhs);
1484 lost_fraction = shiftSignificandRight(-bits);
1485 carry = addSignificand(rhs);
1488 /* We have a guard bit; generating a carry cannot happen. */
1493 return lost_fraction;
1497 APFloat::multiplySpecials(const APFloat &rhs)
1499 switch (PackCategoriesIntoKey(category, rhs.category)) {
1501 llvm_unreachable(nullptr);
1503 case PackCategoriesIntoKey(fcNaN, fcZero):
1504 case PackCategoriesIntoKey(fcNaN, fcNormal):
1505 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1506 case PackCategoriesIntoKey(fcNaN, fcNaN):
1510 case PackCategoriesIntoKey(fcZero, fcNaN):
1511 case PackCategoriesIntoKey(fcNormal, fcNaN):
1512 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1515 copySignificand(rhs);
1518 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1519 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1520 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1521 category = fcInfinity;
1524 case PackCategoriesIntoKey(fcZero, fcNormal):
1525 case PackCategoriesIntoKey(fcNormal, fcZero):
1526 case PackCategoriesIntoKey(fcZero, fcZero):
1530 case PackCategoriesIntoKey(fcZero, fcInfinity):
1531 case PackCategoriesIntoKey(fcInfinity, fcZero):
1535 case PackCategoriesIntoKey(fcNormal, fcNormal):
1541 APFloat::divideSpecials(const APFloat &rhs)
1543 switch (PackCategoriesIntoKey(category, rhs.category)) {
1545 llvm_unreachable(nullptr);
1547 case PackCategoriesIntoKey(fcZero, fcNaN):
1548 case PackCategoriesIntoKey(fcNormal, fcNaN):
1549 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1551 copySignificand(rhs);
1552 case PackCategoriesIntoKey(fcNaN, fcZero):
1553 case PackCategoriesIntoKey(fcNaN, fcNormal):
1554 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1555 case PackCategoriesIntoKey(fcNaN, fcNaN):
1557 case PackCategoriesIntoKey(fcInfinity, fcZero):
1558 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1559 case PackCategoriesIntoKey(fcZero, fcInfinity):
1560 case PackCategoriesIntoKey(fcZero, fcNormal):
1563 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1567 case PackCategoriesIntoKey(fcNormal, fcZero):
1568 category = fcInfinity;
1571 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1572 case PackCategoriesIntoKey(fcZero, fcZero):
1576 case PackCategoriesIntoKey(fcNormal, fcNormal):
1582 APFloat::modSpecials(const APFloat &rhs)
1584 switch (PackCategoriesIntoKey(category, rhs.category)) {
1586 llvm_unreachable(nullptr);
1588 case PackCategoriesIntoKey(fcNaN, fcZero):
1589 case PackCategoriesIntoKey(fcNaN, fcNormal):
1590 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1591 case PackCategoriesIntoKey(fcNaN, fcNaN):
1592 case PackCategoriesIntoKey(fcZero, fcInfinity):
1593 case PackCategoriesIntoKey(fcZero, fcNormal):
1594 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1597 case PackCategoriesIntoKey(fcZero, fcNaN):
1598 case PackCategoriesIntoKey(fcNormal, fcNaN):
1599 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1602 copySignificand(rhs);
1605 case PackCategoriesIntoKey(fcNormal, fcZero):
1606 case PackCategoriesIntoKey(fcInfinity, fcZero):
1607 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1608 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1609 case PackCategoriesIntoKey(fcZero, fcZero):
1613 case PackCategoriesIntoKey(fcNormal, fcNormal):
1620 APFloat::changeSign()
1622 /* Look mummy, this one's easy. */
1627 APFloat::clearSign()
1629 /* So is this one. */
1634 APFloat::copySign(const APFloat &rhs)
1640 /* Normalized addition or subtraction. */
1642 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1647 fs = addOrSubtractSpecials(rhs, subtract);
1649 /* This return code means it was not a simple case. */
1650 if (fs == opDivByZero) {
1651 lostFraction lost_fraction;
1653 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1654 fs = normalize(rounding_mode, lost_fraction);
1656 /* Can only be zero if we lost no fraction. */
1657 assert(category != fcZero || lost_fraction == lfExactlyZero);
1660 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1661 positive zero unless rounding to minus infinity, except that
1662 adding two like-signed zeroes gives that zero. */
1663 if (category == fcZero) {
1664 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1665 sign = (rounding_mode == rmTowardNegative);
1671 /* Normalized addition. */
1673 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1675 return addOrSubtract(rhs, rounding_mode, false);
1678 /* Normalized subtraction. */
1680 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1682 return addOrSubtract(rhs, rounding_mode, true);
1685 /* Normalized multiply. */
1687 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1692 fs = multiplySpecials(rhs);
1694 if (isFiniteNonZero()) {
1695 lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
1696 fs = normalize(rounding_mode, lost_fraction);
1697 if (lost_fraction != lfExactlyZero)
1698 fs = (opStatus) (fs | opInexact);
1704 /* Normalized divide. */
1706 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1711 fs = divideSpecials(rhs);
1713 if (isFiniteNonZero()) {
1714 lostFraction lost_fraction = divideSignificand(rhs);
1715 fs = normalize(rounding_mode, lost_fraction);
1716 if (lost_fraction != lfExactlyZero)
1717 fs = (opStatus) (fs | opInexact);
1723 /* Normalized remainder. This is not currently correct in all cases. */
1725 APFloat::remainder(const APFloat &rhs)
1729 unsigned int origSign = sign;
1731 fs = V.divide(rhs, rmNearestTiesToEven);
1732 if (fs == opDivByZero)
1735 int parts = partCount();
1736 integerPart *x = new integerPart[parts];
1738 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1739 rmNearestTiesToEven, &ignored);
1740 if (fs==opInvalidOp)
1743 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1744 rmNearestTiesToEven);
1745 assert(fs==opOK); // should always work
1747 fs = V.multiply(rhs, rmNearestTiesToEven);
1748 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1750 fs = subtract(V, rmNearestTiesToEven);
1751 assert(fs==opOK || fs==opInexact); // likewise
1754 sign = origSign; // IEEE754 requires this
1759 /* Normalized llvm frem (C fmod).
1760 This is not currently correct in all cases. */
1762 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1765 fs = modSpecials(rhs);
1767 if (isFiniteNonZero() && rhs.isFiniteNonZero()) {
1769 unsigned int origSign = sign;
1771 fs = V.divide(rhs, rmNearestTiesToEven);
1772 if (fs == opDivByZero)
1775 int parts = partCount();
1776 integerPart *x = new integerPart[parts];
1778 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1779 rmTowardZero, &ignored);
1780 if (fs==opInvalidOp)
1783 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1784 rmNearestTiesToEven);
1785 assert(fs==opOK); // should always work
1787 fs = V.multiply(rhs, rounding_mode);
1788 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1790 fs = subtract(V, rounding_mode);
1791 assert(fs==opOK || fs==opInexact); // likewise
1794 sign = origSign; // IEEE754 requires this
1800 /* Normalized fused-multiply-add. */
1802 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1803 const APFloat &addend,
1804 roundingMode rounding_mode)
1808 /* Post-multiplication sign, before addition. */
1809 sign ^= multiplicand.sign;
1811 /* If and only if all arguments are normal do we need to do an
1812 extended-precision calculation. */
1813 if (isFiniteNonZero() &&
1814 multiplicand.isFiniteNonZero() &&
1815 addend.isFinite()) {
1816 lostFraction lost_fraction;
1818 lost_fraction = multiplySignificand(multiplicand, &addend);
1819 fs = normalize(rounding_mode, lost_fraction);
1820 if (lost_fraction != lfExactlyZero)
1821 fs = (opStatus) (fs | opInexact);
1823 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1824 positive zero unless rounding to minus infinity, except that
1825 adding two like-signed zeroes gives that zero. */
1826 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1827 sign = (rounding_mode == rmTowardNegative);
1829 fs = multiplySpecials(multiplicand);
1831 /* FS can only be opOK or opInvalidOp. There is no more work
1832 to do in the latter case. The IEEE-754R standard says it is
1833 implementation-defined in this case whether, if ADDEND is a
1834 quiet NaN, we raise invalid op; this implementation does so.
1836 If we need to do the addition we can do so with normal
1839 fs = addOrSubtract(addend, rounding_mode, false);
1845 /* Rounding-mode corrrect round to integral value. */
1846 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1849 // If the exponent is large enough, we know that this value is already
1850 // integral, and the arithmetic below would potentially cause it to saturate
1851 // to +/-Inf. Bail out early instead.
1852 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1855 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1856 // precision of our format, and then subtract it back off again. The choice
1857 // of rounding modes for the addition/subtraction determines the rounding mode
1858 // for our integral rounding as well.
1859 // NOTE: When the input value is negative, we do subtraction followed by
1860 // addition instead.
1861 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1862 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1863 APFloat MagicConstant(*semantics);
1864 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1865 rmNearestTiesToEven);
1866 MagicConstant.copySign(*this);
1871 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1872 bool inputSign = isNegative();
1874 fs = add(MagicConstant, rounding_mode);
1875 if (fs != opOK && fs != opInexact)
1878 fs = subtract(MagicConstant, rounding_mode);
1880 // Restore the input sign.
1881 if (inputSign != isNegative())
1888 /* Comparison requires normalized numbers. */
1890 APFloat::compare(const APFloat &rhs) const
1894 assert(semantics == rhs.semantics);
1896 switch (PackCategoriesIntoKey(category, rhs.category)) {
1898 llvm_unreachable(nullptr);
1900 case PackCategoriesIntoKey(fcNaN, fcZero):
1901 case PackCategoriesIntoKey(fcNaN, fcNormal):
1902 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1903 case PackCategoriesIntoKey(fcNaN, fcNaN):
1904 case PackCategoriesIntoKey(fcZero, fcNaN):
1905 case PackCategoriesIntoKey(fcNormal, fcNaN):
1906 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1907 return cmpUnordered;
1909 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1910 case PackCategoriesIntoKey(fcInfinity, fcZero):
1911 case PackCategoriesIntoKey(fcNormal, fcZero):
1915 return cmpGreaterThan;
1917 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1918 case PackCategoriesIntoKey(fcZero, fcInfinity):
1919 case PackCategoriesIntoKey(fcZero, fcNormal):
1921 return cmpGreaterThan;
1925 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1926 if (sign == rhs.sign)
1931 return cmpGreaterThan;
1933 case PackCategoriesIntoKey(fcZero, fcZero):
1936 case PackCategoriesIntoKey(fcNormal, fcNormal):
1940 /* Two normal numbers. Do they have the same sign? */
1941 if (sign != rhs.sign) {
1943 result = cmpLessThan;
1945 result = cmpGreaterThan;
1947 /* Compare absolute values; invert result if negative. */
1948 result = compareAbsoluteValue(rhs);
1951 if (result == cmpLessThan)
1952 result = cmpGreaterThan;
1953 else if (result == cmpGreaterThan)
1954 result = cmpLessThan;
1961 /// APFloat::convert - convert a value of one floating point type to another.
1962 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1963 /// records whether the transformation lost information, i.e. whether
1964 /// converting the result back to the original type will produce the
1965 /// original value (this is almost the same as return value==fsOK, but there
1966 /// are edge cases where this is not so).
1969 APFloat::convert(const fltSemantics &toSemantics,
1970 roundingMode rounding_mode, bool *losesInfo)
1972 lostFraction lostFraction;
1973 unsigned int newPartCount, oldPartCount;
1976 const fltSemantics &fromSemantics = *semantics;
1978 lostFraction = lfExactlyZero;
1979 newPartCount = partCountForBits(toSemantics.precision + 1);
1980 oldPartCount = partCount();
1981 shift = toSemantics.precision - fromSemantics.precision;
1983 bool X86SpecialNan = false;
1984 if (&fromSemantics == &APFloat::x87DoubleExtended &&
1985 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1986 (!(*significandParts() & 0x8000000000000000ULL) ||
1987 !(*significandParts() & 0x4000000000000000ULL))) {
1988 // x86 has some unusual NaNs which cannot be represented in any other
1989 // format; note them here.
1990 X86SpecialNan = true;
1993 // If this is a truncation of a denormal number, and the target semantics
1994 // has larger exponent range than the source semantics (this can happen
1995 // when truncating from PowerPC double-double to double format), the
1996 // right shift could lose result mantissa bits. Adjust exponent instead
1997 // of performing excessive shift.
1998 if (shift < 0 && isFiniteNonZero()) {
1999 int exponentChange = significandMSB() + 1 - fromSemantics.precision;
2000 if (exponent + exponentChange < toSemantics.minExponent)
2001 exponentChange = toSemantics.minExponent - exponent;
2002 if (exponentChange < shift)
2003 exponentChange = shift;
2004 if (exponentChange < 0) {
2005 shift -= exponentChange;
2006 exponent += exponentChange;
2010 // If this is a truncation, perform the shift before we narrow the storage.
2011 if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
2012 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2014 // Fix the storage so it can hold to new value.
2015 if (newPartCount > oldPartCount) {
2016 // The new type requires more storage; make it available.
2017 integerPart *newParts;
2018 newParts = new integerPart[newPartCount];
2019 APInt::tcSet(newParts, 0, newPartCount);
2020 if (isFiniteNonZero() || category==fcNaN)
2021 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2023 significand.parts = newParts;
2024 } else if (newPartCount == 1 && oldPartCount != 1) {
2025 // Switch to built-in storage for a single part.
2026 integerPart newPart = 0;
2027 if (isFiniteNonZero() || category==fcNaN)
2028 newPart = significandParts()[0];
2030 significand.part = newPart;
2033 // Now that we have the right storage, switch the semantics.
2034 semantics = &toSemantics;
2036 // If this is an extension, perform the shift now that the storage is
2038 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2039 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2041 if (isFiniteNonZero()) {
2042 fs = normalize(rounding_mode, lostFraction);
2043 *losesInfo = (fs != opOK);
2044 } else if (category == fcNaN) {
2045 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2047 // For x87 extended precision, we want to make a NaN, not a special NaN if
2048 // the input wasn't special either.
2049 if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended)
2050 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2052 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2053 // does not give you back the same bits. This is dubious, and we
2054 // don't currently do it. You're really supposed to get
2055 // an invalid operation signal at runtime, but nobody does that.
2065 /* Convert a floating point number to an integer according to the
2066 rounding mode. If the rounded integer value is out of range this
2067 returns an invalid operation exception and the contents of the
2068 destination parts are unspecified. If the rounded value is in
2069 range but the floating point number is not the exact integer, the C
2070 standard doesn't require an inexact exception to be raised. IEEE
2071 854 does require it so we do that.
2073 Note that for conversions to integer type the C standard requires
2074 round-to-zero to always be used. */
2076 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
2078 roundingMode rounding_mode,
2079 bool *isExact) const
2081 lostFraction lost_fraction;
2082 const integerPart *src;
2083 unsigned int dstPartsCount, truncatedBits;
2087 /* Handle the three special cases first. */
2088 if (category == fcInfinity || category == fcNaN)
2091 dstPartsCount = partCountForBits(width);
2093 if (category == fcZero) {
2094 APInt::tcSet(parts, 0, dstPartsCount);
2095 // Negative zero can't be represented as an int.
2100 src = significandParts();
2102 /* Step 1: place our absolute value, with any fraction truncated, in
2105 /* Our absolute value is less than one; truncate everything. */
2106 APInt::tcSet(parts, 0, dstPartsCount);
2107 /* For exponent -1 the integer bit represents .5, look at that.
2108 For smaller exponents leftmost truncated bit is 0. */
2109 truncatedBits = semantics->precision -1U - exponent;
2111 /* We want the most significant (exponent + 1) bits; the rest are
2113 unsigned int bits = exponent + 1U;
2115 /* Hopelessly large in magnitude? */
2119 if (bits < semantics->precision) {
2120 /* We truncate (semantics->precision - bits) bits. */
2121 truncatedBits = semantics->precision - bits;
2122 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
2124 /* We want at least as many bits as are available. */
2125 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2126 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2131 /* Step 2: work out any lost fraction, and increment the absolute
2132 value if we would round away from zero. */
2133 if (truncatedBits) {
2134 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2136 if (lost_fraction != lfExactlyZero &&
2137 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2138 if (APInt::tcIncrement(parts, dstPartsCount))
2139 return opInvalidOp; /* Overflow. */
2142 lost_fraction = lfExactlyZero;
2145 /* Step 3: check if we fit in the destination. */
2146 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2150 /* Negative numbers cannot be represented as unsigned. */
2154 /* It takes omsb bits to represent the unsigned integer value.
2155 We lose a bit for the sign, but care is needed as the
2156 maximally negative integer is a special case. */
2157 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2160 /* This case can happen because of rounding. */
2165 APInt::tcNegate (parts, dstPartsCount);
2167 if (omsb >= width + !isSigned)
2171 if (lost_fraction == lfExactlyZero) {
2178 /* Same as convertToSignExtendedInteger, except we provide
2179 deterministic values in case of an invalid operation exception,
2180 namely zero for NaNs and the minimal or maximal value respectively
2181 for underflow or overflow.
2182 The *isExact output tells whether the result is exact, in the sense
2183 that converting it back to the original floating point type produces
2184 the original value. This is almost equivalent to result==opOK,
2185 except for negative zeroes.
2188 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2190 roundingMode rounding_mode, bool *isExact) const
2194 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2197 if (fs == opInvalidOp) {
2198 unsigned int bits, dstPartsCount;
2200 dstPartsCount = partCountForBits(width);
2202 if (category == fcNaN)
2207 bits = width - isSigned;
2209 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2210 if (sign && isSigned)
2211 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2217 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2218 an APSInt, whose initial bit-width and signed-ness are used to determine the
2219 precision of the conversion.
2222 APFloat::convertToInteger(APSInt &result,
2223 roundingMode rounding_mode, bool *isExact) const
2225 unsigned bitWidth = result.getBitWidth();
2226 SmallVector<uint64_t, 4> parts(result.getNumWords());
2227 opStatus status = convertToInteger(
2228 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2229 // Keeps the original signed-ness.
2230 result = APInt(bitWidth, parts);
2234 /* Convert an unsigned integer SRC to a floating point number,
2235 rounding according to ROUNDING_MODE. The sign of the floating
2236 point number is not modified. */
2238 APFloat::convertFromUnsignedParts(const integerPart *src,
2239 unsigned int srcCount,
2240 roundingMode rounding_mode)
2242 unsigned int omsb, precision, dstCount;
2244 lostFraction lost_fraction;
2246 category = fcNormal;
2247 omsb = APInt::tcMSB(src, srcCount) + 1;
2248 dst = significandParts();
2249 dstCount = partCount();
2250 precision = semantics->precision;
2252 /* We want the most significant PRECISION bits of SRC. There may not
2253 be that many; extract what we can. */
2254 if (precision <= omsb) {
2255 exponent = omsb - 1;
2256 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2258 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2260 exponent = precision - 1;
2261 lost_fraction = lfExactlyZero;
2262 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2265 return normalize(rounding_mode, lost_fraction);
2269 APFloat::convertFromAPInt(const APInt &Val,
2271 roundingMode rounding_mode)
2273 unsigned int partCount = Val.getNumWords();
2277 if (isSigned && api.isNegative()) {
2282 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2285 /* Convert a two's complement integer SRC to a floating point number,
2286 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2287 integer is signed, in which case it must be sign-extended. */
2289 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2290 unsigned int srcCount,
2292 roundingMode rounding_mode)
2297 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2300 /* If we're signed and negative negate a copy. */
2302 copy = new integerPart[srcCount];
2303 APInt::tcAssign(copy, src, srcCount);
2304 APInt::tcNegate(copy, srcCount);
2305 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2309 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2315 /* FIXME: should this just take a const APInt reference? */
2317 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2318 unsigned int width, bool isSigned,
2319 roundingMode rounding_mode)
2321 unsigned int partCount = partCountForBits(width);
2322 APInt api = APInt(width, makeArrayRef(parts, partCount));
2325 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2330 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2334 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2336 lostFraction lost_fraction = lfExactlyZero;
2338 category = fcNormal;
2342 integerPart *significand = significandParts();
2343 unsigned partsCount = partCount();
2344 unsigned bitPos = partsCount * integerPartWidth;
2345 bool computedTrailingFraction = false;
2347 // Skip leading zeroes and any (hexa)decimal point.
2348 StringRef::iterator begin = s.begin();
2349 StringRef::iterator end = s.end();
2350 StringRef::iterator dot;
2351 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2352 StringRef::iterator firstSignificantDigit = p;
2355 integerPart hex_value;
2358 assert(dot == end && "String contains multiple dots");
2363 hex_value = hexDigitValue(*p);
2364 if (hex_value == -1U)
2369 // Store the number while we have space.
2372 hex_value <<= bitPos % integerPartWidth;
2373 significand[bitPos / integerPartWidth] |= hex_value;
2374 } else if (!computedTrailingFraction) {
2375 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2376 computedTrailingFraction = true;
2380 /* Hex floats require an exponent but not a hexadecimal point. */
2381 assert(p != end && "Hex strings require an exponent");
2382 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2383 assert(p != begin && "Significand has no digits");
2384 assert((dot == end || p - begin != 1) && "Significand has no digits");
2386 /* Ignore the exponent if we are zero. */
2387 if (p != firstSignificantDigit) {
2390 /* Implicit hexadecimal point? */
2394 /* Calculate the exponent adjustment implicit in the number of
2395 significant digits. */
2396 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2397 if (expAdjustment < 0)
2399 expAdjustment = expAdjustment * 4 - 1;
2401 /* Adjust for writing the significand starting at the most
2402 significant nibble. */
2403 expAdjustment += semantics->precision;
2404 expAdjustment -= partsCount * integerPartWidth;
2406 /* Adjust for the given exponent. */
2407 exponent = totalExponent(p + 1, end, expAdjustment);
2410 return normalize(rounding_mode, lost_fraction);
2414 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2415 unsigned sigPartCount, int exp,
2416 roundingMode rounding_mode)
2418 unsigned int parts, pow5PartCount;
2419 fltSemantics calcSemantics = { 32767, -32767, 0 };
2420 integerPart pow5Parts[maxPowerOfFiveParts];
2423 isNearest = (rounding_mode == rmNearestTiesToEven ||
2424 rounding_mode == rmNearestTiesToAway);
2426 parts = partCountForBits(semantics->precision + 11);
2428 /* Calculate pow(5, abs(exp)). */
2429 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2431 for (;; parts *= 2) {
2432 opStatus sigStatus, powStatus;
2433 unsigned int excessPrecision, truncatedBits;
2435 calcSemantics.precision = parts * integerPartWidth - 1;
2436 excessPrecision = calcSemantics.precision - semantics->precision;
2437 truncatedBits = excessPrecision;
2439 APFloat decSig = APFloat::getZero(calcSemantics, sign);
2440 APFloat pow5(calcSemantics);
2442 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2443 rmNearestTiesToEven);
2444 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2445 rmNearestTiesToEven);
2446 /* Add exp, as 10^n = 5^n * 2^n. */
2447 decSig.exponent += exp;
2449 lostFraction calcLostFraction;
2450 integerPart HUerr, HUdistance;
2451 unsigned int powHUerr;
2454 /* multiplySignificand leaves the precision-th bit set to 1. */
2455 calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
2456 powHUerr = powStatus != opOK;
2458 calcLostFraction = decSig.divideSignificand(pow5);
2459 /* Denormal numbers have less precision. */
2460 if (decSig.exponent < semantics->minExponent) {
2461 excessPrecision += (semantics->minExponent - decSig.exponent);
2462 truncatedBits = excessPrecision;
2463 if (excessPrecision > calcSemantics.precision)
2464 excessPrecision = calcSemantics.precision;
2466 /* Extra half-ulp lost in reciprocal of exponent. */
2467 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2470 /* Both multiplySignificand and divideSignificand return the
2471 result with the integer bit set. */
2472 assert(APInt::tcExtractBit
2473 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2475 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2477 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2478 excessPrecision, isNearest);
2480 /* Are we guaranteed to round correctly if we truncate? */
2481 if (HUdistance >= HUerr) {
2482 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2483 calcSemantics.precision - excessPrecision,
2485 /* Take the exponent of decSig. If we tcExtract-ed less bits
2486 above we must adjust our exponent to compensate for the
2487 implicit right shift. */
2488 exponent = (decSig.exponent + semantics->precision
2489 - (calcSemantics.precision - excessPrecision));
2490 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2493 return normalize(rounding_mode, calcLostFraction);
2499 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2504 /* Scan the text. */
2505 StringRef::iterator p = str.begin();
2506 interpretDecimal(p, str.end(), &D);
2508 /* Handle the quick cases. First the case of no significant digits,
2509 i.e. zero, and then exponents that are obviously too large or too
2510 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2511 definitely overflows if
2513 (exp - 1) * L >= maxExponent
2515 and definitely underflows to zero where
2517 (exp + 1) * L <= minExponent - precision
2519 With integer arithmetic the tightest bounds for L are
2521 93/28 < L < 196/59 [ numerator <= 256 ]
2522 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2525 // Test if we have a zero number allowing for strings with no null terminators
2526 // and zero decimals with non-zero exponents.
2528 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2529 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2530 // be at most one dot. On the other hand, if we have a zero with a non-zero
2531 // exponent, then we know that D.firstSigDigit will be non-numeric.
2532 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2536 /* Check whether the normalized exponent is high enough to overflow
2537 max during the log-rebasing in the max-exponent check below. */
2538 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2539 fs = handleOverflow(rounding_mode);
2541 /* If it wasn't, then it also wasn't high enough to overflow max
2542 during the log-rebasing in the min-exponent check. Check that it
2543 won't overflow min in either check, then perform the min-exponent
2545 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2546 (D.normalizedExponent + 1) * 28738 <=
2547 8651 * (semantics->minExponent - (int) semantics->precision)) {
2548 /* Underflow to zero and round. */
2549 category = fcNormal;
2551 fs = normalize(rounding_mode, lfLessThanHalf);
2553 /* We can finally safely perform the max-exponent check. */
2554 } else if ((D.normalizedExponent - 1) * 42039
2555 >= 12655 * semantics->maxExponent) {
2556 /* Overflow and round. */
2557 fs = handleOverflow(rounding_mode);
2559 integerPart *decSignificand;
2560 unsigned int partCount;
2562 /* A tight upper bound on number of bits required to hold an
2563 N-digit decimal integer is N * 196 / 59. Allocate enough space
2564 to hold the full significand, and an extra part required by
2566 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2567 partCount = partCountForBits(1 + 196 * partCount / 59);
2568 decSignificand = new integerPart[partCount + 1];
2571 /* Convert to binary efficiently - we do almost all multiplication
2572 in an integerPart. When this would overflow do we do a single
2573 bignum multiplication, and then revert again to multiplication
2574 in an integerPart. */
2576 integerPart decValue, val, multiplier;
2584 if (p == str.end()) {
2588 decValue = decDigitValue(*p++);
2589 assert(decValue < 10U && "Invalid character in significand");
2591 val = val * 10 + decValue;
2592 /* The maximum number that can be multiplied by ten with any
2593 digit added without overflowing an integerPart. */
2594 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2596 /* Multiply out the current part. */
2597 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2598 partCount, partCount + 1, false);
2600 /* If we used another part (likely but not guaranteed), increase
2602 if (decSignificand[partCount])
2604 } while (p <= D.lastSigDigit);
2606 category = fcNormal;
2607 fs = roundSignificandWithExponent(decSignificand, partCount,
2608 D.exponent, rounding_mode);
2610 delete [] decSignificand;
2617 APFloat::convertFromStringSpecials(StringRef str) {
2618 if (str.equals("inf") || str.equals("INFINITY")) {
2623 if (str.equals("-inf") || str.equals("-INFINITY")) {
2628 if (str.equals("nan") || str.equals("NaN")) {
2629 makeNaN(false, false);
2633 if (str.equals("-nan") || str.equals("-NaN")) {
2634 makeNaN(false, true);
2642 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2644 assert(!str.empty() && "Invalid string length");
2646 // Handle special cases.
2647 if (convertFromStringSpecials(str))
2650 /* Handle a leading minus sign. */
2651 StringRef::iterator p = str.begin();
2652 size_t slen = str.size();
2653 sign = *p == '-' ? 1 : 0;
2654 if (*p == '-' || *p == '+') {
2657 assert(slen && "String has no digits");
2660 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2661 assert(slen - 2 && "Invalid string");
2662 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2666 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2669 /* Write out a hexadecimal representation of the floating point value
2670 to DST, which must be of sufficient size, in the C99 form
2671 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2672 excluding the terminating NUL.
2674 If UPPERCASE, the output is in upper case, otherwise in lower case.
2676 HEXDIGITS digits appear altogether, rounding the value if
2677 necessary. If HEXDIGITS is 0, the minimal precision to display the
2678 number precisely is used instead. If nothing would appear after
2679 the decimal point it is suppressed.
2681 The decimal exponent is always printed and has at least one digit.
2682 Zero values display an exponent of zero. Infinities and NaNs
2683 appear as "infinity" or "nan" respectively.
2685 The above rules are as specified by C99. There is ambiguity about
2686 what the leading hexadecimal digit should be. This implementation
2687 uses whatever is necessary so that the exponent is displayed as
2688 stored. This implies the exponent will fall within the IEEE format
2689 range, and the leading hexadecimal digit will be 0 (for denormals),
2690 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2691 any other digits zero).
2694 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2695 bool upperCase, roundingMode rounding_mode) const
2705 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2706 dst += sizeof infinityL - 1;
2710 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2711 dst += sizeof NaNU - 1;
2716 *dst++ = upperCase ? 'X': 'x';
2718 if (hexDigits > 1) {
2720 memset (dst, '0', hexDigits - 1);
2721 dst += hexDigits - 1;
2723 *dst++ = upperCase ? 'P': 'p';
2728 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2734 return static_cast<unsigned int>(dst - p);
2737 /* Does the hard work of outputting the correctly rounded hexadecimal
2738 form of a normal floating point number with the specified number of
2739 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2740 digits necessary to print the value precisely is output. */
2742 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2744 roundingMode rounding_mode) const
2746 unsigned int count, valueBits, shift, partsCount, outputDigits;
2747 const char *hexDigitChars;
2748 const integerPart *significand;
2753 *dst++ = upperCase ? 'X': 'x';
2756 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2758 significand = significandParts();
2759 partsCount = partCount();
2761 /* +3 because the first digit only uses the single integer bit, so
2762 we have 3 virtual zero most-significant-bits. */
2763 valueBits = semantics->precision + 3;
2764 shift = integerPartWidth - valueBits % integerPartWidth;
2766 /* The natural number of digits required ignoring trailing
2767 insignificant zeroes. */
2768 outputDigits = (valueBits - significandLSB () + 3) / 4;
2770 /* hexDigits of zero means use the required number for the
2771 precision. Otherwise, see if we are truncating. If we are,
2772 find out if we need to round away from zero. */
2774 if (hexDigits < outputDigits) {
2775 /* We are dropping non-zero bits, so need to check how to round.
2776 "bits" is the number of dropped bits. */
2778 lostFraction fraction;
2780 bits = valueBits - hexDigits * 4;
2781 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2782 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2784 outputDigits = hexDigits;
2787 /* Write the digits consecutively, and start writing in the location
2788 of the hexadecimal point. We move the most significant digit
2789 left and add the hexadecimal point later. */
2792 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2794 while (outputDigits && count) {
2797 /* Put the most significant integerPartWidth bits in "part". */
2798 if (--count == partsCount)
2799 part = 0; /* An imaginary higher zero part. */
2801 part = significand[count] << shift;
2804 part |= significand[count - 1] >> (integerPartWidth - shift);
2806 /* Convert as much of "part" to hexdigits as we can. */
2807 unsigned int curDigits = integerPartWidth / 4;
2809 if (curDigits > outputDigits)
2810 curDigits = outputDigits;
2811 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2812 outputDigits -= curDigits;
2818 /* Note that hexDigitChars has a trailing '0'. */
2821 *q = hexDigitChars[hexDigitValue (*q) + 1];
2822 } while (*q == '0');
2825 /* Add trailing zeroes. */
2826 memset (dst, '0', outputDigits);
2827 dst += outputDigits;
2830 /* Move the most significant digit to before the point, and if there
2831 is something after the decimal point add it. This must come
2832 after rounding above. */
2839 /* Finally output the exponent. */
2840 *dst++ = upperCase ? 'P': 'p';
2842 return writeSignedDecimal (dst, exponent);
2845 hash_code llvm::hash_value(const APFloat &Arg) {
2846 if (!Arg.isFiniteNonZero())
2847 return hash_combine((uint8_t)Arg.category,
2848 // NaN has no sign, fix it at zero.
2849 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2850 Arg.semantics->precision);
2852 // Normal floats need their exponent and significand hashed.
2853 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2854 Arg.semantics->precision, Arg.exponent,
2856 Arg.significandParts(),
2857 Arg.significandParts() + Arg.partCount()));
2860 // Conversion from APFloat to/from host float/double. It may eventually be
2861 // possible to eliminate these and have everybody deal with APFloats, but that
2862 // will take a while. This approach will not easily extend to long double.
2863 // Current implementation requires integerPartWidth==64, which is correct at
2864 // the moment but could be made more general.
2866 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2867 // the actual IEEE respresentations. We compensate for that here.
2870 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2872 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2873 assert(partCount()==2);
2875 uint64_t myexponent, mysignificand;
2877 if (isFiniteNonZero()) {
2878 myexponent = exponent+16383; //bias
2879 mysignificand = significandParts()[0];
2880 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2881 myexponent = 0; // denormal
2882 } else if (category==fcZero) {
2885 } else if (category==fcInfinity) {
2886 myexponent = 0x7fff;
2887 mysignificand = 0x8000000000000000ULL;
2889 assert(category == fcNaN && "Unknown category");
2890 myexponent = 0x7fff;
2891 mysignificand = significandParts()[0];
2895 words[0] = mysignificand;
2896 words[1] = ((uint64_t)(sign & 1) << 15) |
2897 (myexponent & 0x7fffLL);
2898 return APInt(80, words);
2902 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2904 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2905 assert(partCount()==2);
2911 // Convert number to double. To avoid spurious underflows, we re-
2912 // normalize against the "double" minExponent first, and only *then*
2913 // truncate the mantissa. The result of that second conversion
2914 // may be inexact, but should never underflow.
2915 // Declare fltSemantics before APFloat that uses it (and
2916 // saves pointer to it) to ensure correct destruction order.
2917 fltSemantics extendedSemantics = *semantics;
2918 extendedSemantics.minExponent = IEEEdouble.minExponent;
2919 APFloat extended(*this);
2920 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2921 assert(fs == opOK && !losesInfo);
2924 APFloat u(extended);
2925 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2926 assert(fs == opOK || fs == opInexact);
2928 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2930 // If conversion was exact or resulted in a special case, we're done;
2931 // just set the second double to zero. Otherwise, re-convert back to
2932 // the extended format and compute the difference. This now should
2933 // convert exactly to double.
2934 if (u.isFiniteNonZero() && losesInfo) {
2935 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2936 assert(fs == opOK && !losesInfo);
2939 APFloat v(extended);
2940 v.subtract(u, rmNearestTiesToEven);
2941 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2942 assert(fs == opOK && !losesInfo);
2944 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2949 return APInt(128, words);
2953 APFloat::convertQuadrupleAPFloatToAPInt() const
2955 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2956 assert(partCount()==2);
2958 uint64_t myexponent, mysignificand, mysignificand2;
2960 if (isFiniteNonZero()) {
2961 myexponent = exponent+16383; //bias
2962 mysignificand = significandParts()[0];
2963 mysignificand2 = significandParts()[1];
2964 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2965 myexponent = 0; // denormal
2966 } else if (category==fcZero) {
2968 mysignificand = mysignificand2 = 0;
2969 } else if (category==fcInfinity) {
2970 myexponent = 0x7fff;
2971 mysignificand = mysignificand2 = 0;
2973 assert(category == fcNaN && "Unknown category!");
2974 myexponent = 0x7fff;
2975 mysignificand = significandParts()[0];
2976 mysignificand2 = significandParts()[1];
2980 words[0] = mysignificand;
2981 words[1] = ((uint64_t)(sign & 1) << 63) |
2982 ((myexponent & 0x7fff) << 48) |
2983 (mysignificand2 & 0xffffffffffffLL);
2985 return APInt(128, words);
2989 APFloat::convertDoubleAPFloatToAPInt() const
2991 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2992 assert(partCount()==1);
2994 uint64_t myexponent, mysignificand;
2996 if (isFiniteNonZero()) {
2997 myexponent = exponent+1023; //bias
2998 mysignificand = *significandParts();
2999 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
3000 myexponent = 0; // denormal
3001 } else if (category==fcZero) {
3004 } else if (category==fcInfinity) {
3008 assert(category == fcNaN && "Unknown category!");
3010 mysignificand = *significandParts();
3013 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
3014 ((myexponent & 0x7ff) << 52) |
3015 (mysignificand & 0xfffffffffffffLL))));
3019 APFloat::convertFloatAPFloatToAPInt() const
3021 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
3022 assert(partCount()==1);
3024 uint32_t myexponent, mysignificand;
3026 if (isFiniteNonZero()) {
3027 myexponent = exponent+127; //bias
3028 mysignificand = (uint32_t)*significandParts();
3029 if (myexponent == 1 && !(mysignificand & 0x800000))
3030 myexponent = 0; // denormal
3031 } else if (category==fcZero) {
3034 } else if (category==fcInfinity) {
3038 assert(category == fcNaN && "Unknown category!");
3040 mysignificand = (uint32_t)*significandParts();
3043 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3044 (mysignificand & 0x7fffff)));
3048 APFloat::convertHalfAPFloatToAPInt() const
3050 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
3051 assert(partCount()==1);
3053 uint32_t myexponent, mysignificand;
3055 if (isFiniteNonZero()) {
3056 myexponent = exponent+15; //bias
3057 mysignificand = (uint32_t)*significandParts();
3058 if (myexponent == 1 && !(mysignificand & 0x400))
3059 myexponent = 0; // denormal
3060 } else if (category==fcZero) {
3063 } else if (category==fcInfinity) {
3067 assert(category == fcNaN && "Unknown category!");
3069 mysignificand = (uint32_t)*significandParts();
3072 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3073 (mysignificand & 0x3ff)));
3076 // This function creates an APInt that is just a bit map of the floating
3077 // point constant as it would appear in memory. It is not a conversion,
3078 // and treating the result as a normal integer is unlikely to be useful.
3081 APFloat::bitcastToAPInt() const
3083 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
3084 return convertHalfAPFloatToAPInt();
3086 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
3087 return convertFloatAPFloatToAPInt();
3089 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
3090 return convertDoubleAPFloatToAPInt();
3092 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
3093 return convertQuadrupleAPFloatToAPInt();
3095 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
3096 return convertPPCDoubleDoubleAPFloatToAPInt();
3098 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
3100 return convertF80LongDoubleAPFloatToAPInt();
3104 APFloat::convertToFloat() const
3106 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
3107 "Float semantics are not IEEEsingle");
3108 APInt api = bitcastToAPInt();
3109 return api.bitsToFloat();
3113 APFloat::convertToDouble() const
3115 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
3116 "Float semantics are not IEEEdouble");
3117 APInt api = bitcastToAPInt();
3118 return api.bitsToDouble();
3121 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3122 /// does not support these bit patterns:
3123 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3124 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3125 /// exponent = 0, integer bit 1 ("pseudodenormal")
3126 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3127 /// At the moment, the first two are treated as NaNs, the second two as Normal.
3129 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
3131 assert(api.getBitWidth()==80);
3132 uint64_t i1 = api.getRawData()[0];
3133 uint64_t i2 = api.getRawData()[1];
3134 uint64_t myexponent = (i2 & 0x7fff);
3135 uint64_t mysignificand = i1;
3137 initialize(&APFloat::x87DoubleExtended);
3138 assert(partCount()==2);
3140 sign = static_cast<unsigned int>(i2>>15);
3141 if (myexponent==0 && mysignificand==0) {
3142 // exponent, significand meaningless
3144 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3145 // exponent, significand meaningless
3146 category = fcInfinity;
3147 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3148 // exponent meaningless
3150 significandParts()[0] = mysignificand;
3151 significandParts()[1] = 0;
3153 category = fcNormal;
3154 exponent = myexponent - 16383;
3155 significandParts()[0] = mysignificand;
3156 significandParts()[1] = 0;
3157 if (myexponent==0) // denormal
3163 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3165 assert(api.getBitWidth()==128);
3166 uint64_t i1 = api.getRawData()[0];
3167 uint64_t i2 = api.getRawData()[1];
3171 // Get the first double and convert to our format.
3172 initFromDoubleAPInt(APInt(64, i1));
3173 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3174 assert(fs == opOK && !losesInfo);
3177 // Unless we have a special case, add in second double.
3178 if (isFiniteNonZero()) {
3179 APFloat v(IEEEdouble, APInt(64, i2));
3180 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3181 assert(fs == opOK && !losesInfo);
3184 add(v, rmNearestTiesToEven);
3189 APFloat::initFromQuadrupleAPInt(const APInt &api)
3191 assert(api.getBitWidth()==128);
3192 uint64_t i1 = api.getRawData()[0];
3193 uint64_t i2 = api.getRawData()[1];
3194 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3195 uint64_t mysignificand = i1;
3196 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3198 initialize(&APFloat::IEEEquad);
3199 assert(partCount()==2);
3201 sign = static_cast<unsigned int>(i2>>63);
3202 if (myexponent==0 &&
3203 (mysignificand==0 && mysignificand2==0)) {
3204 // exponent, significand meaningless
3206 } else if (myexponent==0x7fff &&
3207 (mysignificand==0 && mysignificand2==0)) {
3208 // exponent, significand meaningless
3209 category = fcInfinity;
3210 } else if (myexponent==0x7fff &&
3211 (mysignificand!=0 || mysignificand2 !=0)) {
3212 // exponent meaningless
3214 significandParts()[0] = mysignificand;
3215 significandParts()[1] = mysignificand2;
3217 category = fcNormal;
3218 exponent = myexponent - 16383;
3219 significandParts()[0] = mysignificand;
3220 significandParts()[1] = mysignificand2;
3221 if (myexponent==0) // denormal
3224 significandParts()[1] |= 0x1000000000000LL; // integer bit
3229 APFloat::initFromDoubleAPInt(const APInt &api)
3231 assert(api.getBitWidth()==64);
3232 uint64_t i = *api.getRawData();
3233 uint64_t myexponent = (i >> 52) & 0x7ff;
3234 uint64_t mysignificand = i & 0xfffffffffffffLL;
3236 initialize(&APFloat::IEEEdouble);
3237 assert(partCount()==1);
3239 sign = static_cast<unsigned int>(i>>63);
3240 if (myexponent==0 && mysignificand==0) {
3241 // exponent, significand meaningless
3243 } else if (myexponent==0x7ff && mysignificand==0) {
3244 // exponent, significand meaningless
3245 category = fcInfinity;
3246 } else if (myexponent==0x7ff && mysignificand!=0) {
3247 // exponent meaningless
3249 *significandParts() = mysignificand;
3251 category = fcNormal;
3252 exponent = myexponent - 1023;
3253 *significandParts() = mysignificand;
3254 if (myexponent==0) // denormal
3257 *significandParts() |= 0x10000000000000LL; // integer bit
3262 APFloat::initFromFloatAPInt(const APInt & api)
3264 assert(api.getBitWidth()==32);
3265 uint32_t i = (uint32_t)*api.getRawData();
3266 uint32_t myexponent = (i >> 23) & 0xff;
3267 uint32_t mysignificand = i & 0x7fffff;
3269 initialize(&APFloat::IEEEsingle);
3270 assert(partCount()==1);
3273 if (myexponent==0 && mysignificand==0) {
3274 // exponent, significand meaningless
3276 } else if (myexponent==0xff && mysignificand==0) {
3277 // exponent, significand meaningless
3278 category = fcInfinity;
3279 } else if (myexponent==0xff && mysignificand!=0) {
3280 // sign, exponent, significand meaningless
3282 *significandParts() = mysignificand;
3284 category = fcNormal;
3285 exponent = myexponent - 127; //bias
3286 *significandParts() = mysignificand;
3287 if (myexponent==0) // denormal
3290 *significandParts() |= 0x800000; // integer bit
3295 APFloat::initFromHalfAPInt(const APInt & api)
3297 assert(api.getBitWidth()==16);
3298 uint32_t i = (uint32_t)*api.getRawData();
3299 uint32_t myexponent = (i >> 10) & 0x1f;
3300 uint32_t mysignificand = i & 0x3ff;
3302 initialize(&APFloat::IEEEhalf);
3303 assert(partCount()==1);
3306 if (myexponent==0 && mysignificand==0) {
3307 // exponent, significand meaningless
3309 } else if (myexponent==0x1f && mysignificand==0) {
3310 // exponent, significand meaningless
3311 category = fcInfinity;
3312 } else if (myexponent==0x1f && mysignificand!=0) {
3313 // sign, exponent, significand meaningless
3315 *significandParts() = mysignificand;
3317 category = fcNormal;
3318 exponent = myexponent - 15; //bias
3319 *significandParts() = mysignificand;
3320 if (myexponent==0) // denormal
3323 *significandParts() |= 0x400; // integer bit
3327 /// Treat api as containing the bits of a floating point number. Currently
3328 /// we infer the floating point type from the size of the APInt. The
3329 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3330 /// when the size is anything else).
3332 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api)
3334 if (Sem == &IEEEhalf)
3335 return initFromHalfAPInt(api);
3336 if (Sem == &IEEEsingle)
3337 return initFromFloatAPInt(api);
3338 if (Sem == &IEEEdouble)
3339 return initFromDoubleAPInt(api);
3340 if (Sem == &x87DoubleExtended)
3341 return initFromF80LongDoubleAPInt(api);
3342 if (Sem == &IEEEquad)
3343 return initFromQuadrupleAPInt(api);
3344 if (Sem == &PPCDoubleDouble)
3345 return initFromPPCDoubleDoubleAPInt(api);
3347 llvm_unreachable(nullptr);
3351 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3355 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth));
3357 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth));
3359 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth));
3361 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth));
3364 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth));
3365 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
3367 llvm_unreachable("Unknown floating bit width");
3371 /// Make this number the largest magnitude normal number in the given
3373 void APFloat::makeLargest(bool Negative) {
3374 // We want (in interchange format):
3375 // sign = {Negative}
3377 // significand = 1..1
3378 category = fcNormal;
3380 exponent = semantics->maxExponent;
3382 // Use memset to set all but the highest integerPart to all ones.
3383 integerPart *significand = significandParts();
3384 unsigned PartCount = partCount();
3385 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3387 // Set the high integerPart especially setting all unused top bits for
3388 // internal consistency.
3389 const unsigned NumUnusedHighBits =
3390 PartCount*integerPartWidth - semantics->precision;
3391 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3392 ? (~integerPart(0) >> NumUnusedHighBits)
3396 /// Make this number the smallest magnitude denormal number in the given
3398 void APFloat::makeSmallest(bool Negative) {
3399 // We want (in interchange format):
3400 // sign = {Negative}
3402 // significand = 0..01
3403 category = fcNormal;
3405 exponent = semantics->minExponent;
3406 APInt::tcSet(significandParts(), 1, partCount());
3410 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3411 // We want (in interchange format):
3412 // sign = {Negative}
3414 // significand = 1..1
3415 APFloat Val(Sem, uninitialized);
3416 Val.makeLargest(Negative);
3420 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3421 // We want (in interchange format):
3422 // sign = {Negative}
3424 // significand = 0..01
3425 APFloat Val(Sem, uninitialized);
3426 Val.makeSmallest(Negative);
3430 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3431 APFloat Val(Sem, uninitialized);
3433 // We want (in interchange format):
3434 // sign = {Negative}
3436 // significand = 10..0
3438 Val.category = fcNormal;
3439 Val.zeroSignificand();
3440 Val.sign = Negative;
3441 Val.exponent = Sem.minExponent;
3442 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3443 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3448 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) {
3449 initFromAPInt(&Sem, API);
3452 APFloat::APFloat(float f) {
3453 initFromAPInt(&IEEEsingle, APInt::floatToBits(f));
3456 APFloat::APFloat(double d) {
3457 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d));
3461 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3462 Buffer.append(Str.begin(), Str.end());
3465 /// Removes data from the given significand until it is no more
3466 /// precise than is required for the desired precision.
3467 void AdjustToPrecision(APInt &significand,
3468 int &exp, unsigned FormatPrecision) {
3469 unsigned bits = significand.getActiveBits();
3471 // 196/59 is a very slight overestimate of lg_2(10).
3472 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3474 if (bits <= bitsRequired) return;
3476 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3477 if (!tensRemovable) return;
3479 exp += tensRemovable;
3481 APInt divisor(significand.getBitWidth(), 1);
3482 APInt powten(significand.getBitWidth(), 10);
3484 if (tensRemovable & 1)
3486 tensRemovable >>= 1;
3487 if (!tensRemovable) break;
3491 significand = significand.udiv(divisor);
3493 // Truncate the significand down to its active bit count.
3494 significand = significand.trunc(significand.getActiveBits());
3498 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3499 int &exp, unsigned FormatPrecision) {
3500 unsigned N = buffer.size();
3501 if (N <= FormatPrecision) return;
3503 // The most significant figures are the last ones in the buffer.
3504 unsigned FirstSignificant = N - FormatPrecision;
3507 // FIXME: this probably shouldn't use 'round half up'.
3509 // Rounding down is just a truncation, except we also want to drop
3510 // trailing zeros from the new result.
3511 if (buffer[FirstSignificant - 1] < '5') {
3512 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3515 exp += FirstSignificant;
3516 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3520 // Rounding up requires a decimal add-with-carry. If we continue
3521 // the carry, the newly-introduced zeros will just be truncated.
3522 for (unsigned I = FirstSignificant; I != N; ++I) {
3523 if (buffer[I] == '9') {
3531 // If we carried through, we have exactly one digit of precision.
3532 if (FirstSignificant == N) {
3533 exp += FirstSignificant;
3535 buffer.push_back('1');
3539 exp += FirstSignificant;
3540 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3544 void APFloat::toString(SmallVectorImpl<char> &Str,
3545 unsigned FormatPrecision,
3546 unsigned FormatMaxPadding) const {
3550 return append(Str, "-Inf");
3552 return append(Str, "+Inf");
3554 case fcNaN: return append(Str, "NaN");
3560 if (!FormatMaxPadding)
3561 append(Str, "0.0E+0");
3573 // Decompose the number into an APInt and an exponent.
3574 int exp = exponent - ((int) semantics->precision - 1);
3575 APInt significand(semantics->precision,
3576 makeArrayRef(significandParts(),
3577 partCountForBits(semantics->precision)));
3579 // Set FormatPrecision if zero. We want to do this before we
3580 // truncate trailing zeros, as those are part of the precision.
3581 if (!FormatPrecision) {
3582 // We use enough digits so the number can be round-tripped back to an
3583 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3584 // Accurately" by Steele and White.
3585 // FIXME: Using a formula based purely on the precision is conservative;
3586 // we can print fewer digits depending on the actual value being printed.
3588 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3589 FormatPrecision = 2 + semantics->precision * 59 / 196;
3592 // Ignore trailing binary zeros.
3593 int trailingZeros = significand.countTrailingZeros();
3594 exp += trailingZeros;
3595 significand = significand.lshr(trailingZeros);
3597 // Change the exponent from 2^e to 10^e.
3600 } else if (exp > 0) {
3602 significand = significand.zext(semantics->precision + exp);
3603 significand <<= exp;
3605 } else { /* exp < 0 */
3608 // We transform this using the identity:
3609 // (N)(2^-e) == (N)(5^e)(10^-e)
3610 // This means we have to multiply N (the significand) by 5^e.
3611 // To avoid overflow, we have to operate on numbers large
3612 // enough to store N * 5^e:
3613 // log2(N * 5^e) == log2(N) + e * log2(5)
3614 // <= semantics->precision + e * 137 / 59
3615 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3617 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3619 // Multiply significand by 5^e.
3620 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3621 significand = significand.zext(precision);
3622 APInt five_to_the_i(precision, 5);
3624 if (texp & 1) significand *= five_to_the_i;
3628 five_to_the_i *= five_to_the_i;
3632 AdjustToPrecision(significand, exp, FormatPrecision);
3634 SmallVector<char, 256> buffer;
3637 unsigned precision = significand.getBitWidth();
3638 APInt ten(precision, 10);
3639 APInt digit(precision, 0);
3641 bool inTrail = true;
3642 while (significand != 0) {
3643 // digit <- significand % 10
3644 // significand <- significand / 10
3645 APInt::udivrem(significand, ten, significand, digit);
3647 unsigned d = digit.getZExtValue();
3649 // Drop trailing zeros.
3650 if (inTrail && !d) exp++;
3652 buffer.push_back((char) ('0' + d));
3657 assert(!buffer.empty() && "no characters in buffer!");
3659 // Drop down to FormatPrecision.
3660 // TODO: don't do more precise calculations above than are required.
3661 AdjustToPrecision(buffer, exp, FormatPrecision);
3663 unsigned NDigits = buffer.size();
3665 // Check whether we should use scientific notation.
3666 bool FormatScientific;
3667 if (!FormatMaxPadding)
3668 FormatScientific = true;
3673 // But we shouldn't make the number look more precise than it is.
3674 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3675 NDigits + (unsigned) exp > FormatPrecision);
3677 // Power of the most significant digit.
3678 int MSD = exp + (int) (NDigits - 1);
3681 FormatScientific = false;
3683 // 765e-5 == 0.00765
3685 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3690 // Scientific formatting is pretty straightforward.
3691 if (FormatScientific) {
3692 exp += (NDigits - 1);
3694 Str.push_back(buffer[NDigits-1]);
3699 for (unsigned I = 1; I != NDigits; ++I)
3700 Str.push_back(buffer[NDigits-1-I]);
3703 Str.push_back(exp >= 0 ? '+' : '-');
3704 if (exp < 0) exp = -exp;
3705 SmallVector<char, 6> expbuf;
3707 expbuf.push_back((char) ('0' + (exp % 10)));
3710 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3711 Str.push_back(expbuf[E-1-I]);
3715 // Non-scientific, positive exponents.
3717 for (unsigned I = 0; I != NDigits; ++I)
3718 Str.push_back(buffer[NDigits-1-I]);
3719 for (unsigned I = 0; I != (unsigned) exp; ++I)
3724 // Non-scientific, negative exponents.
3726 // The number of digits to the left of the decimal point.
3727 int NWholeDigits = exp + (int) NDigits;
3730 if (NWholeDigits > 0) {
3731 for (; I != (unsigned) NWholeDigits; ++I)
3732 Str.push_back(buffer[NDigits-I-1]);
3735 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3739 for (unsigned Z = 1; Z != NZeros; ++Z)
3743 for (; I != NDigits; ++I)
3744 Str.push_back(buffer[NDigits-I-1]);
3747 bool APFloat::getExactInverse(APFloat *inv) const {
3748 // Special floats and denormals have no exact inverse.
3749 if (!isFiniteNonZero())
3752 // Check that the number is a power of two by making sure that only the
3753 // integer bit is set in the significand.
3754 if (significandLSB() != semantics->precision - 1)
3758 APFloat reciprocal(*semantics, 1ULL);
3759 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3762 // Avoid multiplication with a denormal, it is not safe on all platforms and
3763 // may be slower than a normal division.
3764 if (reciprocal.isDenormal())
3767 assert(reciprocal.isFiniteNonZero() &&
3768 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3776 bool APFloat::isSignaling() const {
3780 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3781 // first bit of the trailing significand being 0.
3782 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3785 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3787 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3788 /// appropriate sign switching before/after the computation.
3789 APFloat::opStatus APFloat::next(bool nextDown) {
3790 // If we are performing nextDown, swap sign so we have -x.
3794 // Compute nextUp(x)
3795 opStatus result = opOK;
3797 // Handle each float category separately.
3800 // nextUp(+inf) = +inf
3803 // nextUp(-inf) = -getLargest()
3807 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3808 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3809 // change the payload.
3810 if (isSignaling()) {
3811 result = opInvalidOp;
3812 // For consistency, propagate the sign of the sNaN to the qNaN.
3813 makeNaN(false, isNegative(), nullptr);
3817 // nextUp(pm 0) = +getSmallest()
3818 makeSmallest(false);
3821 // nextUp(-getSmallest()) = -0
3822 if (isSmallest() && isNegative()) {
3823 APInt::tcSet(significandParts(), 0, partCount());
3829 // nextUp(getLargest()) == INFINITY
3830 if (isLargest() && !isNegative()) {
3831 APInt::tcSet(significandParts(), 0, partCount());
3832 category = fcInfinity;
3833 exponent = semantics->maxExponent + 1;
3837 // nextUp(normal) == normal + inc.
3839 // If we are negative, we need to decrement the significand.
3841 // We only cross a binade boundary that requires adjusting the exponent
3843 // 1. exponent != semantics->minExponent. This implies we are not in the
3844 // smallest binade or are dealing with denormals.
3845 // 2. Our significand excluding the integral bit is all zeros.
3846 bool WillCrossBinadeBoundary =
3847 exponent != semantics->minExponent && isSignificandAllZeros();
3849 // Decrement the significand.
3851 // We always do this since:
3852 // 1. If we are dealing with a non-binade decrement, by definition we
3853 // just decrement the significand.
3854 // 2. If we are dealing with a normal -> normal binade decrement, since
3855 // we have an explicit integral bit the fact that all bits but the
3856 // integral bit are zero implies that subtracting one will yield a
3857 // significand with 0 integral bit and 1 in all other spots. Thus we
3858 // must just adjust the exponent and set the integral bit to 1.
3859 // 3. If we are dealing with a normal -> denormal binade decrement,
3860 // since we set the integral bit to 0 when we represent denormals, we
3861 // just decrement the significand.
3862 integerPart *Parts = significandParts();
3863 APInt::tcDecrement(Parts, partCount());
3865 if (WillCrossBinadeBoundary) {
3866 // Our result is a normal number. Do the following:
3867 // 1. Set the integral bit to 1.
3868 // 2. Decrement the exponent.
3869 APInt::tcSetBit(Parts, semantics->precision - 1);
3873 // If we are positive, we need to increment the significand.
3875 // We only cross a binade boundary that requires adjusting the exponent if
3876 // the input is not a denormal and all of said input's significand bits
3877 // are set. If all of said conditions are true: clear the significand, set
3878 // the integral bit to 1, and increment the exponent. If we have a
3879 // denormal always increment since moving denormals and the numbers in the
3880 // smallest normal binade have the same exponent in our representation.
3881 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3883 if (WillCrossBinadeBoundary) {
3884 integerPart *Parts = significandParts();
3885 APInt::tcSet(Parts, 0, partCount());
3886 APInt::tcSetBit(Parts, semantics->precision - 1);
3887 assert(exponent != semantics->maxExponent &&
3888 "We can not increment an exponent beyond the maxExponent allowed"
3889 " by the given floating point semantics.");
3892 incrementSignificand();
3898 // If we are performing nextDown, swap sign so we have -nextUp(-x)
3906 APFloat::makeInf(bool Negative) {
3907 category = fcInfinity;
3909 exponent = semantics->maxExponent + 1;
3910 APInt::tcSet(significandParts(), 0, partCount());
3914 APFloat::makeZero(bool Negative) {
3917 exponent = semantics->minExponent-1;
3918 APInt::tcSet(significandParts(), 0, partCount());
3921 APFloat llvm::scalbn(APFloat X, int Exp) {
3922 if (X.isInfinity() || X.isZero() || X.isNaN())
3923 return std::move(X);
3925 auto MaxExp = X.getSemantics().maxExponent;
3926 auto MinExp = X.getSemantics().minExponent;
3927 if (Exp > (MaxExp - X.exponent))
3928 // Overflow saturates to infinity.
3929 return APFloat::getInf(X.getSemantics(), X.isNegative());
3930 if (Exp < (MinExp - X.exponent))
3931 // Underflow saturates to zero.
3932 return APFloat::getZero(X.getSemantics(), X.isNegative());
3935 return std::move(X);