1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/MathExtras.h"
28 /// A macro used to combine two fcCategory enums into one key which can be used
29 /// in a switch statement to classify how the interaction of two APFloat's
30 /// categories affects an operation.
32 /// TODO: If clang source code is ever allowed to use constexpr in its own
33 /// codebase, change this into a static inline function.
34 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
36 /* Assumed in hexadecimal significand parsing, and conversion to
37 hexadecimal strings. */
38 static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
42 /* Represents floating point arithmetic semantics. */
44 /* The largest E such that 2^E is representable; this matches the
45 definition of IEEE 754. */
46 APFloat::ExponentType maxExponent;
48 /* The smallest E such that 2^E is a normalized number; this
49 matches the definition of IEEE 754. */
50 APFloat::ExponentType minExponent;
52 /* Number of bits in the significand. This includes the integer
54 unsigned int precision;
57 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 };
58 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 };
59 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 };
60 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 };
61 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 };
62 const fltSemantics APFloat::Bogus = { 0, 0, 0 };
64 /* The PowerPC format consists of two doubles. It does not map cleanly
65 onto the usual format above. It is approximated using twice the
66 mantissa bits. Note that for exponents near the double minimum,
67 we no longer can represent the full 106 mantissa bits, so those
68 will be treated as denormal numbers.
70 FIXME: While this approximation is equivalent to what GCC uses for
71 compile-time arithmetic on PPC double-double numbers, it is not able
72 to represent all possible values held by a PPC double-double number,
73 for example: (long double) 1.0 + (long double) 0x1p-106
74 Should this be replaced by a full emulation of PPC double-double? */
75 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 };
77 /* A tight upper bound on number of parts required to hold the value
80 power * 815 / (351 * integerPartWidth) + 1
82 However, whilst the result may require only this many parts,
83 because we are multiplying two values to get it, the
84 multiplication may require an extra part with the excess part
85 being zero (consider the trivial case of 1 * 1, tcFullMultiply
86 requires two parts to hold the single-part result). So we add an
87 extra one to guarantee enough space whilst multiplying. */
88 const unsigned int maxExponent = 16383;
89 const unsigned int maxPrecision = 113;
90 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
91 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
92 / (351 * integerPartWidth));
95 /* A bunch of private, handy routines. */
97 static inline unsigned int
98 partCountForBits(unsigned int bits)
100 return ((bits) + integerPartWidth - 1) / integerPartWidth;
103 /* Returns 0U-9U. Return values >= 10U are not digits. */
104 static inline unsigned int
105 decDigitValue(unsigned int c)
110 /* Return the value of a decimal exponent of the form
113 If the exponent overflows, returns a large exponent with the
116 readExponent(StringRef::iterator begin, StringRef::iterator end)
119 unsigned int absExponent;
120 const unsigned int overlargeExponent = 24000; /* FIXME. */
121 StringRef::iterator p = begin;
123 assert(p != end && "Exponent has no digits");
125 isNegative = (*p == '-');
126 if (*p == '-' || *p == '+') {
128 assert(p != end && "Exponent has no digits");
131 absExponent = decDigitValue(*p++);
132 assert(absExponent < 10U && "Invalid character in exponent");
134 for (; p != end; ++p) {
137 value = decDigitValue(*p);
138 assert(value < 10U && "Invalid character in exponent");
140 value += absExponent * 10;
141 if (absExponent >= overlargeExponent) {
142 absExponent = overlargeExponent;
143 p = end; /* outwit assert below */
149 assert(p == end && "Invalid exponent in exponent");
152 return -(int) absExponent;
154 return (int) absExponent;
157 /* This is ugly and needs cleaning up, but I don't immediately see
158 how whilst remaining safe. */
160 totalExponent(StringRef::iterator p, StringRef::iterator end,
161 int exponentAdjustment)
163 int unsignedExponent;
164 bool negative, overflow;
167 assert(p != end && "Exponent has no digits");
169 negative = *p == '-';
170 if (*p == '-' || *p == '+') {
172 assert(p != end && "Exponent has no digits");
175 unsignedExponent = 0;
177 for (; p != end; ++p) {
180 value = decDigitValue(*p);
181 assert(value < 10U && "Invalid character in exponent");
183 unsignedExponent = unsignedExponent * 10 + value;
184 if (unsignedExponent > 32767) {
190 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
194 exponent = unsignedExponent;
196 exponent = -exponent;
197 exponent += exponentAdjustment;
198 if (exponent > 32767 || exponent < -32768)
203 exponent = negative ? -32768: 32767;
208 static StringRef::iterator
209 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
210 StringRef::iterator *dot)
212 StringRef::iterator p = begin;
214 while (p != end && *p == '0')
217 if (p != end && *p == '.') {
220 assert(end - begin != 1 && "Significand has no digits");
222 while (p != end && *p == '0')
229 /* Given a normal decimal floating point number of the form
233 where the decimal point and exponent are optional, fill out the
234 structure D. Exponent is appropriate if the significand is
235 treated as an integer, and normalizedExponent if the significand
236 is taken to have the decimal point after a single leading
239 If the value is zero, V->firstSigDigit points to a non-digit, and
240 the return exponent is zero.
243 const char *firstSigDigit;
244 const char *lastSigDigit;
246 int normalizedExponent;
250 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
253 StringRef::iterator dot = end;
254 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
256 D->firstSigDigit = p;
258 D->normalizedExponent = 0;
260 for (; p != end; ++p) {
262 assert(dot == end && "String contains multiple dots");
267 if (decDigitValue(*p) >= 10U)
272 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
273 assert(p != begin && "Significand has no digits");
274 assert((dot == end || p - begin != 1) && "Significand has no digits");
276 /* p points to the first non-digit in the string */
277 D->exponent = readExponent(p + 1, end);
279 /* Implied decimal point? */
284 /* If number is all zeroes accept any exponent. */
285 if (p != D->firstSigDigit) {
286 /* Drop insignificant trailing zeroes. */
291 while (p != begin && *p == '0');
292 while (p != begin && *p == '.');
295 /* Adjust the exponents for any decimal point. */
296 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
297 D->normalizedExponent = (D->exponent +
298 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
299 - (dot > D->firstSigDigit && dot < p)));
305 /* Return the trailing fraction of a hexadecimal number.
306 DIGITVALUE is the first hex digit of the fraction, P points to
309 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
310 unsigned int digitValue)
312 unsigned int hexDigit;
314 /* If the first trailing digit isn't 0 or 8 we can work out the
315 fraction immediately. */
317 return lfMoreThanHalf;
318 else if (digitValue < 8 && digitValue > 0)
319 return lfLessThanHalf;
321 // Otherwise we need to find the first non-zero digit.
322 while (p != end && (*p == '0' || *p == '.'))
325 assert(p != end && "Invalid trailing hexadecimal fraction!");
327 hexDigit = hexDigitValue(*p);
329 /* If we ran off the end it is exactly zero or one-half, otherwise
332 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
334 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
337 /* Return the fraction lost were a bignum truncated losing the least
338 significant BITS bits. */
340 lostFractionThroughTruncation(const integerPart *parts,
341 unsigned int partCount,
346 lsb = APInt::tcLSB(parts, partCount);
348 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
350 return lfExactlyZero;
352 return lfExactlyHalf;
353 if (bits <= partCount * integerPartWidth &&
354 APInt::tcExtractBit(parts, bits - 1))
355 return lfMoreThanHalf;
357 return lfLessThanHalf;
360 /* Shift DST right BITS bits noting lost fraction. */
362 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
364 lostFraction lost_fraction;
366 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
368 APInt::tcShiftRight(dst, parts, bits);
370 return lost_fraction;
373 /* Combine the effect of two lost fractions. */
375 combineLostFractions(lostFraction moreSignificant,
376 lostFraction lessSignificant)
378 if (lessSignificant != lfExactlyZero) {
379 if (moreSignificant == lfExactlyZero)
380 moreSignificant = lfLessThanHalf;
381 else if (moreSignificant == lfExactlyHalf)
382 moreSignificant = lfMoreThanHalf;
385 return moreSignificant;
388 /* The error from the true value, in half-ulps, on multiplying two
389 floating point numbers, which differ from the value they
390 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
391 than the returned value.
393 See "How to Read Floating Point Numbers Accurately" by William D
396 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
398 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
400 if (HUerr1 + HUerr2 == 0)
401 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
403 return inexactMultiply + 2 * (HUerr1 + HUerr2);
406 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
407 when the least significant BITS are truncated. BITS cannot be
410 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
412 unsigned int count, partBits;
413 integerPart part, boundary;
418 count = bits / integerPartWidth;
419 partBits = bits % integerPartWidth + 1;
421 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
424 boundary = (integerPart) 1 << (partBits - 1);
429 if (part - boundary <= boundary - part)
430 return part - boundary;
432 return boundary - part;
435 if (part == boundary) {
438 return ~(integerPart) 0; /* A lot. */
441 } else if (part == boundary - 1) {
444 return ~(integerPart) 0; /* A lot. */
449 return ~(integerPart) 0; /* A lot. */
452 /* Place pow(5, power) in DST, and return the number of parts used.
453 DST must be at least one part larger than size of the answer. */
455 powerOf5(integerPart *dst, unsigned int power)
457 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
459 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
460 pow5s[0] = 78125 * 5;
462 unsigned int partsCount[16] = { 1 };
463 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
465 assert(power <= maxExponent);
470 *p1 = firstEightPowers[power & 7];
476 for (unsigned int n = 0; power; power >>= 1, n++) {
481 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
483 pc = partsCount[n - 1];
484 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
486 if (pow5[pc - 1] == 0)
494 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
496 if (p2[result - 1] == 0)
499 /* Now result is in p1 with partsCount parts and p2 is scratch
501 tmp = p1, p1 = p2, p2 = tmp;
508 APInt::tcAssign(dst, p1, result);
513 /* Zero at the end to avoid modular arithmetic when adding one; used
514 when rounding up during hexadecimal output. */
515 static const char hexDigitsLower[] = "0123456789abcdef0";
516 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
517 static const char infinityL[] = "infinity";
518 static const char infinityU[] = "INFINITY";
519 static const char NaNL[] = "nan";
520 static const char NaNU[] = "NAN";
522 /* Write out an integerPart in hexadecimal, starting with the most
523 significant nibble. Write out exactly COUNT hexdigits, return
526 partAsHex (char *dst, integerPart part, unsigned int count,
527 const char *hexDigitChars)
529 unsigned int result = count;
531 assert(count != 0 && count <= integerPartWidth / 4);
533 part >>= (integerPartWidth - 4 * count);
535 dst[count] = hexDigitChars[part & 0xf];
542 /* Write out an unsigned decimal integer. */
544 writeUnsignedDecimal (char *dst, unsigned int n)
560 /* Write out a signed decimal integer. */
562 writeSignedDecimal (char *dst, int value)
566 dst = writeUnsignedDecimal(dst, -(unsigned) value);
568 dst = writeUnsignedDecimal(dst, value);
575 APFloat::initialize(const fltSemantics *ourSemantics)
579 semantics = ourSemantics;
582 significand.parts = new integerPart[count];
586 APFloat::freeSignificand()
589 delete [] significand.parts;
593 APFloat::assign(const APFloat &rhs)
595 assert(semantics == rhs.semantics);
598 category = rhs.category;
599 exponent = rhs.exponent;
600 if (isFiniteNonZero() || category == fcNaN)
601 copySignificand(rhs);
605 APFloat::copySignificand(const APFloat &rhs)
607 assert(isFiniteNonZero() || category == fcNaN);
608 assert(rhs.partCount() >= partCount());
610 APInt::tcAssign(significandParts(), rhs.significandParts(),
614 /* Make this number a NaN, with an arbitrary but deterministic value
615 for the significand. If double or longer, this is a signalling NaN,
616 which may not be ideal. If float, this is QNaN(0). */
617 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
622 integerPart *significand = significandParts();
623 unsigned numParts = partCount();
625 // Set the significand bits to the fill.
626 if (!fill || fill->getNumWords() < numParts)
627 APInt::tcSet(significand, 0, numParts);
629 APInt::tcAssign(significand, fill->getRawData(),
630 std::min(fill->getNumWords(), numParts));
632 // Zero out the excess bits of the significand.
633 unsigned bitsToPreserve = semantics->precision - 1;
634 unsigned part = bitsToPreserve / 64;
635 bitsToPreserve %= 64;
636 significand[part] &= ((1ULL << bitsToPreserve) - 1);
637 for (part++; part != numParts; ++part)
638 significand[part] = 0;
641 unsigned QNaNBit = semantics->precision - 2;
644 // We always have to clear the QNaN bit to make it an SNaN.
645 APInt::tcClearBit(significand, QNaNBit);
647 // If there are no bits set in the payload, we have to set
648 // *something* to make it a NaN instead of an infinity;
649 // conventionally, this is the next bit down from the QNaN bit.
650 if (APInt::tcIsZero(significand, numParts))
651 APInt::tcSetBit(significand, QNaNBit - 1);
653 // We always have to set the QNaN bit to make it a QNaN.
654 APInt::tcSetBit(significand, QNaNBit);
657 // For x87 extended precision, we want to make a NaN, not a
658 // pseudo-NaN. Maybe we should expose the ability to make
660 if (semantics == &APFloat::x87DoubleExtended)
661 APInt::tcSetBit(significand, QNaNBit + 1);
664 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
666 APFloat value(Sem, uninitialized);
667 value.makeNaN(SNaN, Negative, fill);
672 APFloat::operator=(const APFloat &rhs)
675 if (semantics != rhs.semantics) {
677 initialize(rhs.semantics);
686 APFloat::operator=(APFloat &&rhs) {
689 semantics = rhs.semantics;
690 significand = rhs.significand;
691 exponent = rhs.exponent;
692 category = rhs.category;
695 rhs.semantics = &Bogus;
700 APFloat::isDenormal() const {
701 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
702 (APInt::tcExtractBit(significandParts(),
703 semantics->precision - 1) == 0);
707 APFloat::isSmallest() const {
708 // The smallest number by magnitude in our format will be the smallest
709 // denormal, i.e. the floating point number with exponent being minimum
710 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
711 return isFiniteNonZero() && exponent == semantics->minExponent &&
712 significandMSB() == 0;
715 bool APFloat::isSignificandAllOnes() const {
716 // Test if the significand excluding the integral bit is all ones. This allows
717 // us to test for binade boundaries.
718 const integerPart *Parts = significandParts();
719 const unsigned PartCount = partCount();
720 for (unsigned i = 0; i < PartCount - 1; i++)
724 // Set the unused high bits to all ones when we compare.
725 const unsigned NumHighBits =
726 PartCount*integerPartWidth - semantics->precision + 1;
727 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
728 "fill than integerPartWidth");
729 const integerPart HighBitFill =
730 ~integerPart(0) << (integerPartWidth - NumHighBits);
731 if (~(Parts[PartCount - 1] | HighBitFill))
737 bool APFloat::isSignificandAllZeros() const {
738 // Test if the significand excluding the integral bit is all zeros. This
739 // allows us to test for binade boundaries.
740 const integerPart *Parts = significandParts();
741 const unsigned PartCount = partCount();
743 for (unsigned i = 0; i < PartCount - 1; i++)
747 const unsigned NumHighBits =
748 PartCount*integerPartWidth - semantics->precision + 1;
749 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
750 "clear than integerPartWidth");
751 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
753 if (Parts[PartCount - 1] & HighBitMask)
760 APFloat::isLargest() const {
761 // The largest number by magnitude in our format will be the floating point
762 // number with maximum exponent and with significand that is all ones.
763 return isFiniteNonZero() && exponent == semantics->maxExponent
764 && isSignificandAllOnes();
768 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
771 if (semantics != rhs.semantics ||
772 category != rhs.category ||
775 if (category==fcZero || category==fcInfinity)
777 else if (isFiniteNonZero() && exponent!=rhs.exponent)
781 const integerPart* p=significandParts();
782 const integerPart* q=rhs.significandParts();
783 for (; i>0; i--, p++, q++) {
791 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
792 initialize(&ourSemantics);
796 exponent = ourSemantics.precision - 1;
797 significandParts()[0] = value;
798 normalize(rmNearestTiesToEven, lfExactlyZero);
801 APFloat::APFloat(const fltSemantics &ourSemantics) {
802 initialize(&ourSemantics);
807 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
808 // Allocates storage if necessary but does not initialize it.
809 initialize(&ourSemantics);
812 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
813 initialize(&ourSemantics);
814 convertFromString(text, rmNearestTiesToEven);
817 APFloat::APFloat(const APFloat &rhs) {
818 initialize(rhs.semantics);
822 APFloat::APFloat(APFloat &&rhs) : semantics(&Bogus) {
823 *this = std::move(rhs);
831 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
832 void APFloat::Profile(FoldingSetNodeID& ID) const {
833 ID.Add(bitcastToAPInt());
837 APFloat::partCount() const
839 return partCountForBits(semantics->precision + 1);
843 APFloat::semanticsPrecision(const fltSemantics &semantics)
845 return semantics.precision;
849 APFloat::significandParts() const
851 return const_cast<APFloat *>(this)->significandParts();
855 APFloat::significandParts()
858 return significand.parts;
860 return &significand.part;
864 APFloat::zeroSignificand()
866 APInt::tcSet(significandParts(), 0, partCount());
869 /* Increment an fcNormal floating point number's significand. */
871 APFloat::incrementSignificand()
875 carry = APInt::tcIncrement(significandParts(), partCount());
877 /* Our callers should never cause us to overflow. */
882 /* Add the significand of the RHS. Returns the carry flag. */
884 APFloat::addSignificand(const APFloat &rhs)
888 parts = significandParts();
890 assert(semantics == rhs.semantics);
891 assert(exponent == rhs.exponent);
893 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
896 /* Subtract the significand of the RHS with a borrow flag. Returns
899 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
903 parts = significandParts();
905 assert(semantics == rhs.semantics);
906 assert(exponent == rhs.exponent);
908 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
912 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
913 on to the full-precision result of the multiplication. Returns the
916 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
918 unsigned int omsb; // One, not zero, based MSB.
919 unsigned int partsCount, newPartsCount, precision;
920 integerPart *lhsSignificand;
921 integerPart scratch[4];
922 integerPart *fullSignificand;
923 lostFraction lost_fraction;
926 assert(semantics == rhs.semantics);
928 precision = semantics->precision;
929 newPartsCount = partCountForBits(precision * 2);
931 if (newPartsCount > 4)
932 fullSignificand = new integerPart[newPartsCount];
934 fullSignificand = scratch;
936 lhsSignificand = significandParts();
937 partsCount = partCount();
939 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
940 rhs.significandParts(), partsCount, partsCount);
942 lost_fraction = lfExactlyZero;
943 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
944 exponent += rhs.exponent;
946 // Assume the operands involved in the multiplication are single-precision
947 // FP, and the two multiplicants are:
948 // *this = a23 . a22 ... a0 * 2^e1
949 // rhs = b23 . b22 ... b0 * 2^e2
950 // the result of multiplication is:
951 // *this = c47 c46 . c45 ... c0 * 2^(e1+e2)
952 // Note that there are two significant bits at the left-hand side of the
953 // radix point. Move the radix point toward left by one bit, and adjust
954 // exponent accordingly.
957 if (addend && addend->isNonZero()) {
958 // The intermediate result of the multiplication has "2 * precision"
959 // signicant bit; adjust the addend to be consistent with mul result.
961 Significand savedSignificand = significand;
962 const fltSemantics *savedSemantics = semantics;
963 fltSemantics extendedSemantics;
965 unsigned int extendedPrecision;
967 /* Normalize our MSB. */
968 extendedPrecision = 2 * precision;
969 if (omsb != extendedPrecision) {
970 assert(extendedPrecision > omsb);
971 APInt::tcShiftLeft(fullSignificand, newPartsCount,
972 extendedPrecision - omsb);
973 exponent -= extendedPrecision - omsb;
976 /* Create new semantics. */
977 extendedSemantics = *semantics;
978 extendedSemantics.precision = extendedPrecision;
980 if (newPartsCount == 1)
981 significand.part = fullSignificand[0];
983 significand.parts = fullSignificand;
984 semantics = &extendedSemantics;
986 APFloat extendedAddend(*addend);
987 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
988 assert(status == opOK);
990 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
992 /* Restore our state. */
993 if (newPartsCount == 1)
994 fullSignificand[0] = significand.part;
995 significand = savedSignificand;
996 semantics = savedSemantics;
998 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1001 // Convert the result having "2 * precision" significant-bits back to the one
1002 // having "precision" significant-bits. First, move the radix point from
1003 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1004 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1005 exponent -= precision;
1007 // In case MSB resides at the left-hand side of radix point, shift the
1008 // mantissa right by some amount to make sure the MSB reside right before
1009 // the radix point (i.e. "MSB . rest-significant-bits").
1011 // Note that the result is not normalized when "omsb < precision". So, the
1012 // caller needs to call APFloat::normalize() if normalized value is expected.
1013 if (omsb > precision) {
1014 unsigned int bits, significantParts;
1017 bits = omsb - precision;
1018 significantParts = partCountForBits(omsb);
1019 lf = shiftRight(fullSignificand, significantParts, bits);
1020 lost_fraction = combineLostFractions(lf, lost_fraction);
1024 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1026 if (newPartsCount > 4)
1027 delete [] fullSignificand;
1029 return lost_fraction;
1032 /* Multiply the significands of LHS and RHS to DST. */
1034 APFloat::divideSignificand(const APFloat &rhs)
1036 unsigned int bit, i, partsCount;
1037 const integerPart *rhsSignificand;
1038 integerPart *lhsSignificand, *dividend, *divisor;
1039 integerPart scratch[4];
1040 lostFraction lost_fraction;
1042 assert(semantics == rhs.semantics);
1044 lhsSignificand = significandParts();
1045 rhsSignificand = rhs.significandParts();
1046 partsCount = partCount();
1049 dividend = new integerPart[partsCount * 2];
1053 divisor = dividend + partsCount;
1055 /* Copy the dividend and divisor as they will be modified in-place. */
1056 for (i = 0; i < partsCount; i++) {
1057 dividend[i] = lhsSignificand[i];
1058 divisor[i] = rhsSignificand[i];
1059 lhsSignificand[i] = 0;
1062 exponent -= rhs.exponent;
1064 unsigned int precision = semantics->precision;
1066 /* Normalize the divisor. */
1067 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1070 APInt::tcShiftLeft(divisor, partsCount, bit);
1073 /* Normalize the dividend. */
1074 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1077 APInt::tcShiftLeft(dividend, partsCount, bit);
1080 /* Ensure the dividend >= divisor initially for the loop below.
1081 Incidentally, this means that the division loop below is
1082 guaranteed to set the integer bit to one. */
1083 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1085 APInt::tcShiftLeft(dividend, partsCount, 1);
1086 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1089 /* Long division. */
1090 for (bit = precision; bit; bit -= 1) {
1091 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1092 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1093 APInt::tcSetBit(lhsSignificand, bit - 1);
1096 APInt::tcShiftLeft(dividend, partsCount, 1);
1099 /* Figure out the lost fraction. */
1100 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1103 lost_fraction = lfMoreThanHalf;
1105 lost_fraction = lfExactlyHalf;
1106 else if (APInt::tcIsZero(dividend, partsCount))
1107 lost_fraction = lfExactlyZero;
1109 lost_fraction = lfLessThanHalf;
1114 return lost_fraction;
1118 APFloat::significandMSB() const
1120 return APInt::tcMSB(significandParts(), partCount());
1124 APFloat::significandLSB() const
1126 return APInt::tcLSB(significandParts(), partCount());
1129 /* Note that a zero result is NOT normalized to fcZero. */
1131 APFloat::shiftSignificandRight(unsigned int bits)
1133 /* Our exponent should not overflow. */
1134 assert((ExponentType) (exponent + bits) >= exponent);
1138 return shiftRight(significandParts(), partCount(), bits);
1141 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1143 APFloat::shiftSignificandLeft(unsigned int bits)
1145 assert(bits < semantics->precision);
1148 unsigned int partsCount = partCount();
1150 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1153 assert(!APInt::tcIsZero(significandParts(), partsCount));
1158 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1162 assert(semantics == rhs.semantics);
1163 assert(isFiniteNonZero());
1164 assert(rhs.isFiniteNonZero());
1166 compare = exponent - rhs.exponent;
1168 /* If exponents are equal, do an unsigned bignum comparison of the
1171 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1175 return cmpGreaterThan;
1176 else if (compare < 0)
1182 /* Handle overflow. Sign is preserved. We either become infinity or
1183 the largest finite number. */
1185 APFloat::handleOverflow(roundingMode rounding_mode)
1188 if (rounding_mode == rmNearestTiesToEven ||
1189 rounding_mode == rmNearestTiesToAway ||
1190 (rounding_mode == rmTowardPositive && !sign) ||
1191 (rounding_mode == rmTowardNegative && sign)) {
1192 category = fcInfinity;
1193 return (opStatus) (opOverflow | opInexact);
1196 /* Otherwise we become the largest finite number. */
1197 category = fcNormal;
1198 exponent = semantics->maxExponent;
1199 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1200 semantics->precision);
1205 /* Returns TRUE if, when truncating the current number, with BIT the
1206 new LSB, with the given lost fraction and rounding mode, the result
1207 would need to be rounded away from zero (i.e., by increasing the
1208 signficand). This routine must work for fcZero of both signs, and
1209 fcNormal numbers. */
1211 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1212 lostFraction lost_fraction,
1213 unsigned int bit) const
1215 /* NaNs and infinities should not have lost fractions. */
1216 assert(isFiniteNonZero() || category == fcZero);
1218 /* Current callers never pass this so we don't handle it. */
1219 assert(lost_fraction != lfExactlyZero);
1221 switch (rounding_mode) {
1222 case rmNearestTiesToAway:
1223 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1225 case rmNearestTiesToEven:
1226 if (lost_fraction == lfMoreThanHalf)
1229 /* Our zeroes don't have a significand to test. */
1230 if (lost_fraction == lfExactlyHalf && category != fcZero)
1231 return APInt::tcExtractBit(significandParts(), bit);
1238 case rmTowardPositive:
1239 return sign == false;
1241 case rmTowardNegative:
1242 return sign == true;
1244 llvm_unreachable("Invalid rounding mode found");
1248 APFloat::normalize(roundingMode rounding_mode,
1249 lostFraction lost_fraction)
1251 unsigned int omsb; /* One, not zero, based MSB. */
1254 if (!isFiniteNonZero())
1257 /* Before rounding normalize the exponent of fcNormal numbers. */
1258 omsb = significandMSB() + 1;
1261 /* OMSB is numbered from 1. We want to place it in the integer
1262 bit numbered PRECISION if possible, with a compensating change in
1264 exponentChange = omsb - semantics->precision;
1266 /* If the resulting exponent is too high, overflow according to
1267 the rounding mode. */
1268 if (exponent + exponentChange > semantics->maxExponent)
1269 return handleOverflow(rounding_mode);
1271 /* Subnormal numbers have exponent minExponent, and their MSB
1272 is forced based on that. */
1273 if (exponent + exponentChange < semantics->minExponent)
1274 exponentChange = semantics->minExponent - exponent;
1276 /* Shifting left is easy as we don't lose precision. */
1277 if (exponentChange < 0) {
1278 assert(lost_fraction == lfExactlyZero);
1280 shiftSignificandLeft(-exponentChange);
1285 if (exponentChange > 0) {
1288 /* Shift right and capture any new lost fraction. */
1289 lf = shiftSignificandRight(exponentChange);
1291 lost_fraction = combineLostFractions(lf, lost_fraction);
1293 /* Keep OMSB up-to-date. */
1294 if (omsb > (unsigned) exponentChange)
1295 omsb -= exponentChange;
1301 /* Now round the number according to rounding_mode given the lost
1304 /* As specified in IEEE 754, since we do not trap we do not report
1305 underflow for exact results. */
1306 if (lost_fraction == lfExactlyZero) {
1307 /* Canonicalize zeroes. */
1314 /* Increment the significand if we're rounding away from zero. */
1315 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1317 exponent = semantics->minExponent;
1319 incrementSignificand();
1320 omsb = significandMSB() + 1;
1322 /* Did the significand increment overflow? */
1323 if (omsb == (unsigned) semantics->precision + 1) {
1324 /* Renormalize by incrementing the exponent and shifting our
1325 significand right one. However if we already have the
1326 maximum exponent we overflow to infinity. */
1327 if (exponent == semantics->maxExponent) {
1328 category = fcInfinity;
1330 return (opStatus) (opOverflow | opInexact);
1333 shiftSignificandRight(1);
1339 /* The normal case - we were and are not denormal, and any
1340 significand increment above didn't overflow. */
1341 if (omsb == semantics->precision)
1344 /* We have a non-zero denormal. */
1345 assert(omsb < semantics->precision);
1347 /* Canonicalize zeroes. */
1351 /* The fcZero case is a denormal that underflowed to zero. */
1352 return (opStatus) (opUnderflow | opInexact);
1356 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1358 switch (PackCategoriesIntoKey(category, rhs.category)) {
1360 llvm_unreachable(nullptr);
1362 case PackCategoriesIntoKey(fcNaN, fcZero):
1363 case PackCategoriesIntoKey(fcNaN, fcNormal):
1364 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1365 case PackCategoriesIntoKey(fcNaN, fcNaN):
1366 case PackCategoriesIntoKey(fcNormal, fcZero):
1367 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1368 case PackCategoriesIntoKey(fcInfinity, fcZero):
1371 case PackCategoriesIntoKey(fcZero, fcNaN):
1372 case PackCategoriesIntoKey(fcNormal, fcNaN):
1373 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1374 // We need to be sure to flip the sign here for subtraction because we
1375 // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1376 sign = rhs.sign ^ subtract;
1378 copySignificand(rhs);
1381 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1382 case PackCategoriesIntoKey(fcZero, fcInfinity):
1383 category = fcInfinity;
1384 sign = rhs.sign ^ subtract;
1387 case PackCategoriesIntoKey(fcZero, fcNormal):
1389 sign = rhs.sign ^ subtract;
1392 case PackCategoriesIntoKey(fcZero, fcZero):
1393 /* Sign depends on rounding mode; handled by caller. */
1396 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1397 /* Differently signed infinities can only be validly
1399 if (((sign ^ rhs.sign)!=0) != subtract) {
1406 case PackCategoriesIntoKey(fcNormal, fcNormal):
1411 /* Add or subtract two normal numbers. */
1413 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1416 lostFraction lost_fraction;
1419 /* Determine if the operation on the absolute values is effectively
1420 an addition or subtraction. */
1421 subtract ^= (sign ^ rhs.sign) ? true : false;
1423 /* Are we bigger exponent-wise than the RHS? */
1424 bits = exponent - rhs.exponent;
1426 /* Subtraction is more subtle than one might naively expect. */
1428 APFloat temp_rhs(rhs);
1432 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1433 lost_fraction = lfExactlyZero;
1434 } else if (bits > 0) {
1435 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1436 shiftSignificandLeft(1);
1439 lost_fraction = shiftSignificandRight(-bits - 1);
1440 temp_rhs.shiftSignificandLeft(1);
1445 carry = temp_rhs.subtractSignificand
1446 (*this, lost_fraction != lfExactlyZero);
1447 copySignificand(temp_rhs);
1450 carry = subtractSignificand
1451 (temp_rhs, lost_fraction != lfExactlyZero);
1454 /* Invert the lost fraction - it was on the RHS and
1456 if (lost_fraction == lfLessThanHalf)
1457 lost_fraction = lfMoreThanHalf;
1458 else if (lost_fraction == lfMoreThanHalf)
1459 lost_fraction = lfLessThanHalf;
1461 /* The code above is intended to ensure that no borrow is
1467 APFloat temp_rhs(rhs);
1469 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1470 carry = addSignificand(temp_rhs);
1472 lost_fraction = shiftSignificandRight(-bits);
1473 carry = addSignificand(rhs);
1476 /* We have a guard bit; generating a carry cannot happen. */
1481 return lost_fraction;
1485 APFloat::multiplySpecials(const APFloat &rhs)
1487 switch (PackCategoriesIntoKey(category, rhs.category)) {
1489 llvm_unreachable(nullptr);
1491 case PackCategoriesIntoKey(fcNaN, fcZero):
1492 case PackCategoriesIntoKey(fcNaN, fcNormal):
1493 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1494 case PackCategoriesIntoKey(fcNaN, fcNaN):
1498 case PackCategoriesIntoKey(fcZero, fcNaN):
1499 case PackCategoriesIntoKey(fcNormal, fcNaN):
1500 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1503 copySignificand(rhs);
1506 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1507 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1508 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1509 category = fcInfinity;
1512 case PackCategoriesIntoKey(fcZero, fcNormal):
1513 case PackCategoriesIntoKey(fcNormal, fcZero):
1514 case PackCategoriesIntoKey(fcZero, fcZero):
1518 case PackCategoriesIntoKey(fcZero, fcInfinity):
1519 case PackCategoriesIntoKey(fcInfinity, fcZero):
1523 case PackCategoriesIntoKey(fcNormal, fcNormal):
1529 APFloat::divideSpecials(const APFloat &rhs)
1531 switch (PackCategoriesIntoKey(category, rhs.category)) {
1533 llvm_unreachable(nullptr);
1535 case PackCategoriesIntoKey(fcZero, fcNaN):
1536 case PackCategoriesIntoKey(fcNormal, fcNaN):
1537 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1539 copySignificand(rhs);
1540 case PackCategoriesIntoKey(fcNaN, fcZero):
1541 case PackCategoriesIntoKey(fcNaN, fcNormal):
1542 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1543 case PackCategoriesIntoKey(fcNaN, fcNaN):
1545 case PackCategoriesIntoKey(fcInfinity, fcZero):
1546 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1547 case PackCategoriesIntoKey(fcZero, fcInfinity):
1548 case PackCategoriesIntoKey(fcZero, fcNormal):
1551 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1555 case PackCategoriesIntoKey(fcNormal, fcZero):
1556 category = fcInfinity;
1559 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1560 case PackCategoriesIntoKey(fcZero, fcZero):
1564 case PackCategoriesIntoKey(fcNormal, fcNormal):
1570 APFloat::modSpecials(const APFloat &rhs)
1572 switch (PackCategoriesIntoKey(category, rhs.category)) {
1574 llvm_unreachable(nullptr);
1576 case PackCategoriesIntoKey(fcNaN, fcZero):
1577 case PackCategoriesIntoKey(fcNaN, fcNormal):
1578 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1579 case PackCategoriesIntoKey(fcNaN, fcNaN):
1580 case PackCategoriesIntoKey(fcZero, fcInfinity):
1581 case PackCategoriesIntoKey(fcZero, fcNormal):
1582 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1585 case PackCategoriesIntoKey(fcZero, fcNaN):
1586 case PackCategoriesIntoKey(fcNormal, fcNaN):
1587 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1590 copySignificand(rhs);
1593 case PackCategoriesIntoKey(fcNormal, fcZero):
1594 case PackCategoriesIntoKey(fcInfinity, fcZero):
1595 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1596 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1597 case PackCategoriesIntoKey(fcZero, fcZero):
1601 case PackCategoriesIntoKey(fcNormal, fcNormal):
1608 APFloat::changeSign()
1610 /* Look mummy, this one's easy. */
1615 APFloat::clearSign()
1617 /* So is this one. */
1622 APFloat::copySign(const APFloat &rhs)
1628 /* Normalized addition or subtraction. */
1630 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1635 fs = addOrSubtractSpecials(rhs, subtract);
1637 /* This return code means it was not a simple case. */
1638 if (fs == opDivByZero) {
1639 lostFraction lost_fraction;
1641 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1642 fs = normalize(rounding_mode, lost_fraction);
1644 /* Can only be zero if we lost no fraction. */
1645 assert(category != fcZero || lost_fraction == lfExactlyZero);
1648 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1649 positive zero unless rounding to minus infinity, except that
1650 adding two like-signed zeroes gives that zero. */
1651 if (category == fcZero) {
1652 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1653 sign = (rounding_mode == rmTowardNegative);
1659 /* Normalized addition. */
1661 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1663 return addOrSubtract(rhs, rounding_mode, false);
1666 /* Normalized subtraction. */
1668 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1670 return addOrSubtract(rhs, rounding_mode, true);
1673 /* Normalized multiply. */
1675 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1680 fs = multiplySpecials(rhs);
1682 if (isFiniteNonZero()) {
1683 lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
1684 fs = normalize(rounding_mode, lost_fraction);
1685 if (lost_fraction != lfExactlyZero)
1686 fs = (opStatus) (fs | opInexact);
1692 /* Normalized divide. */
1694 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1699 fs = divideSpecials(rhs);
1701 if (isFiniteNonZero()) {
1702 lostFraction lost_fraction = divideSignificand(rhs);
1703 fs = normalize(rounding_mode, lost_fraction);
1704 if (lost_fraction != lfExactlyZero)
1705 fs = (opStatus) (fs | opInexact);
1711 /* Normalized remainder. This is not currently correct in all cases. */
1713 APFloat::remainder(const APFloat &rhs)
1717 unsigned int origSign = sign;
1719 fs = V.divide(rhs, rmNearestTiesToEven);
1720 if (fs == opDivByZero)
1723 int parts = partCount();
1724 integerPart *x = new integerPart[parts];
1726 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1727 rmNearestTiesToEven, &ignored);
1728 if (fs==opInvalidOp)
1731 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1732 rmNearestTiesToEven);
1733 assert(fs==opOK); // should always work
1735 fs = V.multiply(rhs, rmNearestTiesToEven);
1736 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1738 fs = subtract(V, rmNearestTiesToEven);
1739 assert(fs==opOK || fs==opInexact); // likewise
1742 sign = origSign; // IEEE754 requires this
1747 /* Normalized llvm frem (C fmod).
1748 This is not currently correct in all cases. */
1750 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1753 fs = modSpecials(rhs);
1755 if (isFiniteNonZero() && rhs.isFiniteNonZero()) {
1757 unsigned int origSign = sign;
1759 fs = V.divide(rhs, rmNearestTiesToEven);
1760 if (fs == opDivByZero)
1763 int parts = partCount();
1764 integerPart *x = new integerPart[parts];
1766 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1767 rmTowardZero, &ignored);
1768 if (fs==opInvalidOp)
1771 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1772 rmNearestTiesToEven);
1773 assert(fs==opOK); // should always work
1775 fs = V.multiply(rhs, rounding_mode);
1776 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1778 fs = subtract(V, rounding_mode);
1779 assert(fs==opOK || fs==opInexact); // likewise
1782 sign = origSign; // IEEE754 requires this
1788 /* Normalized fused-multiply-add. */
1790 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1791 const APFloat &addend,
1792 roundingMode rounding_mode)
1796 /* Post-multiplication sign, before addition. */
1797 sign ^= multiplicand.sign;
1799 /* If and only if all arguments are normal do we need to do an
1800 extended-precision calculation. */
1801 if (isFiniteNonZero() &&
1802 multiplicand.isFiniteNonZero() &&
1803 addend.isFinite()) {
1804 lostFraction lost_fraction;
1806 lost_fraction = multiplySignificand(multiplicand, &addend);
1807 fs = normalize(rounding_mode, lost_fraction);
1808 if (lost_fraction != lfExactlyZero)
1809 fs = (opStatus) (fs | opInexact);
1811 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1812 positive zero unless rounding to minus infinity, except that
1813 adding two like-signed zeroes gives that zero. */
1814 if (category == fcZero && sign != addend.sign)
1815 sign = (rounding_mode == rmTowardNegative);
1817 fs = multiplySpecials(multiplicand);
1819 /* FS can only be opOK or opInvalidOp. There is no more work
1820 to do in the latter case. The IEEE-754R standard says it is
1821 implementation-defined in this case whether, if ADDEND is a
1822 quiet NaN, we raise invalid op; this implementation does so.
1824 If we need to do the addition we can do so with normal
1827 fs = addOrSubtract(addend, rounding_mode, false);
1833 /* Rounding-mode corrrect round to integral value. */
1834 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1837 // If the exponent is large enough, we know that this value is already
1838 // integral, and the arithmetic below would potentially cause it to saturate
1839 // to +/-Inf. Bail out early instead.
1840 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1843 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1844 // precision of our format, and then subtract it back off again. The choice
1845 // of rounding modes for the addition/subtraction determines the rounding mode
1846 // for our integral rounding as well.
1847 // NOTE: When the input value is negative, we do subtraction followed by
1848 // addition instead.
1849 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1850 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1851 APFloat MagicConstant(*semantics);
1852 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1853 rmNearestTiesToEven);
1854 MagicConstant.copySign(*this);
1859 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1860 bool inputSign = isNegative();
1862 fs = add(MagicConstant, rounding_mode);
1863 if (fs != opOK && fs != opInexact)
1866 fs = subtract(MagicConstant, rounding_mode);
1868 // Restore the input sign.
1869 if (inputSign != isNegative())
1876 /* Comparison requires normalized numbers. */
1878 APFloat::compare(const APFloat &rhs) const
1882 assert(semantics == rhs.semantics);
1884 switch (PackCategoriesIntoKey(category, rhs.category)) {
1886 llvm_unreachable(nullptr);
1888 case PackCategoriesIntoKey(fcNaN, fcZero):
1889 case PackCategoriesIntoKey(fcNaN, fcNormal):
1890 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1891 case PackCategoriesIntoKey(fcNaN, fcNaN):
1892 case PackCategoriesIntoKey(fcZero, fcNaN):
1893 case PackCategoriesIntoKey(fcNormal, fcNaN):
1894 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1895 return cmpUnordered;
1897 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1898 case PackCategoriesIntoKey(fcInfinity, fcZero):
1899 case PackCategoriesIntoKey(fcNormal, fcZero):
1903 return cmpGreaterThan;
1905 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1906 case PackCategoriesIntoKey(fcZero, fcInfinity):
1907 case PackCategoriesIntoKey(fcZero, fcNormal):
1909 return cmpGreaterThan;
1913 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1914 if (sign == rhs.sign)
1919 return cmpGreaterThan;
1921 case PackCategoriesIntoKey(fcZero, fcZero):
1924 case PackCategoriesIntoKey(fcNormal, fcNormal):
1928 /* Two normal numbers. Do they have the same sign? */
1929 if (sign != rhs.sign) {
1931 result = cmpLessThan;
1933 result = cmpGreaterThan;
1935 /* Compare absolute values; invert result if negative. */
1936 result = compareAbsoluteValue(rhs);
1939 if (result == cmpLessThan)
1940 result = cmpGreaterThan;
1941 else if (result == cmpGreaterThan)
1942 result = cmpLessThan;
1949 /// APFloat::convert - convert a value of one floating point type to another.
1950 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1951 /// records whether the transformation lost information, i.e. whether
1952 /// converting the result back to the original type will produce the
1953 /// original value (this is almost the same as return value==fsOK, but there
1954 /// are edge cases where this is not so).
1957 APFloat::convert(const fltSemantics &toSemantics,
1958 roundingMode rounding_mode, bool *losesInfo)
1960 lostFraction lostFraction;
1961 unsigned int newPartCount, oldPartCount;
1964 const fltSemantics &fromSemantics = *semantics;
1966 lostFraction = lfExactlyZero;
1967 newPartCount = partCountForBits(toSemantics.precision + 1);
1968 oldPartCount = partCount();
1969 shift = toSemantics.precision - fromSemantics.precision;
1971 bool X86SpecialNan = false;
1972 if (&fromSemantics == &APFloat::x87DoubleExtended &&
1973 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1974 (!(*significandParts() & 0x8000000000000000ULL) ||
1975 !(*significandParts() & 0x4000000000000000ULL))) {
1976 // x86 has some unusual NaNs which cannot be represented in any other
1977 // format; note them here.
1978 X86SpecialNan = true;
1981 // If this is a truncation of a denormal number, and the target semantics
1982 // has larger exponent range than the source semantics (this can happen
1983 // when truncating from PowerPC double-double to double format), the
1984 // right shift could lose result mantissa bits. Adjust exponent instead
1985 // of performing excessive shift.
1986 if (shift < 0 && isFiniteNonZero()) {
1987 int exponentChange = significandMSB() + 1 - fromSemantics.precision;
1988 if (exponent + exponentChange < toSemantics.minExponent)
1989 exponentChange = toSemantics.minExponent - exponent;
1990 if (exponentChange < shift)
1991 exponentChange = shift;
1992 if (exponentChange < 0) {
1993 shift -= exponentChange;
1994 exponent += exponentChange;
1998 // If this is a truncation, perform the shift before we narrow the storage.
1999 if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
2000 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2002 // Fix the storage so it can hold to new value.
2003 if (newPartCount > oldPartCount) {
2004 // The new type requires more storage; make it available.
2005 integerPart *newParts;
2006 newParts = new integerPart[newPartCount];
2007 APInt::tcSet(newParts, 0, newPartCount);
2008 if (isFiniteNonZero() || category==fcNaN)
2009 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2011 significand.parts = newParts;
2012 } else if (newPartCount == 1 && oldPartCount != 1) {
2013 // Switch to built-in storage for a single part.
2014 integerPart newPart = 0;
2015 if (isFiniteNonZero() || category==fcNaN)
2016 newPart = significandParts()[0];
2018 significand.part = newPart;
2021 // Now that we have the right storage, switch the semantics.
2022 semantics = &toSemantics;
2024 // If this is an extension, perform the shift now that the storage is
2026 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2027 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2029 if (isFiniteNonZero()) {
2030 fs = normalize(rounding_mode, lostFraction);
2031 *losesInfo = (fs != opOK);
2032 } else if (category == fcNaN) {
2033 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2035 // For x87 extended precision, we want to make a NaN, not a special NaN if
2036 // the input wasn't special either.
2037 if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended)
2038 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2040 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2041 // does not give you back the same bits. This is dubious, and we
2042 // don't currently do it. You're really supposed to get
2043 // an invalid operation signal at runtime, but nobody does that.
2053 /* Convert a floating point number to an integer according to the
2054 rounding mode. If the rounded integer value is out of range this
2055 returns an invalid operation exception and the contents of the
2056 destination parts are unspecified. If the rounded value is in
2057 range but the floating point number is not the exact integer, the C
2058 standard doesn't require an inexact exception to be raised. IEEE
2059 854 does require it so we do that.
2061 Note that for conversions to integer type the C standard requires
2062 round-to-zero to always be used. */
2064 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
2066 roundingMode rounding_mode,
2067 bool *isExact) const
2069 lostFraction lost_fraction;
2070 const integerPart *src;
2071 unsigned int dstPartsCount, truncatedBits;
2075 /* Handle the three special cases first. */
2076 if (category == fcInfinity || category == fcNaN)
2079 dstPartsCount = partCountForBits(width);
2081 if (category == fcZero) {
2082 APInt::tcSet(parts, 0, dstPartsCount);
2083 // Negative zero can't be represented as an int.
2088 src = significandParts();
2090 /* Step 1: place our absolute value, with any fraction truncated, in
2093 /* Our absolute value is less than one; truncate everything. */
2094 APInt::tcSet(parts, 0, dstPartsCount);
2095 /* For exponent -1 the integer bit represents .5, look at that.
2096 For smaller exponents leftmost truncated bit is 0. */
2097 truncatedBits = semantics->precision -1U - exponent;
2099 /* We want the most significant (exponent + 1) bits; the rest are
2101 unsigned int bits = exponent + 1U;
2103 /* Hopelessly large in magnitude? */
2107 if (bits < semantics->precision) {
2108 /* We truncate (semantics->precision - bits) bits. */
2109 truncatedBits = semantics->precision - bits;
2110 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
2112 /* We want at least as many bits as are available. */
2113 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2114 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2119 /* Step 2: work out any lost fraction, and increment the absolute
2120 value if we would round away from zero. */
2121 if (truncatedBits) {
2122 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2124 if (lost_fraction != lfExactlyZero &&
2125 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2126 if (APInt::tcIncrement(parts, dstPartsCount))
2127 return opInvalidOp; /* Overflow. */
2130 lost_fraction = lfExactlyZero;
2133 /* Step 3: check if we fit in the destination. */
2134 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2138 /* Negative numbers cannot be represented as unsigned. */
2142 /* It takes omsb bits to represent the unsigned integer value.
2143 We lose a bit for the sign, but care is needed as the
2144 maximally negative integer is a special case. */
2145 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2148 /* This case can happen because of rounding. */
2153 APInt::tcNegate (parts, dstPartsCount);
2155 if (omsb >= width + !isSigned)
2159 if (lost_fraction == lfExactlyZero) {
2166 /* Same as convertToSignExtendedInteger, except we provide
2167 deterministic values in case of an invalid operation exception,
2168 namely zero for NaNs and the minimal or maximal value respectively
2169 for underflow or overflow.
2170 The *isExact output tells whether the result is exact, in the sense
2171 that converting it back to the original floating point type produces
2172 the original value. This is almost equivalent to result==opOK,
2173 except for negative zeroes.
2176 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2178 roundingMode rounding_mode, bool *isExact) const
2182 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2185 if (fs == opInvalidOp) {
2186 unsigned int bits, dstPartsCount;
2188 dstPartsCount = partCountForBits(width);
2190 if (category == fcNaN)
2195 bits = width - isSigned;
2197 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2198 if (sign && isSigned)
2199 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2205 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2206 an APSInt, whose initial bit-width and signed-ness are used to determine the
2207 precision of the conversion.
2210 APFloat::convertToInteger(APSInt &result,
2211 roundingMode rounding_mode, bool *isExact) const
2213 unsigned bitWidth = result.getBitWidth();
2214 SmallVector<uint64_t, 4> parts(result.getNumWords());
2215 opStatus status = convertToInteger(
2216 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2217 // Keeps the original signed-ness.
2218 result = APInt(bitWidth, parts);
2222 /* Convert an unsigned integer SRC to a floating point number,
2223 rounding according to ROUNDING_MODE. The sign of the floating
2224 point number is not modified. */
2226 APFloat::convertFromUnsignedParts(const integerPart *src,
2227 unsigned int srcCount,
2228 roundingMode rounding_mode)
2230 unsigned int omsb, precision, dstCount;
2232 lostFraction lost_fraction;
2234 category = fcNormal;
2235 omsb = APInt::tcMSB(src, srcCount) + 1;
2236 dst = significandParts();
2237 dstCount = partCount();
2238 precision = semantics->precision;
2240 /* We want the most significant PRECISION bits of SRC. There may not
2241 be that many; extract what we can. */
2242 if (precision <= omsb) {
2243 exponent = omsb - 1;
2244 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2246 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2248 exponent = precision - 1;
2249 lost_fraction = lfExactlyZero;
2250 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2253 return normalize(rounding_mode, lost_fraction);
2257 APFloat::convertFromAPInt(const APInt &Val,
2259 roundingMode rounding_mode)
2261 unsigned int partCount = Val.getNumWords();
2265 if (isSigned && api.isNegative()) {
2270 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2273 /* Convert a two's complement integer SRC to a floating point number,
2274 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2275 integer is signed, in which case it must be sign-extended. */
2277 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2278 unsigned int srcCount,
2280 roundingMode rounding_mode)
2285 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2288 /* If we're signed and negative negate a copy. */
2290 copy = new integerPart[srcCount];
2291 APInt::tcAssign(copy, src, srcCount);
2292 APInt::tcNegate(copy, srcCount);
2293 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2297 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2303 /* FIXME: should this just take a const APInt reference? */
2305 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2306 unsigned int width, bool isSigned,
2307 roundingMode rounding_mode)
2309 unsigned int partCount = partCountForBits(width);
2310 APInt api = APInt(width, makeArrayRef(parts, partCount));
2313 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2318 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2322 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2324 lostFraction lost_fraction = lfExactlyZero;
2326 category = fcNormal;
2330 integerPart *significand = significandParts();
2331 unsigned partsCount = partCount();
2332 unsigned bitPos = partsCount * integerPartWidth;
2333 bool computedTrailingFraction = false;
2335 // Skip leading zeroes and any (hexa)decimal point.
2336 StringRef::iterator begin = s.begin();
2337 StringRef::iterator end = s.end();
2338 StringRef::iterator dot;
2339 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2340 StringRef::iterator firstSignificantDigit = p;
2343 integerPart hex_value;
2346 assert(dot == end && "String contains multiple dots");
2351 hex_value = hexDigitValue(*p);
2352 if (hex_value == -1U)
2357 // Store the number while we have space.
2360 hex_value <<= bitPos % integerPartWidth;
2361 significand[bitPos / integerPartWidth] |= hex_value;
2362 } else if (!computedTrailingFraction) {
2363 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2364 computedTrailingFraction = true;
2368 /* Hex floats require an exponent but not a hexadecimal point. */
2369 assert(p != end && "Hex strings require an exponent");
2370 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2371 assert(p != begin && "Significand has no digits");
2372 assert((dot == end || p - begin != 1) && "Significand has no digits");
2374 /* Ignore the exponent if we are zero. */
2375 if (p != firstSignificantDigit) {
2378 /* Implicit hexadecimal point? */
2382 /* Calculate the exponent adjustment implicit in the number of
2383 significant digits. */
2384 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2385 if (expAdjustment < 0)
2387 expAdjustment = expAdjustment * 4 - 1;
2389 /* Adjust for writing the significand starting at the most
2390 significant nibble. */
2391 expAdjustment += semantics->precision;
2392 expAdjustment -= partsCount * integerPartWidth;
2394 /* Adjust for the given exponent. */
2395 exponent = totalExponent(p + 1, end, expAdjustment);
2398 return normalize(rounding_mode, lost_fraction);
2402 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2403 unsigned sigPartCount, int exp,
2404 roundingMode rounding_mode)
2406 unsigned int parts, pow5PartCount;
2407 fltSemantics calcSemantics = { 32767, -32767, 0 };
2408 integerPart pow5Parts[maxPowerOfFiveParts];
2411 isNearest = (rounding_mode == rmNearestTiesToEven ||
2412 rounding_mode == rmNearestTiesToAway);
2414 parts = partCountForBits(semantics->precision + 11);
2416 /* Calculate pow(5, abs(exp)). */
2417 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2419 for (;; parts *= 2) {
2420 opStatus sigStatus, powStatus;
2421 unsigned int excessPrecision, truncatedBits;
2423 calcSemantics.precision = parts * integerPartWidth - 1;
2424 excessPrecision = calcSemantics.precision - semantics->precision;
2425 truncatedBits = excessPrecision;
2427 APFloat decSig = APFloat::getZero(calcSemantics, sign);
2428 APFloat pow5(calcSemantics);
2430 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2431 rmNearestTiesToEven);
2432 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2433 rmNearestTiesToEven);
2434 /* Add exp, as 10^n = 5^n * 2^n. */
2435 decSig.exponent += exp;
2437 lostFraction calcLostFraction;
2438 integerPart HUerr, HUdistance;
2439 unsigned int powHUerr;
2442 /* multiplySignificand leaves the precision-th bit set to 1. */
2443 calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
2444 powHUerr = powStatus != opOK;
2446 calcLostFraction = decSig.divideSignificand(pow5);
2447 /* Denormal numbers have less precision. */
2448 if (decSig.exponent < semantics->minExponent) {
2449 excessPrecision += (semantics->minExponent - decSig.exponent);
2450 truncatedBits = excessPrecision;
2451 if (excessPrecision > calcSemantics.precision)
2452 excessPrecision = calcSemantics.precision;
2454 /* Extra half-ulp lost in reciprocal of exponent. */
2455 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2458 /* Both multiplySignificand and divideSignificand return the
2459 result with the integer bit set. */
2460 assert(APInt::tcExtractBit
2461 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2463 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2465 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2466 excessPrecision, isNearest);
2468 /* Are we guaranteed to round correctly if we truncate? */
2469 if (HUdistance >= HUerr) {
2470 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2471 calcSemantics.precision - excessPrecision,
2473 /* Take the exponent of decSig. If we tcExtract-ed less bits
2474 above we must adjust our exponent to compensate for the
2475 implicit right shift. */
2476 exponent = (decSig.exponent + semantics->precision
2477 - (calcSemantics.precision - excessPrecision));
2478 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2481 return normalize(rounding_mode, calcLostFraction);
2487 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2492 /* Scan the text. */
2493 StringRef::iterator p = str.begin();
2494 interpretDecimal(p, str.end(), &D);
2496 /* Handle the quick cases. First the case of no significant digits,
2497 i.e. zero, and then exponents that are obviously too large or too
2498 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2499 definitely overflows if
2501 (exp - 1) * L >= maxExponent
2503 and definitely underflows to zero where
2505 (exp + 1) * L <= minExponent - precision
2507 With integer arithmetic the tightest bounds for L are
2509 93/28 < L < 196/59 [ numerator <= 256 ]
2510 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2513 // Test if we have a zero number allowing for strings with no null terminators
2514 // and zero decimals with non-zero exponents.
2516 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2517 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2518 // be at most one dot. On the other hand, if we have a zero with a non-zero
2519 // exponent, then we know that D.firstSigDigit will be non-numeric.
2520 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2524 /* Check whether the normalized exponent is high enough to overflow
2525 max during the log-rebasing in the max-exponent check below. */
2526 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2527 fs = handleOverflow(rounding_mode);
2529 /* If it wasn't, then it also wasn't high enough to overflow max
2530 during the log-rebasing in the min-exponent check. Check that it
2531 won't overflow min in either check, then perform the min-exponent
2533 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2534 (D.normalizedExponent + 1) * 28738 <=
2535 8651 * (semantics->minExponent - (int) semantics->precision)) {
2536 /* Underflow to zero and round. */
2537 category = fcNormal;
2539 fs = normalize(rounding_mode, lfLessThanHalf);
2541 /* We can finally safely perform the max-exponent check. */
2542 } else if ((D.normalizedExponent - 1) * 42039
2543 >= 12655 * semantics->maxExponent) {
2544 /* Overflow and round. */
2545 fs = handleOverflow(rounding_mode);
2547 integerPart *decSignificand;
2548 unsigned int partCount;
2550 /* A tight upper bound on number of bits required to hold an
2551 N-digit decimal integer is N * 196 / 59. Allocate enough space
2552 to hold the full significand, and an extra part required by
2554 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2555 partCount = partCountForBits(1 + 196 * partCount / 59);
2556 decSignificand = new integerPart[partCount + 1];
2559 /* Convert to binary efficiently - we do almost all multiplication
2560 in an integerPart. When this would overflow do we do a single
2561 bignum multiplication, and then revert again to multiplication
2562 in an integerPart. */
2564 integerPart decValue, val, multiplier;
2572 if (p == str.end()) {
2576 decValue = decDigitValue(*p++);
2577 assert(decValue < 10U && "Invalid character in significand");
2579 val = val * 10 + decValue;
2580 /* The maximum number that can be multiplied by ten with any
2581 digit added without overflowing an integerPart. */
2582 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2584 /* Multiply out the current part. */
2585 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2586 partCount, partCount + 1, false);
2588 /* If we used another part (likely but not guaranteed), increase
2590 if (decSignificand[partCount])
2592 } while (p <= D.lastSigDigit);
2594 category = fcNormal;
2595 fs = roundSignificandWithExponent(decSignificand, partCount,
2596 D.exponent, rounding_mode);
2598 delete [] decSignificand;
2605 APFloat::convertFromStringSpecials(StringRef str) {
2606 if (str.equals("inf") || str.equals("INFINITY")) {
2611 if (str.equals("-inf") || str.equals("-INFINITY")) {
2616 if (str.equals("nan") || str.equals("NaN")) {
2617 makeNaN(false, false);
2621 if (str.equals("-nan") || str.equals("-NaN")) {
2622 makeNaN(false, true);
2630 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2632 assert(!str.empty() && "Invalid string length");
2634 // Handle special cases.
2635 if (convertFromStringSpecials(str))
2638 /* Handle a leading minus sign. */
2639 StringRef::iterator p = str.begin();
2640 size_t slen = str.size();
2641 sign = *p == '-' ? 1 : 0;
2642 if (*p == '-' || *p == '+') {
2645 assert(slen && "String has no digits");
2648 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2649 assert(slen - 2 && "Invalid string");
2650 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2654 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2657 /* Write out a hexadecimal representation of the floating point value
2658 to DST, which must be of sufficient size, in the C99 form
2659 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2660 excluding the terminating NUL.
2662 If UPPERCASE, the output is in upper case, otherwise in lower case.
2664 HEXDIGITS digits appear altogether, rounding the value if
2665 necessary. If HEXDIGITS is 0, the minimal precision to display the
2666 number precisely is used instead. If nothing would appear after
2667 the decimal point it is suppressed.
2669 The decimal exponent is always printed and has at least one digit.
2670 Zero values display an exponent of zero. Infinities and NaNs
2671 appear as "infinity" or "nan" respectively.
2673 The above rules are as specified by C99. There is ambiguity about
2674 what the leading hexadecimal digit should be. This implementation
2675 uses whatever is necessary so that the exponent is displayed as
2676 stored. This implies the exponent will fall within the IEEE format
2677 range, and the leading hexadecimal digit will be 0 (for denormals),
2678 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2679 any other digits zero).
2682 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2683 bool upperCase, roundingMode rounding_mode) const
2693 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2694 dst += sizeof infinityL - 1;
2698 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2699 dst += sizeof NaNU - 1;
2704 *dst++ = upperCase ? 'X': 'x';
2706 if (hexDigits > 1) {
2708 memset (dst, '0', hexDigits - 1);
2709 dst += hexDigits - 1;
2711 *dst++ = upperCase ? 'P': 'p';
2716 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2722 return static_cast<unsigned int>(dst - p);
2725 /* Does the hard work of outputting the correctly rounded hexadecimal
2726 form of a normal floating point number with the specified number of
2727 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2728 digits necessary to print the value precisely is output. */
2730 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2732 roundingMode rounding_mode) const
2734 unsigned int count, valueBits, shift, partsCount, outputDigits;
2735 const char *hexDigitChars;
2736 const integerPart *significand;
2741 *dst++ = upperCase ? 'X': 'x';
2744 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2746 significand = significandParts();
2747 partsCount = partCount();
2749 /* +3 because the first digit only uses the single integer bit, so
2750 we have 3 virtual zero most-significant-bits. */
2751 valueBits = semantics->precision + 3;
2752 shift = integerPartWidth - valueBits % integerPartWidth;
2754 /* The natural number of digits required ignoring trailing
2755 insignificant zeroes. */
2756 outputDigits = (valueBits - significandLSB () + 3) / 4;
2758 /* hexDigits of zero means use the required number for the
2759 precision. Otherwise, see if we are truncating. If we are,
2760 find out if we need to round away from zero. */
2762 if (hexDigits < outputDigits) {
2763 /* We are dropping non-zero bits, so need to check how to round.
2764 "bits" is the number of dropped bits. */
2766 lostFraction fraction;
2768 bits = valueBits - hexDigits * 4;
2769 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2770 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2772 outputDigits = hexDigits;
2775 /* Write the digits consecutively, and start writing in the location
2776 of the hexadecimal point. We move the most significant digit
2777 left and add the hexadecimal point later. */
2780 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2782 while (outputDigits && count) {
2785 /* Put the most significant integerPartWidth bits in "part". */
2786 if (--count == partsCount)
2787 part = 0; /* An imaginary higher zero part. */
2789 part = significand[count] << shift;
2792 part |= significand[count - 1] >> (integerPartWidth - shift);
2794 /* Convert as much of "part" to hexdigits as we can. */
2795 unsigned int curDigits = integerPartWidth / 4;
2797 if (curDigits > outputDigits)
2798 curDigits = outputDigits;
2799 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2800 outputDigits -= curDigits;
2806 /* Note that hexDigitChars has a trailing '0'. */
2809 *q = hexDigitChars[hexDigitValue (*q) + 1];
2810 } while (*q == '0');
2813 /* Add trailing zeroes. */
2814 memset (dst, '0', outputDigits);
2815 dst += outputDigits;
2818 /* Move the most significant digit to before the point, and if there
2819 is something after the decimal point add it. This must come
2820 after rounding above. */
2827 /* Finally output the exponent. */
2828 *dst++ = upperCase ? 'P': 'p';
2830 return writeSignedDecimal (dst, exponent);
2833 hash_code llvm::hash_value(const APFloat &Arg) {
2834 if (!Arg.isFiniteNonZero())
2835 return hash_combine((uint8_t)Arg.category,
2836 // NaN has no sign, fix it at zero.
2837 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2838 Arg.semantics->precision);
2840 // Normal floats need their exponent and significand hashed.
2841 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2842 Arg.semantics->precision, Arg.exponent,
2844 Arg.significandParts(),
2845 Arg.significandParts() + Arg.partCount()));
2848 // Conversion from APFloat to/from host float/double. It may eventually be
2849 // possible to eliminate these and have everybody deal with APFloats, but that
2850 // will take a while. This approach will not easily extend to long double.
2851 // Current implementation requires integerPartWidth==64, which is correct at
2852 // the moment but could be made more general.
2854 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2855 // the actual IEEE respresentations. We compensate for that here.
2858 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2860 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2861 assert(partCount()==2);
2863 uint64_t myexponent, mysignificand;
2865 if (isFiniteNonZero()) {
2866 myexponent = exponent+16383; //bias
2867 mysignificand = significandParts()[0];
2868 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2869 myexponent = 0; // denormal
2870 } else if (category==fcZero) {
2873 } else if (category==fcInfinity) {
2874 myexponent = 0x7fff;
2875 mysignificand = 0x8000000000000000ULL;
2877 assert(category == fcNaN && "Unknown category");
2878 myexponent = 0x7fff;
2879 mysignificand = significandParts()[0];
2883 words[0] = mysignificand;
2884 words[1] = ((uint64_t)(sign & 1) << 15) |
2885 (myexponent & 0x7fffLL);
2886 return APInt(80, words);
2890 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2892 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2893 assert(partCount()==2);
2899 // Convert number to double. To avoid spurious underflows, we re-
2900 // normalize against the "double" minExponent first, and only *then*
2901 // truncate the mantissa. The result of that second conversion
2902 // may be inexact, but should never underflow.
2903 // Declare fltSemantics before APFloat that uses it (and
2904 // saves pointer to it) to ensure correct destruction order.
2905 fltSemantics extendedSemantics = *semantics;
2906 extendedSemantics.minExponent = IEEEdouble.minExponent;
2907 APFloat extended(*this);
2908 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2909 assert(fs == opOK && !losesInfo);
2912 APFloat u(extended);
2913 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2914 assert(fs == opOK || fs == opInexact);
2916 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2918 // If conversion was exact or resulted in a special case, we're done;
2919 // just set the second double to zero. Otherwise, re-convert back to
2920 // the extended format and compute the difference. This now should
2921 // convert exactly to double.
2922 if (u.isFiniteNonZero() && losesInfo) {
2923 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2924 assert(fs == opOK && !losesInfo);
2927 APFloat v(extended);
2928 v.subtract(u, rmNearestTiesToEven);
2929 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2930 assert(fs == opOK && !losesInfo);
2932 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2937 return APInt(128, words);
2941 APFloat::convertQuadrupleAPFloatToAPInt() const
2943 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2944 assert(partCount()==2);
2946 uint64_t myexponent, mysignificand, mysignificand2;
2948 if (isFiniteNonZero()) {
2949 myexponent = exponent+16383; //bias
2950 mysignificand = significandParts()[0];
2951 mysignificand2 = significandParts()[1];
2952 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2953 myexponent = 0; // denormal
2954 } else if (category==fcZero) {
2956 mysignificand = mysignificand2 = 0;
2957 } else if (category==fcInfinity) {
2958 myexponent = 0x7fff;
2959 mysignificand = mysignificand2 = 0;
2961 assert(category == fcNaN && "Unknown category!");
2962 myexponent = 0x7fff;
2963 mysignificand = significandParts()[0];
2964 mysignificand2 = significandParts()[1];
2968 words[0] = mysignificand;
2969 words[1] = ((uint64_t)(sign & 1) << 63) |
2970 ((myexponent & 0x7fff) << 48) |
2971 (mysignificand2 & 0xffffffffffffLL);
2973 return APInt(128, words);
2977 APFloat::convertDoubleAPFloatToAPInt() const
2979 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2980 assert(partCount()==1);
2982 uint64_t myexponent, mysignificand;
2984 if (isFiniteNonZero()) {
2985 myexponent = exponent+1023; //bias
2986 mysignificand = *significandParts();
2987 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2988 myexponent = 0; // denormal
2989 } else if (category==fcZero) {
2992 } else if (category==fcInfinity) {
2996 assert(category == fcNaN && "Unknown category!");
2998 mysignificand = *significandParts();
3001 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
3002 ((myexponent & 0x7ff) << 52) |
3003 (mysignificand & 0xfffffffffffffLL))));
3007 APFloat::convertFloatAPFloatToAPInt() const
3009 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
3010 assert(partCount()==1);
3012 uint32_t myexponent, mysignificand;
3014 if (isFiniteNonZero()) {
3015 myexponent = exponent+127; //bias
3016 mysignificand = (uint32_t)*significandParts();
3017 if (myexponent == 1 && !(mysignificand & 0x800000))
3018 myexponent = 0; // denormal
3019 } else if (category==fcZero) {
3022 } else if (category==fcInfinity) {
3026 assert(category == fcNaN && "Unknown category!");
3028 mysignificand = (uint32_t)*significandParts();
3031 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3032 (mysignificand & 0x7fffff)));
3036 APFloat::convertHalfAPFloatToAPInt() const
3038 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
3039 assert(partCount()==1);
3041 uint32_t myexponent, mysignificand;
3043 if (isFiniteNonZero()) {
3044 myexponent = exponent+15; //bias
3045 mysignificand = (uint32_t)*significandParts();
3046 if (myexponent == 1 && !(mysignificand & 0x400))
3047 myexponent = 0; // denormal
3048 } else if (category==fcZero) {
3051 } else if (category==fcInfinity) {
3055 assert(category == fcNaN && "Unknown category!");
3057 mysignificand = (uint32_t)*significandParts();
3060 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3061 (mysignificand & 0x3ff)));
3064 // This function creates an APInt that is just a bit map of the floating
3065 // point constant as it would appear in memory. It is not a conversion,
3066 // and treating the result as a normal integer is unlikely to be useful.
3069 APFloat::bitcastToAPInt() const
3071 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
3072 return convertHalfAPFloatToAPInt();
3074 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
3075 return convertFloatAPFloatToAPInt();
3077 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
3078 return convertDoubleAPFloatToAPInt();
3080 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
3081 return convertQuadrupleAPFloatToAPInt();
3083 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
3084 return convertPPCDoubleDoubleAPFloatToAPInt();
3086 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
3088 return convertF80LongDoubleAPFloatToAPInt();
3092 APFloat::convertToFloat() const
3094 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
3095 "Float semantics are not IEEEsingle");
3096 APInt api = bitcastToAPInt();
3097 return api.bitsToFloat();
3101 APFloat::convertToDouble() const
3103 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
3104 "Float semantics are not IEEEdouble");
3105 APInt api = bitcastToAPInt();
3106 return api.bitsToDouble();
3109 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3110 /// does not support these bit patterns:
3111 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3112 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3113 /// exponent = 0, integer bit 1 ("pseudodenormal")
3114 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3115 /// At the moment, the first two are treated as NaNs, the second two as Normal.
3117 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
3119 assert(api.getBitWidth()==80);
3120 uint64_t i1 = api.getRawData()[0];
3121 uint64_t i2 = api.getRawData()[1];
3122 uint64_t myexponent = (i2 & 0x7fff);
3123 uint64_t mysignificand = i1;
3125 initialize(&APFloat::x87DoubleExtended);
3126 assert(partCount()==2);
3128 sign = static_cast<unsigned int>(i2>>15);
3129 if (myexponent==0 && mysignificand==0) {
3130 // exponent, significand meaningless
3132 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3133 // exponent, significand meaningless
3134 category = fcInfinity;
3135 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3136 // exponent meaningless
3138 significandParts()[0] = mysignificand;
3139 significandParts()[1] = 0;
3141 category = fcNormal;
3142 exponent = myexponent - 16383;
3143 significandParts()[0] = mysignificand;
3144 significandParts()[1] = 0;
3145 if (myexponent==0) // denormal
3151 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3153 assert(api.getBitWidth()==128);
3154 uint64_t i1 = api.getRawData()[0];
3155 uint64_t i2 = api.getRawData()[1];
3159 // Get the first double and convert to our format.
3160 initFromDoubleAPInt(APInt(64, i1));
3161 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3162 assert(fs == opOK && !losesInfo);
3165 // Unless we have a special case, add in second double.
3166 if (isFiniteNonZero()) {
3167 APFloat v(IEEEdouble, APInt(64, i2));
3168 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3169 assert(fs == opOK && !losesInfo);
3172 add(v, rmNearestTiesToEven);
3177 APFloat::initFromQuadrupleAPInt(const APInt &api)
3179 assert(api.getBitWidth()==128);
3180 uint64_t i1 = api.getRawData()[0];
3181 uint64_t i2 = api.getRawData()[1];
3182 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3183 uint64_t mysignificand = i1;
3184 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3186 initialize(&APFloat::IEEEquad);
3187 assert(partCount()==2);
3189 sign = static_cast<unsigned int>(i2>>63);
3190 if (myexponent==0 &&
3191 (mysignificand==0 && mysignificand2==0)) {
3192 // exponent, significand meaningless
3194 } else if (myexponent==0x7fff &&
3195 (mysignificand==0 && mysignificand2==0)) {
3196 // exponent, significand meaningless
3197 category = fcInfinity;
3198 } else if (myexponent==0x7fff &&
3199 (mysignificand!=0 || mysignificand2 !=0)) {
3200 // exponent meaningless
3202 significandParts()[0] = mysignificand;
3203 significandParts()[1] = mysignificand2;
3205 category = fcNormal;
3206 exponent = myexponent - 16383;
3207 significandParts()[0] = mysignificand;
3208 significandParts()[1] = mysignificand2;
3209 if (myexponent==0) // denormal
3212 significandParts()[1] |= 0x1000000000000LL; // integer bit
3217 APFloat::initFromDoubleAPInt(const APInt &api)
3219 assert(api.getBitWidth()==64);
3220 uint64_t i = *api.getRawData();
3221 uint64_t myexponent = (i >> 52) & 0x7ff;
3222 uint64_t mysignificand = i & 0xfffffffffffffLL;
3224 initialize(&APFloat::IEEEdouble);
3225 assert(partCount()==1);
3227 sign = static_cast<unsigned int>(i>>63);
3228 if (myexponent==0 && mysignificand==0) {
3229 // exponent, significand meaningless
3231 } else if (myexponent==0x7ff && mysignificand==0) {
3232 // exponent, significand meaningless
3233 category = fcInfinity;
3234 } else if (myexponent==0x7ff && mysignificand!=0) {
3235 // exponent meaningless
3237 *significandParts() = mysignificand;
3239 category = fcNormal;
3240 exponent = myexponent - 1023;
3241 *significandParts() = mysignificand;
3242 if (myexponent==0) // denormal
3245 *significandParts() |= 0x10000000000000LL; // integer bit
3250 APFloat::initFromFloatAPInt(const APInt & api)
3252 assert(api.getBitWidth()==32);
3253 uint32_t i = (uint32_t)*api.getRawData();
3254 uint32_t myexponent = (i >> 23) & 0xff;
3255 uint32_t mysignificand = i & 0x7fffff;
3257 initialize(&APFloat::IEEEsingle);
3258 assert(partCount()==1);
3261 if (myexponent==0 && mysignificand==0) {
3262 // exponent, significand meaningless
3264 } else if (myexponent==0xff && mysignificand==0) {
3265 // exponent, significand meaningless
3266 category = fcInfinity;
3267 } else if (myexponent==0xff && mysignificand!=0) {
3268 // sign, exponent, significand meaningless
3270 *significandParts() = mysignificand;
3272 category = fcNormal;
3273 exponent = myexponent - 127; //bias
3274 *significandParts() = mysignificand;
3275 if (myexponent==0) // denormal
3278 *significandParts() |= 0x800000; // integer bit
3283 APFloat::initFromHalfAPInt(const APInt & api)
3285 assert(api.getBitWidth()==16);
3286 uint32_t i = (uint32_t)*api.getRawData();
3287 uint32_t myexponent = (i >> 10) & 0x1f;
3288 uint32_t mysignificand = i & 0x3ff;
3290 initialize(&APFloat::IEEEhalf);
3291 assert(partCount()==1);
3294 if (myexponent==0 && mysignificand==0) {
3295 // exponent, significand meaningless
3297 } else if (myexponent==0x1f && mysignificand==0) {
3298 // exponent, significand meaningless
3299 category = fcInfinity;
3300 } else if (myexponent==0x1f && mysignificand!=0) {
3301 // sign, exponent, significand meaningless
3303 *significandParts() = mysignificand;
3305 category = fcNormal;
3306 exponent = myexponent - 15; //bias
3307 *significandParts() = mysignificand;
3308 if (myexponent==0) // denormal
3311 *significandParts() |= 0x400; // integer bit
3315 /// Treat api as containing the bits of a floating point number. Currently
3316 /// we infer the floating point type from the size of the APInt. The
3317 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3318 /// when the size is anything else).
3320 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api)
3322 if (Sem == &IEEEhalf)
3323 return initFromHalfAPInt(api);
3324 if (Sem == &IEEEsingle)
3325 return initFromFloatAPInt(api);
3326 if (Sem == &IEEEdouble)
3327 return initFromDoubleAPInt(api);
3328 if (Sem == &x87DoubleExtended)
3329 return initFromF80LongDoubleAPInt(api);
3330 if (Sem == &IEEEquad)
3331 return initFromQuadrupleAPInt(api);
3332 if (Sem == &PPCDoubleDouble)
3333 return initFromPPCDoubleDoubleAPInt(api);
3335 llvm_unreachable(nullptr);
3339 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3343 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth));
3345 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth));
3347 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth));
3349 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth));
3352 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth));
3353 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
3355 llvm_unreachable("Unknown floating bit width");
3359 /// Make this number the largest magnitude normal number in the given
3361 void APFloat::makeLargest(bool Negative) {
3362 // We want (in interchange format):
3363 // sign = {Negative}
3365 // significand = 1..1
3366 category = fcNormal;
3368 exponent = semantics->maxExponent;
3370 // Use memset to set all but the highest integerPart to all ones.
3371 integerPart *significand = significandParts();
3372 unsigned PartCount = partCount();
3373 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3375 // Set the high integerPart especially setting all unused top bits for
3376 // internal consistency.
3377 const unsigned NumUnusedHighBits =
3378 PartCount*integerPartWidth - semantics->precision;
3379 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3380 ? (~integerPart(0) >> NumUnusedHighBits)
3384 /// Make this number the smallest magnitude denormal number in the given
3386 void APFloat::makeSmallest(bool Negative) {
3387 // We want (in interchange format):
3388 // sign = {Negative}
3390 // significand = 0..01
3391 category = fcNormal;
3393 exponent = semantics->minExponent;
3394 APInt::tcSet(significandParts(), 1, partCount());
3398 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3399 // We want (in interchange format):
3400 // sign = {Negative}
3402 // significand = 1..1
3403 APFloat Val(Sem, uninitialized);
3404 Val.makeLargest(Negative);
3408 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3409 // We want (in interchange format):
3410 // sign = {Negative}
3412 // significand = 0..01
3413 APFloat Val(Sem, uninitialized);
3414 Val.makeSmallest(Negative);
3418 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3419 APFloat Val(Sem, uninitialized);
3421 // We want (in interchange format):
3422 // sign = {Negative}
3424 // significand = 10..0
3426 Val.category = fcNormal;
3427 Val.zeroSignificand();
3428 Val.sign = Negative;
3429 Val.exponent = Sem.minExponent;
3430 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3431 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3436 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) {
3437 initFromAPInt(&Sem, API);
3440 APFloat::APFloat(float f) {
3441 initFromAPInt(&IEEEsingle, APInt::floatToBits(f));
3444 APFloat::APFloat(double d) {
3445 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d));
3449 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3450 Buffer.append(Str.begin(), Str.end());
3453 /// Removes data from the given significand until it is no more
3454 /// precise than is required for the desired precision.
3455 void AdjustToPrecision(APInt &significand,
3456 int &exp, unsigned FormatPrecision) {
3457 unsigned bits = significand.getActiveBits();
3459 // 196/59 is a very slight overestimate of lg_2(10).
3460 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3462 if (bits <= bitsRequired) return;
3464 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3465 if (!tensRemovable) return;
3467 exp += tensRemovable;
3469 APInt divisor(significand.getBitWidth(), 1);
3470 APInt powten(significand.getBitWidth(), 10);
3472 if (tensRemovable & 1)
3474 tensRemovable >>= 1;
3475 if (!tensRemovable) break;
3479 significand = significand.udiv(divisor);
3481 // Truncate the significand down to its active bit count.
3482 significand = significand.trunc(significand.getActiveBits());
3486 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3487 int &exp, unsigned FormatPrecision) {
3488 unsigned N = buffer.size();
3489 if (N <= FormatPrecision) return;
3491 // The most significant figures are the last ones in the buffer.
3492 unsigned FirstSignificant = N - FormatPrecision;
3495 // FIXME: this probably shouldn't use 'round half up'.
3497 // Rounding down is just a truncation, except we also want to drop
3498 // trailing zeros from the new result.
3499 if (buffer[FirstSignificant - 1] < '5') {
3500 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3503 exp += FirstSignificant;
3504 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3508 // Rounding up requires a decimal add-with-carry. If we continue
3509 // the carry, the newly-introduced zeros will just be truncated.
3510 for (unsigned I = FirstSignificant; I != N; ++I) {
3511 if (buffer[I] == '9') {
3519 // If we carried through, we have exactly one digit of precision.
3520 if (FirstSignificant == N) {
3521 exp += FirstSignificant;
3523 buffer.push_back('1');
3527 exp += FirstSignificant;
3528 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3532 void APFloat::toString(SmallVectorImpl<char> &Str,
3533 unsigned FormatPrecision,
3534 unsigned FormatMaxPadding) const {
3538 return append(Str, "-Inf");
3540 return append(Str, "+Inf");
3542 case fcNaN: return append(Str, "NaN");
3548 if (!FormatMaxPadding)
3549 append(Str, "0.0E+0");
3561 // Decompose the number into an APInt and an exponent.
3562 int exp = exponent - ((int) semantics->precision - 1);
3563 APInt significand(semantics->precision,
3564 makeArrayRef(significandParts(),
3565 partCountForBits(semantics->precision)));
3567 // Set FormatPrecision if zero. We want to do this before we
3568 // truncate trailing zeros, as those are part of the precision.
3569 if (!FormatPrecision) {
3570 // We use enough digits so the number can be round-tripped back to an
3571 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3572 // Accurately" by Steele and White.
3573 // FIXME: Using a formula based purely on the precision is conservative;
3574 // we can print fewer digits depending on the actual value being printed.
3576 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3577 FormatPrecision = 2 + semantics->precision * 59 / 196;
3580 // Ignore trailing binary zeros.
3581 int trailingZeros = significand.countTrailingZeros();
3582 exp += trailingZeros;
3583 significand = significand.lshr(trailingZeros);
3585 // Change the exponent from 2^e to 10^e.
3588 } else if (exp > 0) {
3590 significand = significand.zext(semantics->precision + exp);
3591 significand <<= exp;
3593 } else { /* exp < 0 */
3596 // We transform this using the identity:
3597 // (N)(2^-e) == (N)(5^e)(10^-e)
3598 // This means we have to multiply N (the significand) by 5^e.
3599 // To avoid overflow, we have to operate on numbers large
3600 // enough to store N * 5^e:
3601 // log2(N * 5^e) == log2(N) + e * log2(5)
3602 // <= semantics->precision + e * 137 / 59
3603 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3605 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3607 // Multiply significand by 5^e.
3608 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3609 significand = significand.zext(precision);
3610 APInt five_to_the_i(precision, 5);
3612 if (texp & 1) significand *= five_to_the_i;
3616 five_to_the_i *= five_to_the_i;
3620 AdjustToPrecision(significand, exp, FormatPrecision);
3622 SmallVector<char, 256> buffer;
3625 unsigned precision = significand.getBitWidth();
3626 APInt ten(precision, 10);
3627 APInt digit(precision, 0);
3629 bool inTrail = true;
3630 while (significand != 0) {
3631 // digit <- significand % 10
3632 // significand <- significand / 10
3633 APInt::udivrem(significand, ten, significand, digit);
3635 unsigned d = digit.getZExtValue();
3637 // Drop trailing zeros.
3638 if (inTrail && !d) exp++;
3640 buffer.push_back((char) ('0' + d));
3645 assert(!buffer.empty() && "no characters in buffer!");
3647 // Drop down to FormatPrecision.
3648 // TODO: don't do more precise calculations above than are required.
3649 AdjustToPrecision(buffer, exp, FormatPrecision);
3651 unsigned NDigits = buffer.size();
3653 // Check whether we should use scientific notation.
3654 bool FormatScientific;
3655 if (!FormatMaxPadding)
3656 FormatScientific = true;
3661 // But we shouldn't make the number look more precise than it is.
3662 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3663 NDigits + (unsigned) exp > FormatPrecision);
3665 // Power of the most significant digit.
3666 int MSD = exp + (int) (NDigits - 1);
3669 FormatScientific = false;
3671 // 765e-5 == 0.00765
3673 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3678 // Scientific formatting is pretty straightforward.
3679 if (FormatScientific) {
3680 exp += (NDigits - 1);
3682 Str.push_back(buffer[NDigits-1]);
3687 for (unsigned I = 1; I != NDigits; ++I)
3688 Str.push_back(buffer[NDigits-1-I]);
3691 Str.push_back(exp >= 0 ? '+' : '-');
3692 if (exp < 0) exp = -exp;
3693 SmallVector<char, 6> expbuf;
3695 expbuf.push_back((char) ('0' + (exp % 10)));
3698 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3699 Str.push_back(expbuf[E-1-I]);
3703 // Non-scientific, positive exponents.
3705 for (unsigned I = 0; I != NDigits; ++I)
3706 Str.push_back(buffer[NDigits-1-I]);
3707 for (unsigned I = 0; I != (unsigned) exp; ++I)
3712 // Non-scientific, negative exponents.
3714 // The number of digits to the left of the decimal point.
3715 int NWholeDigits = exp + (int) NDigits;
3718 if (NWholeDigits > 0) {
3719 for (; I != (unsigned) NWholeDigits; ++I)
3720 Str.push_back(buffer[NDigits-I-1]);
3723 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3727 for (unsigned Z = 1; Z != NZeros; ++Z)
3731 for (; I != NDigits; ++I)
3732 Str.push_back(buffer[NDigits-I-1]);
3735 bool APFloat::getExactInverse(APFloat *inv) const {
3736 // Special floats and denormals have no exact inverse.
3737 if (!isFiniteNonZero())
3740 // Check that the number is a power of two by making sure that only the
3741 // integer bit is set in the significand.
3742 if (significandLSB() != semantics->precision - 1)
3746 APFloat reciprocal(*semantics, 1ULL);
3747 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3750 // Avoid multiplication with a denormal, it is not safe on all platforms and
3751 // may be slower than a normal division.
3752 if (reciprocal.isDenormal())
3755 assert(reciprocal.isFiniteNonZero() &&
3756 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3764 bool APFloat::isSignaling() const {
3768 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3769 // first bit of the trailing significand being 0.
3770 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3773 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3775 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3776 /// appropriate sign switching before/after the computation.
3777 APFloat::opStatus APFloat::next(bool nextDown) {
3778 // If we are performing nextDown, swap sign so we have -x.
3782 // Compute nextUp(x)
3783 opStatus result = opOK;
3785 // Handle each float category separately.
3788 // nextUp(+inf) = +inf
3791 // nextUp(-inf) = -getLargest()
3795 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3796 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3797 // change the payload.
3798 if (isSignaling()) {
3799 result = opInvalidOp;
3800 // For consistency, propagate the sign of the sNaN to the qNaN.
3801 makeNaN(false, isNegative(), nullptr);
3805 // nextUp(pm 0) = +getSmallest()
3806 makeSmallest(false);
3809 // nextUp(-getSmallest()) = -0
3810 if (isSmallest() && isNegative()) {
3811 APInt::tcSet(significandParts(), 0, partCount());
3817 // nextUp(getLargest()) == INFINITY
3818 if (isLargest() && !isNegative()) {
3819 APInt::tcSet(significandParts(), 0, partCount());
3820 category = fcInfinity;
3821 exponent = semantics->maxExponent + 1;
3825 // nextUp(normal) == normal + inc.
3827 // If we are negative, we need to decrement the significand.
3829 // We only cross a binade boundary that requires adjusting the exponent
3831 // 1. exponent != semantics->minExponent. This implies we are not in the
3832 // smallest binade or are dealing with denormals.
3833 // 2. Our significand excluding the integral bit is all zeros.
3834 bool WillCrossBinadeBoundary =
3835 exponent != semantics->minExponent && isSignificandAllZeros();
3837 // Decrement the significand.
3839 // We always do this since:
3840 // 1. If we are dealing with a non-binade decrement, by definition we
3841 // just decrement the significand.
3842 // 2. If we are dealing with a normal -> normal binade decrement, since
3843 // we have an explicit integral bit the fact that all bits but the
3844 // integral bit are zero implies that subtracting one will yield a
3845 // significand with 0 integral bit and 1 in all other spots. Thus we
3846 // must just adjust the exponent and set the integral bit to 1.
3847 // 3. If we are dealing with a normal -> denormal binade decrement,
3848 // since we set the integral bit to 0 when we represent denormals, we
3849 // just decrement the significand.
3850 integerPart *Parts = significandParts();
3851 APInt::tcDecrement(Parts, partCount());
3853 if (WillCrossBinadeBoundary) {
3854 // Our result is a normal number. Do the following:
3855 // 1. Set the integral bit to 1.
3856 // 2. Decrement the exponent.
3857 APInt::tcSetBit(Parts, semantics->precision - 1);
3861 // If we are positive, we need to increment the significand.
3863 // We only cross a binade boundary that requires adjusting the exponent if
3864 // the input is not a denormal and all of said input's significand bits
3865 // are set. If all of said conditions are true: clear the significand, set
3866 // the integral bit to 1, and increment the exponent. If we have a
3867 // denormal always increment since moving denormals and the numbers in the
3868 // smallest normal binade have the same exponent in our representation.
3869 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3871 if (WillCrossBinadeBoundary) {
3872 integerPart *Parts = significandParts();
3873 APInt::tcSet(Parts, 0, partCount());
3874 APInt::tcSetBit(Parts, semantics->precision - 1);
3875 assert(exponent != semantics->maxExponent &&
3876 "We can not increment an exponent beyond the maxExponent allowed"
3877 " by the given floating point semantics.");
3880 incrementSignificand();
3886 // If we are performing nextDown, swap sign so we have -nextUp(-x)
3894 APFloat::makeInf(bool Negative) {
3895 category = fcInfinity;
3897 exponent = semantics->maxExponent + 1;
3898 APInt::tcSet(significandParts(), 0, partCount());
3902 APFloat::makeZero(bool Negative) {
3905 exponent = semantics->minExponent-1;
3906 APInt::tcSet(significandParts(), 0, partCount());
3909 APFloat llvm::scalbn(APFloat X, int Exp) {
3910 if (X.isInfinity() || X.isZero() || X.isNaN())
3911 return std::move(X);
3913 auto MaxExp = X.getSemantics().maxExponent;
3914 auto MinExp = X.getSemantics().minExponent;
3915 if (Exp > (MaxExp - X.exponent))
3916 // Overflow saturates to infinity.
3917 return APFloat::getInf(X.getSemantics(), X.isNegative());
3918 if (Exp < (MinExp - X.exponent))
3919 // Underflow saturates to zero.
3920 return APFloat::getZero(X.getSemantics(), X.isNegative());
3923 return std::move(X);