1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/MathExtras.h"
26 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
28 /* Assumed in hexadecimal significand parsing, and conversion to
29 hexadecimal strings. */
30 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
31 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
35 /* Represents floating point arithmetic semantics. */
37 /* The largest E such that 2^E is representable; this matches the
38 definition of IEEE 754. */
39 exponent_t maxExponent;
41 /* The smallest E such that 2^E is a normalized number; this
42 matches the definition of IEEE 754. */
43 exponent_t minExponent;
45 /* Number of bits in the significand. This includes the integer bit. */
47 unsigned int precision;
49 /* True if arithmetic is supported. */
50 unsigned int arithmeticOK;
53 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, true };
54 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
55 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
56 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
57 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
58 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
60 // The PowerPC format consists of two doubles. It does not map cleanly
61 // onto the usual format above. For now only storage of constants of
62 // this type is supported, no arithmetic.
63 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
65 /* A tight upper bound on number of parts required to hold the value
68 power * 815 / (351 * integerPartWidth) + 1
70 However, whilst the result may require only this many parts,
71 because we are multiplying two values to get it, the
72 multiplication may require an extra part with the excess part
73 being zero (consider the trivial case of 1 * 1, tcFullMultiply
74 requires two parts to hold the single-part result). So we add an
75 extra one to guarantee enough space whilst multiplying. */
76 const unsigned int maxExponent = 16383;
77 const unsigned int maxPrecision = 113;
78 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
79 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
80 / (351 * integerPartWidth));
83 /* A bunch of private, handy routines. */
/* Number of integerParts needed to hold BITS bits, i.e. BITS divided
   by integerPartWidth and rounded up. */
85 static inline unsigned int
86 partCountForBits(unsigned int bits)
88 return ((bits) + integerPartWidth - 1) / integerPartWidth;
91 /* Returns 0U-9U. Return values >= 10U are not digits. */
92 static inline unsigned int
93 decDigitValue(unsigned int c)
99 hexDigitValue(unsigned int c)
119 assertArithmeticOK(const llvm::fltSemantics &semantics) {
120 assert(semantics.arithmeticOK &&
121 "Compile-time arithmetic does not support these semantics");
124 /* Return the value of a decimal exponent of the form [+-]ddddddd.
127 If the exponent overflows, returns a large exponent with the appropriate sign. */
130 readExponent(StringRef::iterator begin, StringRef::iterator end)
133 unsigned int absExponent;
/* Magnitude at which the running exponent is pinned rather than
   allowed to keep growing. */
134 const unsigned int overlargeExponent = 24000; /* FIXME. */
135 StringRef::iterator p = begin;
137 assert(p != end && "Exponent has no digits");
/* Record the sign; a leading '+' is accepted as well as '-'. */
139 isNegative = (*p == '-');
140 if (*p == '-' || *p == '+') {
142 assert(p != end && "Exponent has no digits");
/* The first character seeds the accumulator and must be a decimal
   digit. */
145 absExponent = decDigitValue(*p++);
146 assert(absExponent < 10U && "Invalid character in exponent");
148 for (; p != end; ++p) {
151 value = decDigitValue(*p);
152 assert(value < 10U && "Invalid character in exponent");
154 value += absExponent * 10;
/* Once the magnitude has reached overlargeExponent, pin it there and
   move P to END so that the p == end assertion after the loop still
   holds. */
155 if (absExponent >= overlargeExponent) {
156 absExponent = overlargeExponent;
157 p = end; /* outwit assert below */
163 assert(p == end && "Invalid exponent in exponent");
166 return -(int) absExponent;
168 return (int) absExponent;
171 /* This is ugly and needs cleaning up, but I don't immediately see
172 how whilst remaining safe. */
174 totalExponent(StringRef::iterator p, StringRef::iterator end,
175 int exponentAdjustment)
177 int unsignedExponent;
178 bool negative, overflow;
181 assert(p != end && "Exponent has no digits");
183 negative = *p == '-';
184 if (*p == '-' || *p == '+') {
186 assert(p != end && "Exponent has no digits");
189 unsignedExponent = 0;
191 for (; p != end; ++p) {
194 value = decDigitValue(*p);
195 assert(value < 10U && "Invalid character in exponent");
197 unsignedExponent = unsignedExponent * 10 + value;
198 if (unsignedExponent > 32767)
202 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
206 exponent = unsignedExponent;
208 exponent = -exponent;
209 exponent += exponentAdjustment;
210 if (exponent > 32767 || exponent < -32768)
215 exponent = negative ? -32768: 32767;
220 static StringRef::iterator
221 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
222 StringRef::iterator *dot)
224 StringRef::iterator p = begin;
226 while (*p == '0' && p != end)
232 assert(end - begin != 1 && "Significand has no digits");
234 while (*p == '0' && p != end)
241 /* Given a normal decimal floating point number of the form
245 where the decimal point and exponent are optional, fill out the
246 structure D. Exponent is appropriate if the significand is
247 treated as an integer, and normalizedExponent if the significand
248 is taken to have the decimal point after a single leading
251 If the value is zero, D->firstSigDigit points to a non-digit, and
252 the return exponent is zero.
255 const char *firstSigDigit;
256 const char *lastSigDigit;
258 int normalizedExponent;
262 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
265 StringRef::iterator dot = end;
266 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
268 D->firstSigDigit = p;
270 D->normalizedExponent = 0;
272 for (; p != end; ++p) {
274 assert(dot == end && "String contains multiple dots");
279 if (decDigitValue(*p) >= 10U)
284 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
285 assert(p != begin && "Significand has no digits");
286 assert((dot == end || p - begin != 1) && "Significand has no digits");
288 /* p points to the first non-digit in the string */
289 D->exponent = readExponent(p + 1, end);
291 /* Implied decimal point? */
296 /* If number is all zeroes accept any exponent. */
297 if (p != D->firstSigDigit) {
298 /* Drop insignificant trailing zeroes. */
303 while (p != begin && *p == '0');
304 while (p != begin && *p == '.');
307 /* Adjust the exponents for any decimal point. */
308 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
309 D->normalizedExponent = (D->exponent +
310 static_cast<exponent_t>((p - D->firstSigDigit)
311 - (dot > D->firstSigDigit && dot < p)));
317 /* Return the trailing fraction of a hexadecimal number.
318 DIGITVALUE is the first hex digit of the fraction, P points to
321 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
322 unsigned int digitValue)
324 unsigned int hexDigit;
326 /* If the first trailing digit isn't 0 or 8 we can work out the
327 fraction immediately. */
329 return lfMoreThanHalf;
330 else if (digitValue < 8 && digitValue > 0)
331 return lfLessThanHalf;
333 /* Otherwise we need to find the first non-zero digit. */
337 assert(p != end && "Invalid trailing hexadecimal fraction!");
339 hexDigit = hexDigitValue(*p);
341 /* If we ran off the end it is exactly zero or one-half, otherwise
344 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
346 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
349 /* Return the fraction lost were a bignum truncated losing the least
350 significant BITS bits. */
352 lostFractionThroughTruncation(const integerPart *parts,
353 unsigned int partCount,
358 lsb = APInt::tcLSB(parts, partCount);
360 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
362 return lfExactlyZero;
364 return lfExactlyHalf;
365 if (bits <= partCount * integerPartWidth &&
366 APInt::tcExtractBit(parts, bits - 1))
367 return lfMoreThanHalf;
369 return lfLessThanHalf;
372 /* Shift DST right BITS bits noting lost fraction. */
374 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
376 lostFraction lost_fraction;
/* Classify the low BITS bits before the shift discards them. */
378 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
380 APInt::tcShiftRight(dst, parts, bits);
382 return lost_fraction;
385 /* Combine the effect of two lost fractions. */
387 combineLostFractions(lostFraction moreSignificant,
388 lostFraction lessSignificant)
/* A non-zero less significant fraction can only push the result off an
   exact boundary: exactly zero becomes less than half, and exactly
   half becomes more than half.  lfLessThanHalf and lfMoreThanHalf are
   returned unchanged. */
390 if (lessSignificant != lfExactlyZero) {
391 if (moreSignificant == lfExactlyZero)
392 moreSignificant = lfLessThanHalf;
393 else if (moreSignificant == lfExactlyHalf)
394 moreSignificant = lfMoreThanHalf;
397 return moreSignificant;
400 /* The error from the true value, in half-ulps, on multiplying two
401 floating point numbers, which differ from the value they
402 approximate by at most HUERR1 and HUERR2 half-ulps, is strictly less
403 than the returned value.
405 See "How to Read Floating Point Numbers Accurately" by William D. Clinger. */
408 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
/* Sanity check on the incoming errors: at least one of them is below
   2, or their sum is below 8. */
410 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
/* Both operands exact: the only possible error comes from the
   multiplication itself, so the bound is 0 or 2 depending on
   INEXACTMULTIPLY. */
412 if (HUerr1 + HUerr2 == 0)
413 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
/* Otherwise twice the combined operand error, plus one if the
   multiplication was inexact. */
415 return inexactMultiply + 2 * (HUerr1 + HUerr2);
418 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
419 when the least significant BITS are truncated. BITS cannot be
422 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
424 unsigned int count, partBits;
425 integerPart part, boundary;
430 count = bits / integerPartWidth;
431 partBits = bits % integerPartWidth + 1;
433 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
436 boundary = (integerPart) 1 << (partBits - 1);
441 if (part - boundary <= boundary - part)
442 return part - boundary;
444 return boundary - part;
447 if (part == boundary) {
450 return ~(integerPart) 0; /* A lot. */
453 } else if (part == boundary - 1) {
456 return ~(integerPart) 0; /* A lot. */
461 return ~(integerPart) 0; /* A lot. */
464 /* Place pow(5, power) in DST, and return the number of parts used.
465 DST must be at least one part larger than size of the answer. */
467 powerOf5(integerPart *dst, unsigned int power)
469 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
471 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
472 pow5s[0] = 78125 * 5;
474 unsigned int partsCount[16] = { 1 };
475 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
477 assert(power <= maxExponent);
482 *p1 = firstEightPowers[power & 7];
488 for (unsigned int n = 0; power; power >>= 1, n++) {
493 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
495 pc = partsCount[n - 1];
496 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
498 if (pow5[pc - 1] == 0)
506 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
508 if (p2[result - 1] == 0)
511 /* Now result is in p1 with partsCount parts and p2 is scratch
513 tmp = p1, p1 = p2, p2 = tmp;
520 APInt::tcAssign(dst, p1, result);
525 /* Zero at the end to avoid modular arithmetic when adding one; used
526 when rounding up during hexadecimal output. */
527 static const char hexDigitsLower[] = "0123456789abcdef0";
528 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
529 static const char infinityL[] = "infinity";
530 static const char infinityU[] = "INFINITY";
531 static const char NaNL[] = "nan";
532 static const char NaNU[] = "NAN";
534 /* Write out an integerPart in hexadecimal, starting with the most
535 significant nibble. Write out exactly COUNT hexdigits, return
538 partAsHex (char *dst, integerPart part, unsigned int count,
539 const char *hexDigitChars)
541 unsigned int result = count;
543 assert(count != 0 && count <= integerPartWidth / 4);
545 part >>= (integerPartWidth - 4 * count);
547 dst[count] = hexDigitChars[part & 0xf];
554 /* Write out an unsigned decimal integer. */
556 writeUnsignedDecimal (char *dst, unsigned int n)
572 /* Write out a signed decimal integer. */
574 writeSignedDecimal (char *dst, int value)
578 dst = writeUnsignedDecimal(dst, -(unsigned) value);
580 dst = writeUnsignedDecimal(dst, value);
587 APFloat::initialize(const fltSemantics *ourSemantics)
591 semantics = ourSemantics;
594 significand.parts = new integerPart[count];
598 APFloat::freeSignificand()
601 delete [] significand.parts;
605 APFloat::assign(const APFloat &rhs)
607 assert(semantics == rhs.semantics);
610 category = rhs.category;
611 exponent = rhs.exponent;
613 exponent2 = rhs.exponent2;
614 if (category == fcNormal || category == fcNaN)
615 copySignificand(rhs);
619 APFloat::copySignificand(const APFloat &rhs)
621 assert(category == fcNormal || category == fcNaN);
622 assert(rhs.partCount() >= partCount());
624 APInt::tcAssign(significandParts(), rhs.significandParts(),
628 /* Make this number a NaN. The payload comes from FILL when one is
629 supplied and starts as zero otherwise; the quiet bit is then cleared
630 or set to produce a signalling or quiet NaN respectively. */
631 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
636 integerPart *significand = significandParts();
637 unsigned numParts = partCount();
639 // Set the significand bits to the fill.
// With no fill, or a fill narrower than the significand, clear every
// part first so that parts the fill does not cover end up zero.
640 if (!fill || fill->getNumWords() < numParts)
641 APInt::tcSet(significand, 0, numParts);
// Copy no more words than both the fill and the significand hold.
643 APInt::tcAssign(significand, fill->getRawData(),
644 std::min(fill->getNumWords(), numParts));
646 // Zero out the excess bits of the significand.
// Only the low precision - 1 bits are kept as payload.
// NOTE(review): the literal 64 and the 1ULL mask below assume that
// integerPartWidth is 64 -- confirm, or switch to integerPartWidth.
647 unsigned bitsToPreserve = semantics->precision - 1;
648 unsigned part = bitsToPreserve / 64;
649 bitsToPreserve %= 64;
650 significand[part] &= ((1ULL << bitsToPreserve) - 1);
651 for (part++; part != numParts; ++part)
652 significand[part] = 0;
// Index of the quiet-NaN bit: two below the precision.
655 unsigned QNaNBit = semantics->precision - 2;
658 // We always have to clear the QNaN bit to make it an SNaN.
659 APInt::tcClearBit(significand, QNaNBit);
661 // If there are no bits set in the payload, we have to set
662 // *something* to make it a NaN instead of an infinity;
663 // conventionally, this is the next bit down from the QNaN bit.
664 if (APInt::tcIsZero(significand, numParts))
665 APInt::tcSetBit(significand, QNaNBit - 1);
667 // We always have to set the QNaN bit to make it a QNaN.
668 APInt::tcSetBit(significand, QNaNBit);
671 // For x87 extended precision, we want to make a NaN, not a
672 // pseudo-NaN. Maybe we should expose the ability to make
// For that format the bit directly above the QNaN bit is set as well.
674 if (semantics == &APFloat::x87DoubleExtended)
675 APInt::tcSetBit(significand, QNaNBit + 1);
678 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
680 APFloat value(Sem, uninitialized);
681 value.makeNaN(SNaN, Negative, fill);
686 APFloat::operator=(const APFloat &rhs)
689 if (semantics != rhs.semantics) {
691 initialize(rhs.semantics);
700 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
703 if (semantics != rhs.semantics ||
704 category != rhs.category ||
707 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
710 if (category==fcZero || category==fcInfinity)
712 else if (category==fcNormal && exponent!=rhs.exponent)
714 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
715 exponent2!=rhs.exponent2)
719 const integerPart* p=significandParts();
720 const integerPart* q=rhs.significandParts();
721 for (; i>0; i--, p++, q++) {
729 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
730 : exponent2(0), sign2(0) {
731 assertArithmeticOK(ourSemantics);
732 initialize(&ourSemantics);
735 exponent = ourSemantics.precision - 1;
736 significandParts()[0] = value;
737 normalize(rmNearestTiesToEven, lfExactlyZero);
740 APFloat::APFloat(const fltSemantics &ourSemantics) : exponent2(0), sign2(0) {
741 assertArithmeticOK(ourSemantics);
742 initialize(&ourSemantics);
747 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
748 : exponent2(0), sign2(0) {
749 assertArithmeticOK(ourSemantics);
750 // Allocates storage if necessary but does not initialize it.
751 initialize(&ourSemantics);
754 APFloat::APFloat(const fltSemantics &ourSemantics,
755 fltCategory ourCategory, bool negative)
756 : exponent2(0), sign2(0) {
757 assertArithmeticOK(ourSemantics);
758 initialize(&ourSemantics);
759 category = ourCategory;
761 if (category == fcNormal)
763 else if (ourCategory == fcNaN)
767 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text)
768 : exponent2(0), sign2(0) {
769 assertArithmeticOK(ourSemantics);
770 initialize(&ourSemantics);
771 convertFromString(text, rmNearestTiesToEven);
774 APFloat::APFloat(const APFloat &rhs) : exponent2(0), sign2(0) {
775 initialize(rhs.semantics);
784 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
785 void APFloat::Profile(FoldingSetNodeID& ID) const {
786 ID.Add(bitcastToAPInt());
790 APFloat::partCount() const
792 return partCountForBits(semantics->precision + 1);
796 APFloat::semanticsPrecision(const fltSemantics &semantics)
798 return semantics.precision;
802 APFloat::significandParts() const
804 return const_cast<APFloat *>(this)->significandParts();
808 APFloat::significandParts()
810 assert(category == fcNormal || category == fcNaN);
813 return significand.parts;
815 return &significand.part;
819 APFloat::zeroSignificand()
822 APInt::tcSet(significandParts(), 0, partCount());
825 /* Increment an fcNormal floating point number's significand. */
827 APFloat::incrementSignificand()
831 carry = APInt::tcIncrement(significandParts(), partCount());
833 /* Our callers should never cause us to overflow. */
838 /* Add the significand of the RHS. Returns the carry flag. */
840 APFloat::addSignificand(const APFloat &rhs)
844 parts = significandParts();
846 assert(semantics == rhs.semantics);
847 assert(exponent == rhs.exponent);
849 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
852 /* Subtract the significand of the RHS with a borrow flag. Returns
855 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
859 parts = significandParts();
861 assert(semantics == rhs.semantics);
862 assert(exponent == rhs.exponent);
864 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
868 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
869 on to the full-precision result of the multiplication. Returns the
872 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
874 unsigned int omsb; // One, not zero, based MSB.
875 unsigned int partsCount, newPartsCount, precision;
876 integerPart *lhsSignificand;
877 integerPart scratch[4];
878 integerPart *fullSignificand;
879 lostFraction lost_fraction;
882 assert(semantics == rhs.semantics);
884 precision = semantics->precision;
885 newPartsCount = partCountForBits(precision * 2);
887 if (newPartsCount > 4)
888 fullSignificand = new integerPart[newPartsCount];
890 fullSignificand = scratch;
892 lhsSignificand = significandParts();
893 partsCount = partCount();
895 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
896 rhs.significandParts(), partsCount, partsCount);
898 lost_fraction = lfExactlyZero;
899 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
900 exponent += rhs.exponent;
903 Significand savedSignificand = significand;
904 const fltSemantics *savedSemantics = semantics;
905 fltSemantics extendedSemantics;
907 unsigned int extendedPrecision;
909 /* Normalize our MSB. */
910 extendedPrecision = precision + precision - 1;
911 if (omsb != extendedPrecision) {
912 APInt::tcShiftLeft(fullSignificand, newPartsCount,
913 extendedPrecision - omsb);
914 exponent -= extendedPrecision - omsb;
917 /* Create new semantics. */
918 extendedSemantics = *semantics;
919 extendedSemantics.precision = extendedPrecision;
921 if (newPartsCount == 1)
922 significand.part = fullSignificand[0];
924 significand.parts = fullSignificand;
925 semantics = &extendedSemantics;
927 APFloat extendedAddend(*addend);
928 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
929 assert(status == opOK);
931 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
933 /* Restore our state. */
934 if (newPartsCount == 1)
935 fullSignificand[0] = significand.part;
936 significand = savedSignificand;
937 semantics = savedSemantics;
939 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
942 exponent -= (precision - 1);
944 if (omsb > precision) {
945 unsigned int bits, significantParts;
948 bits = omsb - precision;
949 significantParts = partCountForBits(omsb);
950 lf = shiftRight(fullSignificand, significantParts, bits);
951 lost_fraction = combineLostFractions(lf, lost_fraction);
955 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
957 if (newPartsCount > 4)
958 delete [] fullSignificand;
960 return lost_fraction;
963 /* Divide our significand by the significand of the RHS. Returns the lost fraction. */
965 APFloat::divideSignificand(const APFloat &rhs)
967 unsigned int bit, i, partsCount;
968 const integerPart *rhsSignificand;
969 integerPart *lhsSignificand, *dividend, *divisor;
970 integerPart scratch[4];
971 lostFraction lost_fraction;
973 assert(semantics == rhs.semantics);
975 lhsSignificand = significandParts();
976 rhsSignificand = rhs.significandParts();
977 partsCount = partCount();
980 dividend = new integerPart[partsCount * 2];
984 divisor = dividend + partsCount;
986 /* Copy the dividend and divisor as they will be modified in-place. */
987 for (i = 0; i < partsCount; i++) {
988 dividend[i] = lhsSignificand[i];
989 divisor[i] = rhsSignificand[i];
990 lhsSignificand[i] = 0;
993 exponent -= rhs.exponent;
995 unsigned int precision = semantics->precision;
997 /* Normalize the divisor. */
998 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1001 APInt::tcShiftLeft(divisor, partsCount, bit);
1004 /* Normalize the dividend. */
1005 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1008 APInt::tcShiftLeft(dividend, partsCount, bit);
1011 /* Ensure the dividend >= divisor initially for the loop below.
1012 Incidentally, this means that the division loop below is
1013 guaranteed to set the integer bit to one. */
1014 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1016 APInt::tcShiftLeft(dividend, partsCount, 1);
1017 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1020 /* Long division. */
1021 for (bit = precision; bit; bit -= 1) {
1022 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1023 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1024 APInt::tcSetBit(lhsSignificand, bit - 1);
1027 APInt::tcShiftLeft(dividend, partsCount, 1);
1030 /* Figure out the lost fraction. */
1031 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1034 lost_fraction = lfMoreThanHalf;
1036 lost_fraction = lfExactlyHalf;
1037 else if (APInt::tcIsZero(dividend, partsCount))
1038 lost_fraction = lfExactlyZero;
1040 lost_fraction = lfLessThanHalf;
1045 return lost_fraction;
1049 APFloat::significandMSB() const
1051 return APInt::tcMSB(significandParts(), partCount());
1055 APFloat::significandLSB() const
1057 return APInt::tcLSB(significandParts(), partCount());
1060 /* Note that a zero result is NOT normalized to fcZero. */
1062 APFloat::shiftSignificandRight(unsigned int bits)
1064 /* Our exponent should not overflow. */
1065 assert((exponent_t) (exponent + bits) >= exponent);
1069 return shiftRight(significandParts(), partCount(), bits);
1072 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1074 APFloat::shiftSignificandLeft(unsigned int bits)
1076 assert(bits < semantics->precision);
1079 unsigned int partsCount = partCount();
1081 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1084 assert(!APInt::tcIsZero(significandParts(), partsCount));
1089 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1093 assert(semantics == rhs.semantics);
1094 assert(category == fcNormal);
1095 assert(rhs.category == fcNormal);
1097 compare = exponent - rhs.exponent;
1099 /* If exponents are equal, do an unsigned bignum comparison of the
1102 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1106 return cmpGreaterThan;
1107 else if (compare < 0)
1113 /* Handle overflow. Sign is preserved. We either become infinity or
1114 the largest finite number. */
1116 APFloat::handleOverflow(roundingMode rounding_mode)
/* Become infinity under both round-to-nearest modes, and under a
   directed mode whose direction matches our sign (toward positive
   when the sign is clear, toward negative when it is set). */
1119 if (rounding_mode == rmNearestTiesToEven ||
1120 rounding_mode == rmNearestTiesToAway ||
1121 (rounding_mode == rmTowardPositive && !sign) ||
1122 (rounding_mode == rmTowardNegative && sign)) {
1123 category = fcInfinity;
1124 return (opStatus) (opOverflow | opInexact);
1127 /* Otherwise we become the largest finite number. */
1128 category = fcNormal;
1129 exponent = semantics->maxExponent;
/* Maximum exponent with, going by the helper's name, the low
   PRECISION bits of the significand all set. */
1130 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1131 semantics->precision);
1136 /* Returns TRUE if, when truncating the current number, with BIT the
1137 new LSB, with the given lost fraction and rounding mode, the result
1138 would need to be rounded away from zero (i.e., by increasing the
1139 significand). This routine must work for fcZero of both signs, and
1140 fcNormal numbers. */
1142 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1143 lostFraction lost_fraction,
1144 unsigned int bit) const
1146 /* NaNs and infinities should not have lost fractions. */
1147 assert(category == fcNormal || category == fcZero);
1149 /* Current callers never pass this so we don't handle it. */
1150 assert(lost_fraction != lfExactlyZero);
1152 switch (rounding_mode) {
1154 llvm_unreachable(0);
1156 case rmNearestTiesToAway:
1157 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1159 case rmNearestTiesToEven:
1160 if (lost_fraction == lfMoreThanHalf)
1163 /* Our zeroes don't have a significand to test. */
1164 if (lost_fraction == lfExactlyHalf && category != fcZero)
1165 return APInt::tcExtractBit(significandParts(), bit);
1172 case rmTowardPositive:
1173 return sign == false;
1175 case rmTowardNegative:
1176 return sign == true;
1181 APFloat::normalize(roundingMode rounding_mode,
1182 lostFraction lost_fraction)
1184 unsigned int omsb; /* One, not zero, based MSB. */
1187 if (category != fcNormal)
1190 /* Before rounding normalize the exponent of fcNormal numbers. */
1191 omsb = significandMSB() + 1;
1194 /* OMSB is numbered from 1. We want to place it in the integer
1195 bit numbered PRECISION if possible, with a compensating change in
1197 exponentChange = omsb - semantics->precision;
1199 /* If the resulting exponent is too high, overflow according to
1200 the rounding mode. */
1201 if (exponent + exponentChange > semantics->maxExponent)
1202 return handleOverflow(rounding_mode);
1204 /* Subnormal numbers have exponent minExponent, and their MSB
1205 is forced based on that. */
1206 if (exponent + exponentChange < semantics->minExponent)
1207 exponentChange = semantics->minExponent - exponent;
1209 /* Shifting left is easy as we don't lose precision. */
1210 if (exponentChange < 0) {
1211 assert(lost_fraction == lfExactlyZero);
1213 shiftSignificandLeft(-exponentChange);
1218 if (exponentChange > 0) {
1221 /* Shift right and capture any new lost fraction. */
1222 lf = shiftSignificandRight(exponentChange);
1224 lost_fraction = combineLostFractions(lf, lost_fraction);
1226 /* Keep OMSB up-to-date. */
1227 if (omsb > (unsigned) exponentChange)
1228 omsb -= exponentChange;
1234 /* Now round the number according to rounding_mode given the lost
1237 /* As specified in IEEE 754, since we do not trap we do not report
1238 underflow for exact results. */
1239 if (lost_fraction == lfExactlyZero) {
1240 /* Canonicalize zeroes. */
1247 /* Increment the significand if we're rounding away from zero. */
1248 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1250 exponent = semantics->minExponent;
1252 incrementSignificand();
1253 omsb = significandMSB() + 1;
1255 /* Did the significand increment overflow? */
1256 if (omsb == (unsigned) semantics->precision + 1) {
1257 /* Renormalize by incrementing the exponent and shifting our
1258 significand right one. However if we already have the
1259 maximum exponent we overflow to infinity. */
1260 if (exponent == semantics->maxExponent) {
1261 category = fcInfinity;
1263 return (opStatus) (opOverflow | opInexact);
1266 shiftSignificandRight(1);
1272 /* The normal case - we were and are not denormal, and any
1273 significand increment above didn't overflow. */
1274 if (omsb == semantics->precision)
1277 /* We have a non-zero denormal. */
1278 assert(omsb < semantics->precision);
1280 /* Canonicalize zeroes. */
1284 /* The fcZero case is a denormal that underflowed to zero. */
1285 return (opStatus) (opUnderflow | opInexact);
1289 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1291 switch (convolve(category, rhs.category)) {
1293 llvm_unreachable(0);
1295 case convolve(fcNaN, fcZero):
1296 case convolve(fcNaN, fcNormal):
1297 case convolve(fcNaN, fcInfinity):
1298 case convolve(fcNaN, fcNaN):
1299 case convolve(fcNormal, fcZero):
1300 case convolve(fcInfinity, fcNormal):
1301 case convolve(fcInfinity, fcZero):
1304 case convolve(fcZero, fcNaN):
1305 case convolve(fcNormal, fcNaN):
1306 case convolve(fcInfinity, fcNaN):
1308 copySignificand(rhs);
1311 case convolve(fcNormal, fcInfinity):
1312 case convolve(fcZero, fcInfinity):
1313 category = fcInfinity;
1314 sign = rhs.sign ^ subtract;
1317 case convolve(fcZero, fcNormal):
1319 sign = rhs.sign ^ subtract;
1322 case convolve(fcZero, fcZero):
1323 /* Sign depends on rounding mode; handled by caller. */
1326 case convolve(fcInfinity, fcInfinity):
1327 /* Differently signed infinities can only be validly
1329 if (((sign ^ rhs.sign)!=0) != subtract) {
1336 case convolve(fcNormal, fcNormal):
1341 /* Add or subtract two normal numbers. */
1343 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1346 lostFraction lost_fraction;
1349 /* Determine if the operation on the absolute values is effectively
1350 an addition or subtraction. */
1351 subtract ^= (sign ^ rhs.sign) ? true : false;
1353 /* Are we bigger exponent-wise than the RHS? */
1354 bits = exponent - rhs.exponent;
1356 /* Subtraction is more subtle than one might naively expect. */
1358 APFloat temp_rhs(rhs);
1362 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1363 lost_fraction = lfExactlyZero;
1364 } else if (bits > 0) {
1365 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1366 shiftSignificandLeft(1);
1369 lost_fraction = shiftSignificandRight(-bits - 1);
1370 temp_rhs.shiftSignificandLeft(1);
1375 carry = temp_rhs.subtractSignificand
1376 (*this, lost_fraction != lfExactlyZero);
1377 copySignificand(temp_rhs);
1380 carry = subtractSignificand
1381 (temp_rhs, lost_fraction != lfExactlyZero);
1384 /* Invert the lost fraction - it was on the RHS and
1386 if (lost_fraction == lfLessThanHalf)
1387 lost_fraction = lfMoreThanHalf;
1388 else if (lost_fraction == lfMoreThanHalf)
1389 lost_fraction = lfLessThanHalf;
1391 /* The code above is intended to ensure that no borrow is
1397 APFloat temp_rhs(rhs);
1399 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1400 carry = addSignificand(temp_rhs);
1402 lost_fraction = shiftSignificandRight(-bits);
1403 carry = addSignificand(rhs);
1406 /* We have a guard bit; generating a carry cannot happen. */
1411 return lost_fraction;
// Handles the category-level part of a multiplication. The pair
// (category, rhs.category) is folded into one switch value by convolve().
// Visible effects: when only rhs is a NaN its significand is copied into
// *this, and the Normal/Infinity and Infinity/Infinity pairings set the
// category to fcInfinity.
// NOTE(review): the statements that close each case group (status values and
// any further category assignments) are not visible in this listing.
1415 APFloat::multiplySpecials(const APFloat &rhs)
1417 switch (convolve(category, rhs.category)) {
1419 llvm_unreachable(0);
1421 case convolve(fcNaN, fcZero):
1422 case convolve(fcNaN, fcNormal):
1423 case convolve(fcNaN, fcInfinity):
1424 case convolve(fcNaN, fcNaN):
1427 case convolve(fcZero, fcNaN):
1428 case convolve(fcNormal, fcNaN):
1429 case convolve(fcInfinity, fcNaN):
1431 copySignificand(rhs);
1434 case convolve(fcNormal, fcInfinity):
1435 case convolve(fcInfinity, fcNormal):
1436 case convolve(fcInfinity, fcInfinity):
1437 category = fcInfinity;
1440 case convolve(fcZero, fcNormal):
1441 case convolve(fcNormal, fcZero):
1442 case convolve(fcZero, fcZero):
1446 case convolve(fcZero, fcInfinity):
1447 case convolve(fcInfinity, fcZero):
1451 case convolve(fcNormal, fcNormal):
// Handles the category-level part of a division, switching on
// convolve(category, rhs.category).
// Visible effects: when only rhs is a NaN its significand is copied into
// *this, and Normal divided by Zero sets the category to fcInfinity.
// NOTE(review): the statements that close each case group (status values and
// remaining category assignments) are not visible in this listing.
1457 APFloat::divideSpecials(const APFloat &rhs)
1459 switch (convolve(category, rhs.category)) {
1461 llvm_unreachable(0);
1463 case convolve(fcNaN, fcZero):
1464 case convolve(fcNaN, fcNormal):
1465 case convolve(fcNaN, fcInfinity):
1466 case convolve(fcNaN, fcNaN):
1467 case convolve(fcInfinity, fcZero):
1468 case convolve(fcInfinity, fcNormal):
1469 case convolve(fcZero, fcInfinity):
1470 case convolve(fcZero, fcNormal):
1473 case convolve(fcZero, fcNaN):
1474 case convolve(fcNormal, fcNaN):
1475 case convolve(fcInfinity, fcNaN):
1477 copySignificand(rhs);
1480 case convolve(fcNormal, fcInfinity):
1484 case convolve(fcNormal, fcZero):
1485 category = fcInfinity;
1488 case convolve(fcInfinity, fcInfinity):
1489 case convolve(fcZero, fcZero):
1493 case convolve(fcNormal, fcNormal):
// Handles the category-level part of mod(), switching on
// convolve(category, rhs.category).
// Visible effect: when only rhs is a NaN its significand is copied into *this.
// NOTE(review): the statements that close each case group are not visible in
// this listing, so the result of the other groups cannot be stated from here.
1499 APFloat::modSpecials(const APFloat &rhs)
1501 switch (convolve(category, rhs.category)) {
1503 llvm_unreachable(0);
1505 case convolve(fcNaN, fcZero):
1506 case convolve(fcNaN, fcNormal):
1507 case convolve(fcNaN, fcInfinity):
1508 case convolve(fcNaN, fcNaN):
1509 case convolve(fcZero, fcInfinity):
1510 case convolve(fcZero, fcNormal):
1511 case convolve(fcNormal, fcInfinity):
1514 case convolve(fcZero, fcNaN):
1515 case convolve(fcNormal, fcNaN):
1516 case convolve(fcInfinity, fcNaN):
1518 copySignificand(rhs);
1521 case convolve(fcNormal, fcZero):
1522 case convolve(fcInfinity, fcZero):
1523 case convolve(fcInfinity, fcNormal):
1524 case convolve(fcInfinity, fcInfinity):
1525 case convolve(fcZero, fcZero):
1529 case convolve(fcNormal, fcNormal):
// NOTE(review): the body is not visible in this listing; by its name this
// presumably inverts the sign of *this - confirm against the full file.
1536 APFloat::changeSign()
1538 /* Look mummy, this one's easy. */
// NOTE(review): the body is not visible in this listing; by its name this
// presumably makes *this non-negative - confirm against the full file.
1543 APFloat::clearSign()
1545 /* So is this one. */
// NOTE(review): the body is not visible in this listing; by its name this
// presumably takes the sign from rhs - confirm against the full file.
1550 APFloat::copySign(const APFloat &rhs)
// Common implementation behind add(), subtract() and the final step of
// fusedMultiplyAdd(); `subtract` selects the operation.
// addOrSubtractSpecials() is consulted first. Only when it reports
// opDivByZero, which is used here as a marker for "both operands need real
// arithmetic", is the significand computed and normalized with rounding_mode.
1556 /* Normalized addition or subtraction. */
1558 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1563 assertArithmeticOK(*semantics);
1565 fs = addOrSubtractSpecials(rhs, subtract);
1567 /* This return code means it was not a simple case. */
1568 if (fs == opDivByZero) {
1569 lostFraction lost_fraction;
1571 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1572 fs = normalize(rounding_mode, lost_fraction);
1574 /* Can only be zero if we lost no fraction. */
1575 assert(category != fcZero || lost_fraction == lfExactlyZero);
1578 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1579 positive zero unless rounding to minus infinity, except that
1580 adding two like-signed zeroes gives that zero. */
1581 if (category == fcZero) {
// The sign is recomputed unless rhs is a zero whose effective sign (after
// accounting for `subtract`) matches ours.
1582 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1583 sign = (rounding_mode == rmTowardNegative);
// Adds rhs to *this by forwarding to addOrSubtract() with subtract == false
// and returns that call's status.
1589 /* Normalized addition. */
1591 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1593 return addOrSubtract(rhs, rounding_mode, false);
// Subtracts rhs from *this by forwarding to addOrSubtract() with
// subtract == true and returns that call's status.
1596 /* Normalized subtraction. */
1598 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1600 return addOrSubtract(rhs, rounding_mode, true);
// Multiplies *this by rhs. multiplySpecials() settles the result category;
// if the result is still fcNormal the significands are multiplied (second
// argument 0, i.e. no addend) and the product is normalized with
// rounding_mode. A non-zero lost fraction adds opInexact to the status.
// NOTE(review): sign handling and the final return are on lines not shown.
1603 /* Normalized multiply. */
1605 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1609 assertArithmeticOK(*semantics);
1611 fs = multiplySpecials(rhs);
1613 if (category == fcNormal) {
1614 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1615 fs = normalize(rounding_mode, lost_fraction);
1616 if (lost_fraction != lfExactlyZero)
1617 fs = (opStatus) (fs | opInexact);
// Divides *this by rhs. divideSpecials() settles the result category; if the
// result is still fcNormal the significands are divided and the quotient is
// normalized with rounding_mode. A non-zero lost fraction adds opInexact to
// the status.
// NOTE(review): sign handling and the final return are on lines not shown.
1623 /* Normalized divide. */
1625 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1629 assertArithmeticOK(*semantics);
1631 fs = divideSpecials(rhs);
1633 if (category == fcNormal) {
1634 lostFraction lost_fraction = divideSignificand(rhs);
1635 fs = normalize(rounding_mode, lost_fraction);
1636 if (lost_fraction != lfExactlyZero)
1637 fs = (opStatus) (fs | opInexact);
// Computes *this - rhs * N in place, where N is the quotient V / rhs
// converted to an integer with rmNearestTiesToEven. The original sign is
// restored at the end.
// NOTE(review): the declaration of V is on a line not shown (presumably a
// copy of *this) - confirm.
// NOTE(review): x is allocated with new[]; neither the statements guarded by
// the two early-exit checks nor a matching delete[] are visible here. Confirm
// the buffer is released on every exit path.
1643 /* Normalized remainder. This is not currently correct in all cases. */
1645 APFloat::remainder(const APFloat &rhs)
1649 unsigned int origSign = sign;
1651 assertArithmeticOK(*semantics);
1652 fs = V.divide(rhs, rmNearestTiesToEven);
1653 if (fs == opDivByZero)
1656 int parts = partCount();
1657 integerPart *x = new integerPart[parts];
// Round the quotient to an integer held in x ...
1659 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1660 rmNearestTiesToEven, &ignored);
1661 if (fs==opInvalidOp)
// ... and load that integer back into V as a floating point value.
1664 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1665 rmNearestTiesToEven);
1666 assert(fs==opOK); // should always work
1668 fs = V.multiply(rhs, rmNearestTiesToEven);
1669 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1671 fs = subtract(V, rmNearestTiesToEven);
1672 assert(fs==opOK || fs==opInexact); // likewise
1675 sign = origSign; // IEEE754 requires this
// Computes *this - rhs * N in place, where N is the quotient V / rhs
// truncated toward zero. The arithmetic path runs only when both operands
// are fcNormal; otherwise the status from modSpecials() stands. The multiply
// and subtract steps use the caller's rounding_mode, and the original sign is
// restored at the end.
// NOTE(review): the declaration of V is on a line not shown (presumably a
// copy of *this) - confirm.
// NOTE(review): x is allocated with new[]; neither the statements guarded by
// the two early-exit checks nor a matching delete[] are visible here. Confirm
// the buffer is released on every exit path.
1680 /* Normalized llvm frem (C fmod).
1681 This is not currently correct in all cases. */
1683 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1686 assertArithmeticOK(*semantics);
1687 fs = modSpecials(rhs);
1689 if (category == fcNormal && rhs.category == fcNormal) {
1691 unsigned int origSign = sign;
1693 fs = V.divide(rhs, rmNearestTiesToEven);
1694 if (fs == opDivByZero)
1697 int parts = partCount();
1698 integerPart *x = new integerPart[parts];
// Truncation (rmTowardZero) is the visible difference from remainder().
1700 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1701 rmTowardZero, &ignored);
1702 if (fs==opInvalidOp)
1705 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1706 rmNearestTiesToEven);
1707 assert(fs==opOK); // should always work
1709 fs = V.multiply(rhs, rounding_mode);
1710 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1712 fs = subtract(V, rounding_mode);
1713 assert(fs==opOK || fs==opInexact); // likewise
1716 sign = origSign; // IEEE754 requires this
// Computes (*this * multiplicand) + addend in place.
// When all three values are fcNormal, the addend is passed into
// multiplySignificand() and the result is normalized once with
// rounding_mode. Otherwise multiplySpecials() handles the product and the
// addend is added through addOrSubtract().
// NOTE(review): the condition guarding the final addOrSubtract() call is on a
// line not shown in this listing.
1722 /* Normalized fused-multiply-add. */
1724 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1725 const APFloat &addend,
1726 roundingMode rounding_mode)
1730 assertArithmeticOK(*semantics);
1732 /* Post-multiplication sign, before addition. */
1733 sign ^= multiplicand.sign;
1735 /* If and only if all arguments are normal do we need to do an
1736 extended-precision calculation. */
1737 if (category == fcNormal &&
1738 multiplicand.category == fcNormal &&
1739 addend.category == fcNormal) {
1740 lostFraction lost_fraction;
1742 lost_fraction = multiplySignificand(multiplicand, &addend);
1743 fs = normalize(rounding_mode, lost_fraction);
1744 if (lost_fraction != lfExactlyZero)
1745 fs = (opStatus) (fs | opInexact);
1747 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1748 positive zero unless rounding to minus infinity, except that
1749 adding two like-signed zeroes gives that zero. */
1750 if (category == fcZero && sign != addend.sign)
1751 sign = (rounding_mode == rmTowardNegative);
1753 fs = multiplySpecials(multiplicand);
1755 /* FS can only be opOK or opInvalidOp. There is no more work
1756 to do in the latter case. The IEEE-754R standard says it is
1757 implementation-defined in this case whether, if ADDEND is a
1758 quiet NaN, we raise invalid op; this implementation does so.
1760 If we need to do the addition we can do so with normal
1763 fs = addOrSubtract(addend, rounding_mode, false);
// Compares *this with rhs, which must share the same semantics (asserted).
// Any pairing that involves a NaN yields cmpUnordered. For two normal
// numbers with the same sign the result of compareAbsoluteValue() is used
// and swapped between cmpLessThan and cmpGreaterThan in the visible
// inversion step.
// NOTE(review): several branch conditions and return statements (sign tests,
// the equal and less-than results) are on lines not shown in this listing.
1769 /* Comparison requires normalized numbers. */
1771 APFloat::compare(const APFloat &rhs) const
1775 assertArithmeticOK(*semantics);
1776 assert(semantics == rhs.semantics);
1778 switch (convolve(category, rhs.category)) {
1780 llvm_unreachable(0);
1782 case convolve(fcNaN, fcZero):
1783 case convolve(fcNaN, fcNormal):
1784 case convolve(fcNaN, fcInfinity):
1785 case convolve(fcNaN, fcNaN):
1786 case convolve(fcZero, fcNaN):
1787 case convolve(fcNormal, fcNaN):
1788 case convolve(fcInfinity, fcNaN):
1789 return cmpUnordered;
1791 case convolve(fcInfinity, fcNormal):
1792 case convolve(fcInfinity, fcZero):
1793 case convolve(fcNormal, fcZero):
1797 return cmpGreaterThan;
1799 case convolve(fcNormal, fcInfinity):
1800 case convolve(fcZero, fcInfinity):
1801 case convolve(fcZero, fcNormal):
1803 return cmpGreaterThan;
1807 case convolve(fcInfinity, fcInfinity):
1808 if (sign == rhs.sign)
1813 return cmpGreaterThan;
1815 case convolve(fcZero, fcZero):
1818 case convolve(fcNormal, fcNormal):
1822 /* Two normal numbers. Do they have the same sign? */
1823 if (sign != rhs.sign) {
1825 result = cmpLessThan;
1827 result = cmpGreaterThan;
1829 /* Compare absolute values; invert result if negative. */
1830 result = compareAbsoluteValue(rhs);
1833 if (result == cmpLessThan)
1834 result = cmpGreaterThan;
1835 else if (result == cmpGreaterThan)
1836 result = cmpLessThan;
1843 /// APFloat::convert - convert a value of one floating point type to another.
1844 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1845 /// records whether the transformation lost information, i.e. whether
1846 /// converting the result back to the original type will produce the
1847 /// original value (this is almost the same as return value==opOK, but there
1848 /// are edge cases where this is not so).
// The conversion happens in place: the significand storage is resized for
// toSemantics first, then the value is re-interpreted per category.
// NOTE(review): some lines (storage release, assignments to *losesInfo in
// the NaN and default paths, the final return) are not shown in this listing.
1851 APFloat::convert(const fltSemantics &toSemantics,
1852 roundingMode rounding_mode, bool *losesInfo)
1854 lostFraction lostFraction;
1855 unsigned int newPartCount, oldPartCount;
1858 assertArithmeticOK(*semantics);
1859 assertArithmeticOK(toSemantics);
1860 lostFraction = lfExactlyZero;
1861 newPartCount = partCountForBits(toSemantics.precision + 1);
1862 oldPartCount = partCount();
1864 /* Handle storage complications. If our new form is wider,
1865 re-allocate our bit pattern into wider storage. If it is
1866 narrower, we ignore the excess parts, but if narrowing to a
1867 single part we need to free the old storage.
1868 Be careful not to reference significandParts for zeroes
1869 and infinities, since it aborts. */
1870 if (newPartCount > oldPartCount) {
1871 integerPart *newParts;
1872 newParts = new integerPart[newPartCount];
// Zero-fill first so the parts beyond oldPartCount are defined.
1873 APInt::tcSet(newParts, 0, newPartCount);
1874 if (category==fcNormal || category==fcNaN)
1875 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1877 significand.parts = newParts;
1878 } else if (newPartCount < oldPartCount) {
1879 /* Capture any lost fraction through truncation of parts so we get
1880 correct rounding whilst normalizing. */
1881 if (category==fcNormal)
1882 lostFraction = lostFractionThroughTruncation
1883 (significandParts(), oldPartCount, toSemantics.precision);
1884 if (newPartCount == 1) {
1885 integerPart newPart = 0;
1886 if (category==fcNormal || category==fcNaN)
1887 newPart = significandParts()[0];
1889 significand.part = newPart;
1893 if (category == fcNormal) {
1894 /* Re-interpret our bit-pattern. */
1895 exponent += toSemantics.precision - semantics->precision;
1896 semantics = &toSemantics;
1897 fs = normalize(rounding_mode, lostFraction);
// For normal values any non-opOK status from normalize counts as lost info.
1898 *losesInfo = (fs != opOK);
1899 } else if (category == fcNaN) {
1900 int shift = toSemantics.precision - semantics->precision;
1901 // Do this now so significandParts gets the right answer
1902 const fltSemantics *oldSemantics = semantics;
1903 semantics = &toSemantics;
1905 // No normalization here, just truncate
1907 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1908 else if (shift < 0) {
1909 unsigned ushift = -shift;
1910 // Figure out if we are losing information. This happens
1911 // if we are shifting out something other than 0s, or if the x87 long
1912 // double input did not have its integer bit set (pseudo-NaN), or if the
1913 // x87 long double input did not have its QNan bit set (because the x87
1914 // hardware sets this bit when converting a lower-precision NaN to
1915 // x87 long double).
1916 if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
1918 if (oldSemantics == &APFloat::x87DoubleExtended &&
1919 (!(*significandParts() & 0x8000000000000000ULL) ||
1920 !(*significandParts() & 0x4000000000000000ULL)))
1922 APInt::tcShiftRight(significandParts(), newPartCount, ushift);
1924 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1925 // does not give you back the same bits. This is dubious, and we
1926 // don't currently do it. You're really supposed to get
1927 // an invalid operation signal at runtime, but nobody does that.
1930 semantics = &toSemantics;
// Writes the integer value of *this into parts, which holds
// partCountForBits(width) words, in the three steps labelled below.
// NOTE(review): several branch heads, return statements and the writes to
// *isExact are on lines not shown in this listing.
1938 /* Convert a floating point number to an integer according to the
1939 rounding mode. If the rounded integer value is out of range this
1940 returns an invalid operation exception and the contents of the
1941 destination parts are unspecified. If the rounded value is in
1942 range but the floating point number is not the exact integer, the C
1943 standard doesn't require an inexact exception to be raised. IEEE
1944 854 does require it so we do that.
1946 Note that for conversions to integer type the C standard requires
1947 round-to-zero to always be used. */
1949 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1951 roundingMode rounding_mode,
1952 bool *isExact) const
1954 lostFraction lost_fraction;
1955 const integerPart *src;
1956 unsigned int dstPartsCount, truncatedBits;
1958 assertArithmeticOK(*semantics);
1962 /* Handle the three special cases first. */
1963 if (category == fcInfinity || category == fcNaN)
1966 dstPartsCount = partCountForBits(width);
1968 if (category == fcZero) {
1969 APInt::tcSet(parts, 0, dstPartsCount);
1970 // Negative zero can't be represented as an int.
1975 src = significandParts();
1977 /* Step 1: place our absolute value, with any fraction truncated, in
1980 /* Our absolute value is less than one; truncate everything. */
1981 APInt::tcSet(parts, 0, dstPartsCount);
1982 /* For exponent -1 the integer bit represents .5, look at that.
1983 For smaller exponents leftmost truncated bit is 0. */
1984 truncatedBits = semantics->precision -1U - exponent;
1986 /* We want the most significant (exponent + 1) bits; the rest are
1988 unsigned int bits = exponent + 1U;
1990 /* Hopelessly large in magnitude? */
1994 if (bits < semantics->precision) {
1995 /* We truncate (semantics->precision - bits) bits. */
1996 truncatedBits = semantics->precision - bits;
1997 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1999 /* We want at least as many bits as are available. */
2000 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2001 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2006 /* Step 2: work out any lost fraction, and increment the absolute
2007 value if we would round away from zero. */
2008 if (truncatedBits) {
2009 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2011 if (lost_fraction != lfExactlyZero &&
2012 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2013 if (APInt::tcIncrement(parts, dstPartsCount))
2014 return opInvalidOp; /* Overflow. */
2017 lost_fraction = lfExactlyZero;
2020 /* Step 3: check if we fit in the destination. */
// omsb is the one-based position of the highest set bit of the magnitude.
2021 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2025 /* Negative numbers cannot be represented as unsigned. */
2029 /* It takes omsb bits to represent the unsigned integer value.
2030 We lose a bit for the sign, but care is needed as the
2031 maximally negative integer is a special case. */
2032 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2035 /* This case can happen because of rounding. */
2040 APInt::tcNegate (parts, dstPartsCount);
2042 if (omsb >= width + !isSigned)
2046 if (lost_fraction == lfExactlyZero) {
// Wrapper around convertToSignExtendedInteger(). When that call reports
// opInvalidOp, parts is overwritten with a pattern built from `bits` low
// set bits, shifted left by width - 1 for negative values converted to a
// signed integer.
// NOTE(review): the assignments to `bits` for the NaN and negative cases and
// the final return are on lines not shown in this listing.
2053 /* Same as convertToSignExtendedInteger, except we provide
2054 deterministic values in case of an invalid operation exception,
2055 namely zero for NaNs and the minimal or maximal value respectively
2056 for underflow or overflow.
2057 The *isExact output tells whether the result is exact, in the sense
2058 that converting it back to the original floating point type produces
2059 the original value. This is almost equivalent to result==opOK,
2060 except for negative zeroes.
2063 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2065 roundingMode rounding_mode, bool *isExact) const
2069 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2072 if (fs == opInvalidOp) {
2073 unsigned int bits, dstPartsCount;
2075 dstPartsCount = partCountForBits(width);
2077 if (category == fcNaN)
2082 bits = width - isSigned;
2084 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2085 if (sign && isSigned)
2086 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
// APSInt overload: converts into a temporary word buffer sized from
// result.getNumWords(), using result's bit width and signedness, then
// stores the words back into result as an APInt of the same width.
// NOTE(review): the return of `status` is on a line not shown here.
2092 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2093 an APSInt, whose initial bit-width and signed-ness are used to determine the
2094 precision of the conversion.
2097 APFloat::convertToInteger(APSInt &result,
2098 roundingMode rounding_mode, bool *isExact) const
2100 unsigned bitWidth = result.getBitWidth();
2101 SmallVector<uint64_t, 4> parts(result.getNumWords());
2102 opStatus status = convertToInteger(
2103 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2104 // Keeps the original signed-ness.
2105 result = APInt(bitWidth, parts);
// src holds srcCount words. The category is set to fcNormal, the top
// `precision` bits of src (or all of them if there are fewer) are extracted
// into the significand, and normalize() produces the returned status.
2109 /* Convert an unsigned integer SRC to a floating point number,
2110 rounding according to ROUNDING_MODE. The sign of the floating
2111 point number is not modified. */
2113 APFloat::convertFromUnsignedParts(const integerPart *src,
2114 unsigned int srcCount,
2115 roundingMode rounding_mode)
2117 unsigned int omsb, precision, dstCount;
2119 lostFraction lost_fraction;
2121 assertArithmeticOK(*semantics);
2122 category = fcNormal;
// omsb is the one-based position of the highest set bit in src.
2123 omsb = APInt::tcMSB(src, srcCount) + 1;
2124 dst = significandParts();
2125 dstCount = partCount();
2126 precision = semantics->precision;
2128 /* We want the most significant PRECISION bits of SRC. There may not
2129 be that many; extract what we can. */
2130 if (precision <= omsb) {
2131 exponent = omsb - 1;
2132 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2134 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2136 exponent = precision - 1;
2137 lost_fraction = lfExactlyZero;
2138 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2141 return normalize(rounding_mode, lost_fraction);
// Converts the integer Val to floating point by passing its raw words to
// convertFromUnsignedParts().
// NOTE(review): the declaration of `api` and the body of the negative branch
// are on lines not shown; presumably the sign is recorded and the value
// negated there - confirm against the full file.
2145 APFloat::convertFromAPInt(const APInt &Val,
2147 roundingMode rounding_mode)
2149 unsigned int partCount = Val.getNumWords();
2153 if (isSigned && api.isNegative()) {
2158 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
// The top bit of the srcCount-word input is tested. For a negative value a
// negated heap copy is converted; otherwise src is converted directly.
// NOTE(review): `copy` is allocated with new[]; its release is not visible
// in this listing - confirm it is freed after the conversion.
2161 /* Convert a two's complement integer SRC to a floating point number,
2162 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2163 integer is signed, in which case it must be sign-extended. */
2165 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2166 unsigned int srcCount,
2168 roundingMode rounding_mode)
2172 assertArithmeticOK(*semantics);
2174 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2177 /* If we're signed and negative negate a copy. */
2179 copy = new integerPart[srcCount];
2180 APInt::tcAssign(copy, src, srcCount);
2181 APInt::tcNegate(copy, srcCount);
2182 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2186 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
// Converts a width-bit integer stored in parts to floating point. The words
// are wrapped in an APInt, bit width - 1 is tested when isSigned is set, and
// the raw words are passed to convertFromUnsignedParts().
// NOTE(review): the body of the negative branch is on lines not shown;
// presumably the sign is recorded and the value negated there - confirm.
2192 /* FIXME: should this just take a const APInt reference? */
2194 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2195 unsigned int width, bool isSigned,
2196 roundingMode rounding_mode)
2198 unsigned int partCount = partCountForBits(width);
2199 APInt api = APInt(width, makeArrayRef(parts, partCount));
2202 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2207 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
// Parses a hexadecimal significand followed by a binary exponent from s
// into *this and normalizes with rounding_mode. Digits are stored as 4-bit
// nibbles starting from the most significant end of the significand; digits
// that no longer fit feed the lost fraction.
// Malformed input (second dot, missing 'p'/'P' exponent marker, no
// significand digits) is caught by assert only.
// NOTE(review): the loop header and several branch heads are on lines not
// shown in this listing.
2211 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2213 lostFraction lost_fraction = lfExactlyZero;
2214 integerPart *significand;
2215 unsigned int bitPos, partsCount;
2216 StringRef::iterator dot, firstSignificantDigit;
2220 category = fcNormal;
2222 significand = significandParts();
2223 partsCount = partCount();
// bitPos starts one past the top bit of the significand storage.
2224 bitPos = partsCount * integerPartWidth;
2226 /* Skip leading zeroes and any (hexa)decimal point. */
2227 StringRef::iterator begin = s.begin();
2228 StringRef::iterator end = s.end();
2229 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2230 firstSignificantDigit = p;
2233 integerPart hex_value;
2236 assert(dot == end && "String contains multiple dots");
2243 hex_value = hexDigitValue(*p);
// hexDigitValue yields -1U for a character that is not a hex digit.
2244 if (hex_value == -1U) {
2253 /* Store the number whilst 4-bit nibbles remain. */
2256 hex_value <<= bitPos % integerPartWidth;
2257 significand[bitPos / integerPartWidth] |= hex_value;
2259 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2260 while (p != end && hexDigitValue(*p) != -1U)
2267 /* Hex floats require an exponent but not a hexadecimal point. */
2268 assert(p != end && "Hex strings require an exponent");
2269 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2270 assert(p != begin && "Significand has no digits");
2271 assert((dot == end || p - begin != 1) && "Significand has no digits");
2273 /* Ignore the exponent if we are zero. */
2274 if (p != firstSignificantDigit) {
2277 /* Implicit hexadecimal point? */
2281 /* Calculate the exponent adjustment implicit in the number of
2282 significant digits. */
2283 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2284 if (expAdjustment < 0)
2286 expAdjustment = expAdjustment * 4 - 1;
2288 /* Adjust for writing the significand starting at the most
2289 significant nibble. */
2290 expAdjustment += semantics->precision;
2291 expAdjustment -= partsCount * integerPartWidth;
2293 /* Adjust for the given exponent. */
2294 exponent = totalExponent(p + 1, end, expAdjustment);
2297 return normalize(rounding_mode, lost_fraction);
// Sets *this to the value decSigParts * 10^exp, rounded with rounding_mode.
// The integer significand is multiplied or divided by 5^|exp| in a wider
// working format (calcSemantics) and exp is added to the binary exponent.
// The working width starts at partCountForBits(precision + 11) parts and is
// doubled on each loop iteration until the error bound HUerr no longer
// exceeds HUdistance, at which point the result is extracted and normalized.
// NOTE(review): the branch choosing multiply versus divide and a few
// argument continuation lines are not shown in this listing.
2301 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2302 unsigned sigPartCount, int exp,
2303 roundingMode rounding_mode)
2305 unsigned int parts, pow5PartCount;
2306 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2307 integerPart pow5Parts[maxPowerOfFiveParts];
2310 isNearest = (rounding_mode == rmNearestTiesToEven ||
2311 rounding_mode == rmNearestTiesToAway);
2313 parts = partCountForBits(semantics->precision + 11);
2315 /* Calculate pow(5, abs(exp)). */
2316 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2318 for (;; parts *= 2) {
2319 opStatus sigStatus, powStatus;
2320 unsigned int excessPrecision, truncatedBits;
2322 calcSemantics.precision = parts * integerPartWidth - 1;
2323 excessPrecision = calcSemantics.precision - semantics->precision;
2324 truncatedBits = excessPrecision;
2326 APFloat decSig(calcSemantics, fcZero, sign);
2327 APFloat pow5(calcSemantics, fcZero, false);
2329 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2330 rmNearestTiesToEven);
2331 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2332 rmNearestTiesToEven);
2333 /* Add exp, as 10^n = 5^n * 2^n. */
2334 decSig.exponent += exp;
2336 lostFraction calcLostFraction;
2337 integerPart HUerr, HUdistance;
2338 unsigned int powHUerr;
2341 /* multiplySignificand leaves the precision-th bit set to 1. */
2342 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2343 powHUerr = powStatus != opOK;
2345 calcLostFraction = decSig.divideSignificand(pow5);
2346 /* Denormal numbers have less precision. */
2347 if (decSig.exponent < semantics->minExponent) {
2348 excessPrecision += (semantics->minExponent - decSig.exponent);
2349 truncatedBits = excessPrecision;
2350 if (excessPrecision > calcSemantics.precision)
2351 excessPrecision = calcSemantics.precision;
2353 /* Extra half-ulp lost in reciprocal of exponent. */
2354 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2357 /* Both multiplySignificand and divideSignificand return the
2358 result with the integer bit set. */
2359 assert(APInt::tcExtractBit
2360 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2362 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2364 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2365 excessPrecision, isNearest);
2367 /* Are we guaranteed to round correctly if we truncate? */
2368 if (HUdistance >= HUerr) {
2369 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2370 calcSemantics.precision - excessPrecision,
2372 /* Take the exponent of decSig. If we tcExtract-ed fewer bits
2373 above we must adjust our exponent to compensate for the
2374 implicit right shift. */
2375 exponent = (decSig.exponent + semantics->precision
2376 - (calcSemantics.precision - excessPrecision));
2377 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2380 return normalize(rounding_mode, calcLostFraction);
// Parses a decimal string into *this. interpretDecimal() fills D; cheap
// range checks on D.normalizedExponent settle certain overflow or underflow
// first. Otherwise the decimal digits are accumulated into a heap-allocated
// multi-word integer, which roundSignificandWithExponent() converts with
// D.exponent, and the buffer is freed afterwards.
// NOTE(review): the zero-input branch body, loop heads and the final return
// are on lines not shown in this listing.
2386 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2391 /* Scan the text. */
2392 StringRef::iterator p = str.begin();
2393 interpretDecimal(p, str.end(), &D);
2395 /* Handle the quick cases. First the case of no significant digits,
2396 i.e. zero, and then exponents that are obviously too large or too
2397 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2398 definitely overflows if
2400 (exp - 1) * L >= maxExponent
2402 and definitely underflows to zero where
2404 (exp + 1) * L <= minExponent - precision
2406 With integer arithmetic the tightest bounds for L are
2408 93/28 < L < 196/59 [ numerator <= 256 ]
2409 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2412 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2416 /* Check whether the normalized exponent is high enough to overflow
2417 max during the log-rebasing in the max-exponent check below. */
2418 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2419 fs = handleOverflow(rounding_mode);
2421 /* If it wasn't, then it also wasn't high enough to overflow max
2422 during the log-rebasing in the min-exponent check. Check that it
2423 won't overflow min in either check, then perform the min-exponent
2425 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2426 (D.normalizedExponent + 1) * 28738 <=
2427 8651 * (semantics->minExponent - (int) semantics->precision)) {
2428 /* Underflow to zero and round. */
2430 fs = normalize(rounding_mode, lfLessThanHalf);
2432 /* We can finally safely perform the max-exponent check. */
2433 } else if ((D.normalizedExponent - 1) * 42039
2434 >= 12655 * semantics->maxExponent) {
2435 /* Overflow and round. */
2436 fs = handleOverflow(rounding_mode);
2438 integerPart *decSignificand;
2439 unsigned int partCount;
2441 /* A tight upper bound on number of bits required to hold an
2442 N-digit decimal integer is N * 196 / 59. Allocate enough space
2443 to hold the full significand, and an extra part required by
2445 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2446 partCount = partCountForBits(1 + 196 * partCount / 59);
2447 decSignificand = new integerPart[partCount + 1];
2450 /* Convert to binary efficiently - we do almost all multiplication
2451 in an integerPart. When this would overflow we do a single
2452 bignum multiplication, and then revert again to multiplication
2453 in an integerPart. */
2455 integerPart decValue, val, multiplier;
2463 if (p == str.end()) {
2467 decValue = decDigitValue(*p++);
2468 assert(decValue < 10U && "Invalid character in significand");
2470 val = val * 10 + decValue;
2471 /* The maximum number that can be multiplied by ten with any
2472 digit added without overflowing an integerPart. */
2473 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2475 /* Multiply out the current part. */
2476 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2477 partCount, partCount + 1, false);
2479 /* If we used another part (likely but not guaranteed), increase
2481 if (decSignificand[partCount])
2483 } while (p <= D.lastSigDigit);
2485 category = fcNormal;
2486 fs = roundSignificandWithExponent(decSignificand, partCount,
2487 D.exponent, rounding_mode);
2489 delete [] decSignificand;
// Entry point for string parsing. The sign is taken from a leading '-', a
// leading '-' or '+' is handled next, and a "0x"/"0X" prefix routes the rest
// to convertFromHexadecimalString(); anything else goes to
// convertFromDecimalString(). An empty string or one with no digits is
// rejected by assert only.
// NOTE(review): the statements that step past the sign character are on
// lines not shown in this listing.
2496 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2498 assertArithmeticOK(*semantics);
2499 assert(!str.empty() && "Invalid string length");
2501 /* Handle a leading minus sign. */
2502 StringRef::iterator p = str.begin();
2503 size_t slen = str.size();
2504 sign = *p == '-' ? 1 : 0;
2505 if (*p == '-' || *p == '+') {
2508 assert(slen && "String has no digits");
2511 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2512 assert(slen - 2 && "Invalid string");
2513 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2517 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
// Returns the number of characters written, computed as dst - p.
// NOTE(review): the switch on category, the declaration of p and the
// NUL-termination are on lines not shown in this listing.
// NOTE(review): the infinity branch copies sizeof infinityU - 1 bytes but
// advances dst by sizeof infinityL - 1; this is only consistent if both
// arrays have the same size - confirm against their definitions.
2520 /* Write out a hexadecimal representation of the floating point value
2521 to DST, which must be of sufficient size, in the C99 form
2522 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2523 excluding the terminating NUL.
2525 If UPPERCASE, the output is in upper case, otherwise in lower case.
2527 HEXDIGITS digits appear altogether, rounding the value if
2528 necessary. If HEXDIGITS is 0, the minimal precision to display the
2529 number precisely is used instead. If nothing would appear after
2530 the decimal point it is suppressed.
2532 The decimal exponent is always printed and has at least one digit.
2533 Zero values display an exponent of zero. Infinities and NaNs
2534 appear as "infinity" or "nan" respectively.
2536 The above rules are as specified by C99. There is ambiguity about
2537 what the leading hexadecimal digit should be. This implementation
2538 uses whatever is necessary so that the exponent is displayed as
2539 stored. This implies the exponent will fall within the IEEE format
2540 range, and the leading hexadecimal digit will be 0 (for denormals),
2541 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2542 any other digits zero).
2545 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2546 bool upperCase, roundingMode rounding_mode) const
2550 assertArithmeticOK(*semantics);
2558 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2559 dst += sizeof infinityL - 1;
2563 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2564 dst += sizeof NaNU - 1;
2569 *dst++ = upperCase ? 'X': 'x';
2571 if (hexDigits > 1) {
// Pad with hexDigits - 1 zero digits.
2573 memset (dst, '0', hexDigits - 1);
2574 dst += hexDigits - 1;
2576 *dst++ = upperCase ? 'P': 'p';
2581 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2587 return static_cast<unsigned int>(dst - p);
2590 /* Does the hard work of outputting the correctly rounded hexadecimal
2591 form of a normal floating point number with the specified number of
2592 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2593 digits necessary to print the value precisely is output. */
// Parameters: dst is the write position, hexDigits the requested digit count,
// upperCase selects the digit and marker case, rounding_mode decides how
// dropped bits are rounded. The return value is whatever writeSignedDecimal
// returns after the exponent has been written.
// NOTE(review): some short statements of this function (declarations of
// roundUp, q, p and part, and a few branch lines) are not among the lines
// shown here; comments describe visible statements only.
2595 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2597 roundingMode rounding_mode) const
2599 unsigned int count, valueBits, shift, partsCount, outputDigits;
2600 const char *hexDigitChars;
2601 const integerPart *significand;
2606 *dst++ = upperCase ? 'X': 'x';
// Digit table matching the requested case.
2609 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2611 significand = significandParts();
2612 partsCount = partCount();
2614 /* +3 because the first digit only uses the single integer bit, so
2615 we have 3 virtual zero most-significant-bits. */
2616 valueBits = semantics->precision + 3;
// Left shift applied to each part below so that the padded value is aligned
// to the top of an integerPart.
2617 shift = integerPartWidth - valueBits % integerPartWidth;
2619 /* The natural number of digits required ignoring trailing
2620 insignificant zeroes. */
2621 outputDigits = (valueBits - significandLSB () + 3) / 4;
2623 /* hexDigits of zero means use the required number for the
2624 precision. Otherwise, see if we are truncating. If we are,
2625 find out if we need to round away from zero. */
2627 if (hexDigits < outputDigits) {
2628 /* We are dropping non-zero bits, so need to check how to round.
2629 "bits" is the number of dropped bits. */
2631 lostFraction fraction;
2633 bits = valueBits - hexDigits * 4;
2634 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2635 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2637 outputDigits = hexDigits;
2640 /* Write the digits consecutively, and start writing in the location
2641 of the hexadecimal point. We move the most significant digit
2642 left and add the hexadecimal point later. */
// Number of integerPart-sized chunks needed to cover valueBits, rounded up.
2645 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2647 while (outputDigits && count) {
2650 /* Put the most significant integerPartWidth bits in "part". */
2651 if (--count == partsCount)
2652 part = 0; /* An imaginary higher zero part. */
2654 part = significand[count] << shift;
// Bring in the bits that the shift pushed out of the next lower part.
2657 part |= significand[count - 1] >> (integerPartWidth - shift);
2659 /* Convert as much of "part" to hexdigits as we can. */
// One integerPart yields integerPartWidth / 4 digits; clamp to the number of
// digits still to be written.
2660 unsigned int curDigits = integerPartWidth / 4;
2662 if (curDigits > outputDigits)
2663 curDigits = outputDigits;
2664 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2665 outputDigits -= curDigits;
2671 /* Note that hexDigitChars has a trailing '0'. */
// Rounding increment: replace the digit at q by its successor and repeat
// while the result is the digit zero, which signals a carry. How q moves
// between iterations is not shown here.
2674 *q = hexDigitChars[hexDigitValue (*q) + 1];
2675 } while (*q == '0');
2678 /* Add trailing zeroes. */
2679 memset (dst, '0', outputDigits);
2680 dst += outputDigits;
2683 /* Move the most significant digit to before the point, and if there
2684 is something after the decimal point add it. This must come
2685 after rounding above. */
2692 /* Finally output the exponent. */
2693 *dst++ = upperCase ? 'P': 'p';
2695 return writeSignedDecimal (dst, exponent);
2698 // For good performance it is desirable for different APFloats
2699 // to produce different integers.
2701 APFloat::getHashValue() const
2703 if (category==fcZero) return sign<<8 | semantics->precision ;
2704 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2705 else if (category==fcNaN) return 1<<10 | semantics->precision;
2707 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2708 const integerPart* p = significandParts();
2709 for (int i=partCount(); i>0; i--, p++)
2710 hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
2715 // Conversion from APFloat to/from host float/double. It may eventually be
2716 // possible to eliminate these and have everybody deal with APFloats, but that
2717 // will take a while. This approach will not easily extend to long double.
2718 // Current implementation requires integerPartWidth==64, which is correct at
2719 // the moment but could be made more general.
2721 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2722 // the actual IEEE representations. We compensate for that here.
// Packs an x87DoubleExtended value into an 80-bit APInt: word 0 carries the
// 64-bit significand, word 1 carries the 15-bit biased exponent with the sign
// in bit 15.
// NOTE(review): the zero branch body, the final else line and the
// declaration of words are not among the lines shown here.
2725 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2727 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2728 assert(partCount()==2);
2730 uint64_t myexponent, mysignificand;
2732 if (category==fcNormal) {
2733 myexponent = exponent+16383; //bias
2734 mysignificand = significandParts()[0];
// A biased exponent of 1 with bit 63 of the significand clear is stored
// with exponent field 0.
2735 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2736 myexponent = 0; // denormal
2737 } else if (category==fcZero) {
2740 } else if (category==fcInfinity) {
// Infinity: all-ones exponent and only bit 63 of the significand set.
2741 myexponent = 0x7fff;
2742 mysignificand = 0x8000000000000000ULL;
2744 assert(category == fcNaN && "Unknown category");
// NaN: all-ones exponent, significand taken from part 0 as stored.
2745 myexponent = 0x7fff;
2746 mysignificand = significandParts()[0];
2750 words[0] = mysignificand;
2751 words[1] = ((uint64_t)(sign & 1) << 15) |
2752 (myexponent & 0x7fffLL);
2753 return APInt(80, words);
// Packs a PPCDoubleDouble value into a 128-bit APInt. Each 64-bit word is
// laid out as sign (bit 63), 11-bit exponent field (bits 52-62) and 52-bit
// significand field; word 0 uses sign/exponent/part 0, word 1 uses
// sign2/exponent2/part 1.
// NOTE(review): the zero and infinity branch bodies, part of the NaN branch
// and the declaration of words are not among the lines shown here.
2757 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2759 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2760 assert(partCount()==2);
2762 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2764 if (category==fcNormal) {
2765 myexponent = exponent + 1023; //bias
2766 myexponent2 = exponent2 + 1023;
2767 mysignificand = significandParts()[0];
2768 mysignificand2 = significandParts()[1];
// For either half, a biased exponent of 1 with bit 52 clear is stored with
// exponent field 0.
2769 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2770 myexponent = 0; // denormal
2771 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2772 myexponent2 = 0; // denormal
2773 } else if (category==fcZero) {
2778 } else if (category==fcInfinity) {
2784 assert(category == fcNaN && "Unknown category");
// NaN: both significand parts are emitted as stored, and exponent2 is
// copied without adding the bias.
2786 mysignificand = significandParts()[0];
2787 myexponent2 = exponent2;
2788 mysignificand2 = significandParts()[1];
2792 words[0] = ((uint64_t)(sign & 1) << 63) |
2793 ((myexponent & 0x7ff) << 52) |
2794 (mysignificand & 0xfffffffffffffLL);
2795 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2796 ((myexponent2 & 0x7ff) << 52) |
2797 (mysignificand2 & 0xfffffffffffffLL);
2798 return APInt(128, words);
// Packs an IEEEquad value into a 128-bit APInt. Word 0 is the low 64
// significand bits; word 1 holds the sign (bit 63), the 15-bit exponent field
// (bits 48-62) and the low 48 bits of the upper significand part.
// NOTE(review): one line of the zero branch, the final else line and the
// declaration of words are not among the lines shown here.
2802 APFloat::convertQuadrupleAPFloatToAPInt() const
2804 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2805 assert(partCount()==2);
2807 uint64_t myexponent, mysignificand, mysignificand2;
2809 if (category==fcNormal) {
2810 myexponent = exponent+16383; //bias
2811 mysignificand = significandParts()[0];
2812 mysignificand2 = significandParts()[1];
// A biased exponent of 1 with bit 48 of the upper part clear is stored with
// exponent field 0.
2813 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2814 myexponent = 0; // denormal
2815 } else if (category==fcZero) {
2817 mysignificand = mysignificand2 = 0;
2818 } else if (category==fcInfinity) {
// Infinity: all-ones exponent, zero significand.
2819 myexponent = 0x7fff;
2820 mysignificand = mysignificand2 = 0;
2822 assert(category == fcNaN && "Unknown category!");
// NaN: all-ones exponent, significand parts emitted as stored.
2823 myexponent = 0x7fff;
2824 mysignificand = significandParts()[0];
2825 mysignificand2 = significandParts()[1];
2829 words[0] = mysignificand;
2830 words[1] = ((uint64_t)(sign & 1) << 63) |
2831 ((myexponent & 0x7fff) << 48) |
2832 (mysignificand2 & 0xffffffffffffLL);
2834 return APInt(128, words);
// Packs an IEEEdouble value into a 64-bit APInt: sign in bit 63, 11-bit
// exponent field in bits 52-62, low 52 significand bits below that.
// NOTE(review): the zero and infinity branch bodies and part of the NaN
// branch are not among the lines shown here.
2838 APFloat::convertDoubleAPFloatToAPInt() const
2840 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2841 assert(partCount()==1);
2843 uint64_t myexponent, mysignificand;
2845 if (category==fcNormal) {
2846 myexponent = exponent+1023; //bias
2847 mysignificand = *significandParts();
// A biased exponent of 1 with bit 52 clear is stored with exponent field 0.
2848 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2849 myexponent = 0; // denormal
2850 } else if (category==fcZero) {
2853 } else if (category==fcInfinity) {
2857 assert(category == fcNaN && "Unknown category!");
// NaN: the significand is emitted as stored.
2859 mysignificand = *significandParts();
// The masks drop everything outside each field, including bit 52 of the
// significand.
2862 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2863 ((myexponent & 0x7ff) << 52) |
2864 (mysignificand & 0xfffffffffffffLL))));
// Packs an IEEEsingle value into a 32-bit APInt: sign in bit 31, 8-bit
// exponent field in bits 23-30, low 23 significand bits below that.
// NOTE(review): the zero and infinity branch bodies and part of the NaN
// branch are not among the lines shown here.
2868 APFloat::convertFloatAPFloatToAPInt() const
2870 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2871 assert(partCount()==1);
2873 uint32_t myexponent, mysignificand;
2875 if (category==fcNormal) {
2876 myexponent = exponent+127; //bias
2877 mysignificand = (uint32_t)*significandParts();
// A biased exponent of 1 with bit 23 clear is stored with exponent field 0.
2878 if (myexponent == 1 && !(mysignificand & 0x800000))
2879 myexponent = 0; // denormal
2880 } else if (category==fcZero) {
2883 } else if (category==fcInfinity) {
2887 assert(category == fcNaN && "Unknown category!");
// NaN: the significand is emitted as stored.
2889 mysignificand = (uint32_t)*significandParts();
2892 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2893 (mysignificand & 0x7fffff)));
// Packs an IEEEhalf value into a 16-bit APInt: sign in bit 15, 5-bit
// exponent field in bits 10-14, low 10 significand bits below that.
// NOTE(review): the zero and infinity branch bodies and part of the NaN
// branch are not among the lines shown here.
2897 APFloat::convertHalfAPFloatToAPInt() const
2899 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2900 assert(partCount()==1);
2902 uint32_t myexponent, mysignificand;
2904 if (category==fcNormal) {
2905 myexponent = exponent+15; //bias
2906 mysignificand = (uint32_t)*significandParts();
// A biased exponent of 1 with bit 10 clear is stored with exponent field 0.
2907 if (myexponent == 1 && !(mysignificand & 0x400))
2908 myexponent = 0; // denormal
2909 } else if (category==fcZero) {
2912 } else if (category==fcInfinity) {
2916 assert(category == fcNaN && "Unknown category!");
// NaN: the significand is emitted as stored.
2918 mysignificand = (uint32_t)*significandParts();
2921 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2922 (mysignificand & 0x3ff)));
2925 // This function creates an APInt that is just a bit map of the floating
2926 // point constant as it would appear in memory. It is not a conversion,
2927 // and treating the result as a normal integer is unlikely to be useful.
// Dispatch is by comparing the semantics pointer against each known format
// and calling the matching converter. x87DoubleExtended is the fall-through
// case and is guarded by an assert.
// NOTE(review): the tail of that assert is not among the lines shown here.
2930 APFloat::bitcastToAPInt() const
2932 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2933 return convertHalfAPFloatToAPInt();
2935 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2936 return convertFloatAPFloatToAPInt();
2938 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2939 return convertDoubleAPFloatToAPInt();
2941 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2942 return convertQuadrupleAPFloatToAPInt();
2944 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2945 return convertPPCDoubleDoubleAPFloatToAPInt();
2947 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2949 return convertF80LongDoubleAPFloatToAPInt();
2953 APFloat::convertToFloat() const
2955 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2956 "Float semantics are not IEEEsingle");
2957 APInt api = bitcastToAPInt();
2958 return api.bitsToFloat();
2962 APFloat::convertToDouble() const
2964 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2965 "Float semantics are not IEEEdouble");
2966 APInt api = bitcastToAPInt();
2967 return api.bitsToDouble();
2970 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2971 /// does not support these bit patterns:
2972 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2973 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2974 /// exponent = 0, integer bit 1 ("pseudodenormal")
2975 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2976 /// At the moment, the first two are treated as NaNs, the second two as Normal.
// Initializes this object from an 80-bit pattern: raw word 0 is the 64-bit
// significand, the low 15 bits of raw word 1 are the biased exponent and the
// sign is taken from raw word 1 shifted right by 15.
// NOTE(review): the category assignments for zero and NaN, the final else
// line and the statement guarded by the denormal test are not among the
// lines shown here.
2978 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2980 assert(api.getBitWidth()==80);
2981 uint64_t i1 = api.getRawData()[0];
2982 uint64_t i2 = api.getRawData()[1];
2983 uint64_t myexponent = (i2 & 0x7fff);
2984 uint64_t mysignificand = i1;
2986 initialize(&APFloat::x87DoubleExtended);
2987 assert(partCount()==2);
2989 sign = static_cast<unsigned int>(i2>>15);
2990 if (myexponent==0 && mysignificand==0) {
2991 // exponent, significand meaningless
2993 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2994 // exponent, significand meaningless
2995 category = fcInfinity;
2996 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2997 // exponent meaningless
// The payload is kept in part 0; part 1 is cleared.
2999 significandParts()[0] = mysignificand;
3000 significandParts()[1] = 0;
3002 category = fcNormal;
// Remove the bias. The significand is stored unchanged because the integer
// bit is already explicit in this format.
3003 exponent = myexponent - 16383;
3004 significandParts()[0] = mysignificand;
3005 significandParts()[1] = 0;
3006 if (myexponent==0) // denormal
// Initializes this object from a 128-bit PPC double-double pattern. Each raw
// 64-bit word is split into sign (bit 63), 11-bit exponent field and 52-bit
// significand field; word 0 feeds sign/exponent/part 0 and word 1 feeds
// sign2/exponent2/part 1.
// NOTE(review): the category assignments for zero and NaN, the final else
// line and the statements around the two integer-bit updates are not among
// the lines shown here.
3012 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3014 assert(api.getBitWidth()==128);
3015 uint64_t i1 = api.getRawData()[0];
3016 uint64_t i2 = api.getRawData()[1];
3017 uint64_t myexponent = (i1 >> 52) & 0x7ff;
3018 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
3019 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
3020 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
3022 initialize(&APFloat::PPCDoubleDouble);
3023 assert(partCount()==2);
3025 sign = static_cast<unsigned int>(i1>>63);
3026 sign2 = static_cast<unsigned int>(i2>>63);
// Classification below looks at the first word only.
3027 if (myexponent==0 && mysignificand==0) {
3028 // exponent, significand meaningless
3029 // exponent2 and significand2 are required to be 0; we don't check
3031 } else if (myexponent==0x7ff && mysignificand==0) {
3032 // exponent, significand meaningless
3033 // exponent2 and significand2 are required to be 0; we don't check
3034 category = fcInfinity;
3035 } else if (myexponent==0x7ff && mysignificand!=0) {
3036 // exponent meaningless. So is the whole second word, but keep it
// exponent2 is stored without removing the bias in this branch.
3039 exponent2 = myexponent2;
3040 significandParts()[0] = mysignificand;
3041 significandParts()[1] = mysignificand2;
3043 category = fcNormal;
3044 // Note there is no category2; the second word is treated as if it is
3045 // fcNormal, although it might be something else considered by itself.
3046 exponent = myexponent - 1023;
3047 exponent2 = myexponent2 - 1023;
3048 significandParts()[0] = mysignificand;
3049 significandParts()[1] = mysignificand2;
3050 if (myexponent==0) // denormal
// Bit 52 is the integer bit that the stored fields leave implicit.
3053 significandParts()[0] |= 0x10000000000000LL; // integer bit
3057 significandParts()[1] |= 0x10000000000000LL; // integer bit
// Initializes this object from a 128-bit IEEE quad pattern. Raw word 0 is
// the low 64 significand bits; raw word 1 holds the sign (bit 63), the 15-bit
// exponent field (bits 48-62) and the upper 48 significand bits.
// NOTE(review): the category assignments for zero and NaN, the final else
// line and the statements around the integer-bit update are not among the
// lines shown here.
3062 APFloat::initFromQuadrupleAPInt(const APInt &api)
3064 assert(api.getBitWidth()==128);
3065 uint64_t i1 = api.getRawData()[0];
3066 uint64_t i2 = api.getRawData()[1];
3067 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3068 uint64_t mysignificand = i1;
3069 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3071 initialize(&APFloat::IEEEquad);
3072 assert(partCount()==2);
3074 sign = static_cast<unsigned int>(i2>>63);
3075 if (myexponent==0 &&
3076 (mysignificand==0 && mysignificand2==0)) {
3077 // exponent, significand meaningless
3079 } else if (myexponent==0x7fff &&
3080 (mysignificand==0 && mysignificand2==0)) {
3081 // exponent, significand meaningless
3082 category = fcInfinity;
3083 } else if (myexponent==0x7fff &&
3084 (mysignificand!=0 || mysignificand2 !=0)) {
3085 // exponent meaningless
3087 significandParts()[0] = mysignificand;
3088 significandParts()[1] = mysignificand2;
3090 category = fcNormal;
3091 exponent = myexponent - 16383;
3092 significandParts()[0] = mysignificand;
3093 significandParts()[1] = mysignificand2;
3094 if (myexponent==0) // denormal
// Bit 48 of the upper part is the integer bit that the stored field leaves
// implicit.
3097 significandParts()[1] |= 0x1000000000000LL; // integer bit
// Initializes this object from a 64-bit IEEE double pattern: sign in bit 63,
// 11-bit exponent field in bits 52-62, 52-bit significand field below that.
// NOTE(review): the category assignments for zero and NaN, the final else
// line and the statements around the integer-bit update are not among the
// lines shown here.
3102 APFloat::initFromDoubleAPInt(const APInt &api)
3104 assert(api.getBitWidth()==64);
3105 uint64_t i = *api.getRawData();
3106 uint64_t myexponent = (i >> 52) & 0x7ff;
3107 uint64_t mysignificand = i & 0xfffffffffffffLL;
3109 initialize(&APFloat::IEEEdouble);
3110 assert(partCount()==1);
3112 sign = static_cast<unsigned int>(i>>63);
3113 if (myexponent==0 && mysignificand==0) {
3114 // exponent, significand meaningless
3116 } else if (myexponent==0x7ff && mysignificand==0) {
3117 // exponent, significand meaningless
3118 category = fcInfinity;
3119 } else if (myexponent==0x7ff && mysignificand!=0) {
3120 // exponent meaningless
3122 *significandParts() = mysignificand;
3124 category = fcNormal;
3125 exponent = myexponent - 1023;
3126 *significandParts() = mysignificand;
3127 if (myexponent==0) // denormal
// Bit 52 is the integer bit that the stored field leaves implicit.
3130 *significandParts() |= 0x10000000000000LL; // integer bit
// Initializes this object from a 32-bit IEEE single pattern: 8-bit exponent
// field in bits 23-30 and 23-bit significand field below that.
// NOTE(review): the sign assignment, the category assignments for zero and
// NaN, the final else line and the statements around the integer-bit update
// are not among the lines shown here.
3135 APFloat::initFromFloatAPInt(const APInt & api)
3137 assert(api.getBitWidth()==32);
3138 uint32_t i = (uint32_t)*api.getRawData();
3139 uint32_t myexponent = (i >> 23) & 0xff;
3140 uint32_t mysignificand = i & 0x7fffff;
3142 initialize(&APFloat::IEEEsingle);
3143 assert(partCount()==1);
3146 if (myexponent==0 && mysignificand==0) {
3147 // exponent, significand meaningless
3149 } else if (myexponent==0xff && mysignificand==0) {
3150 // exponent, significand meaningless
3151 category = fcInfinity;
3152 } else if (myexponent==0xff && mysignificand!=0) {
3153 // sign, exponent, significand meaningless
3155 *significandParts() = mysignificand;
3157 category = fcNormal;
3158 exponent = myexponent - 127; //bias
3159 *significandParts() = mysignificand;
3160 if (myexponent==0) // denormal
// Bit 23 is the integer bit that the stored field leaves implicit.
3163 *significandParts() |= 0x800000; // integer bit
// Initializes this object from a 16-bit IEEE half pattern: 5-bit exponent
// field in bits 10-14 and 10-bit significand field below that.
// NOTE(review): the sign assignment, the category assignments for zero and
// NaN, the final else line and the statements around the integer-bit update
// are not among the lines shown here.
3168 APFloat::initFromHalfAPInt(const APInt & api)
3170 assert(api.getBitWidth()==16);
3171 uint32_t i = (uint32_t)*api.getRawData();
3172 uint32_t myexponent = (i >> 10) & 0x1f;
3173 uint32_t mysignificand = i & 0x3ff;
3175 initialize(&APFloat::IEEEhalf);
3176 assert(partCount()==1);
3179 if (myexponent==0 && mysignificand==0) {
3180 // exponent, significand meaningless
3182 } else if (myexponent==0x1f && mysignificand==0) {
3183 // exponent, significand meaningless
3184 category = fcInfinity;
3185 } else if (myexponent==0x1f && mysignificand!=0) {
3186 // sign, exponent, significand meaningless
3188 *significandParts() = mysignificand;
3190 category = fcNormal;
3191 exponent = myexponent - 15; //bias
3192 *significandParts() = mysignificand;
3193 if (myexponent==0) // denormal
// Bit 10 is the integer bit that the stored field leaves implicit.
3196 *significandParts() |= 0x400; // integer bit
3200 /// Treat api as containing the bits of a floating point number. Currently
3201 /// we infer the floating point type from the size of the APInt. The
3202 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3203 /// when the size is anything else).
3205 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
3207 if (api.getBitWidth() == 16)
3208 return initFromHalfAPInt(api);
3209 else if (api.getBitWidth() == 32)
3210 return initFromFloatAPInt(api);
3211 else if (api.getBitWidth()==64)
3212 return initFromDoubleAPInt(api);
3213 else if (api.getBitWidth()==80)
3214 return initFromF80LongDoubleAPInt(api);
3215 else if (api.getBitWidth()==128)
3217 initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api));
3219 llvm_unreachable(0);
3223 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3225 return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE);
3228 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3229 APFloat Val(Sem, fcNormal, Negative);
3231 // We want (in interchange format):
3232 // sign = {Negative}
3234 // significand = 1..1
3236 Val.exponent = Sem.maxExponent; // unbiased
3238 // 1-initialize all bits....
3239 Val.zeroSignificand();
3240 integerPart *significand = Val.significandParts();
3241 unsigned N = partCountForBits(Sem.precision);
3242 for (unsigned i = 0; i != N; ++i)
3243 significand[i] = ~((integerPart) 0);
3245 // ...and then clear the top bits for internal consistency.
3247 (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1)) - 1;
3252 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3253 APFloat Val(Sem, fcNormal, Negative);
3255 // We want (in interchange format):
3256 // sign = {Negative}
3258 // significand = 0..01
3260 Val.exponent = Sem.minExponent; // unbiased
3261 Val.zeroSignificand();
3262 Val.significandParts()[0] = 1;
3266 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3267 APFloat Val(Sem, fcNormal, Negative);
3269 // We want (in interchange format):
3270 // sign = {Negative}
3272 // significand = 10..0
3274 Val.exponent = Sem.minExponent;
3275 Val.zeroSignificand();
3276 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3277 (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1));
3282 APFloat::APFloat(const APInt& api, bool isIEEE) : exponent2(0), sign2(0) {
3283 initFromAPInt(api, isIEEE);
3286 APFloat::APFloat(float f) : exponent2(0), sign2(0) {
3287 initFromAPInt(APInt::floatToBits(f));
3290 APFloat::APFloat(double d) : exponent2(0), sign2(0) {
3291 initFromAPInt(APInt::doubleToBits(d));
3295 static void append(SmallVectorImpl<char> &Buffer,
3296 unsigned N, const char *Str) {
3297 unsigned Start = Buffer.size();
3298 Buffer.set_size(Start + N);
3299 memcpy(&Buffer[Start], Str, N);
3302 template <unsigned N>
3303 void append(SmallVectorImpl<char> &Buffer, const char (&Str)[N]) {
3304 append(Buffer, N, Str);
3307 /// Removes data from the given significand until it is no more
3308 /// precise than is required for the desired precision.
// On return, significand has been divided by a power of ten and exp has been
// increased by the same number of decimal places (tensRemovable). Nothing
// changes when the active bit count is already within the bit budget for
// FormatPrecision digits, or when no whole power of ten can be removed.
// NOTE(review): the loop header and the two multiplications that build
// divisor are not among the lines shown here; the visible lines suggest
// exponentiation by squaring - confirm.
3309 void AdjustToPrecision(APInt &significand,
3310 int &exp, unsigned FormatPrecision) {
3311 unsigned bits = significand.getActiveBits();
3313 // 196/59 is a very slight overestimate of lg_2(10).
3314 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3316 if (bits <= bitsRequired) return;
// Convert the surplus bits back into a count of decimal digits, rounding
// down.
3318 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3319 if (!tensRemovable) return;
3321 exp += tensRemovable;
3323 APInt divisor(significand.getBitWidth(), 1);
3324 APInt powten(significand.getBitWidth(), 10);
3326 if (tensRemovable & 1)
3328 tensRemovable >>= 1;
3329 if (!tensRemovable) break;
3333 significand = significand.udiv(divisor);
3335 // Truncate the significand down to its active bit count, but
3336 // don't try to drop below 32.
3337 unsigned newPrecision = std::max(32U, significand.getActiveBits());
3338 significand = significand.trunc(newPrecision);
// Shortens a buffer of decimal digits, stored least significant digit first,
// to at most FormatPrecision digits. Digits removed from the front of the
// buffer are accounted for by adding their count to exp. The first dropped
// digit decides the rounding direction: below 5 truncates, anything else
// rounds up.
// NOTE(review): several short statements (the index increments, the carry
// loop else-branch and the early returns) are not among the lines shown
// here.
3342 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3343 int &exp, unsigned FormatPrecision) {
3344 unsigned N = buffer.size();
3345 if (N <= FormatPrecision) return;
3347 // The most significant figures are the last ones in the buffer.
3348 unsigned FirstSignificant = N - FormatPrecision;
3351 // FIXME: this probably shouldn't use 'round half up'.
3353 // Rounding down is just a truncation, except we also want to drop
3354 // trailing zeros from the new result.
3355 if (buffer[FirstSignificant - 1] < '5') {
// This scan has no bound check; it presumably relies on the last buffer
// element (the leading digit) being nonzero - verify against callers.
3356 while (buffer[FirstSignificant] == '0')
3359 exp += FirstSignificant;
3360 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3364 // Rounding up requires a decimal add-with-carry. If we continue
3365 // the carry, the newly-introduced zeros will just be truncated.
3366 for (unsigned I = FirstSignificant; I != N; ++I) {
3367 if (buffer[I] == '9') {
3375 // If we carried through, we have exactly one digit of precision.
3376 if (FirstSignificant == N) {
3377 exp += FirstSignificant;
3379 buffer.push_back('1');
3383 exp += FirstSignificant;
3384 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
// Appends a decimal rendering of this value to Str.
// FormatPrecision limits the number of significant decimal digits; zero
// selects a default derived from the precision of the semantics.
// FormatMaxPadding takes part in the choice between plain and scientific
// notation; zero always selects scientific notation.
// NOTE(review): the switch on category and a number of short statements are
// not among the lines shown here; comments describe visible statements only.
3388 void APFloat::toString(SmallVectorImpl<char> &Str,
3389 unsigned FormatPrecision,
3390 unsigned FormatMaxPadding) const {
// Special values are written as fixed spellings.
3394 return append(Str, "-Inf");
3396 return append(Str, "+Inf");
3398 case fcNaN: return append(Str, "NaN");
// Zero uses the scientific spelling when no padding is allowed.
3404 if (!FormatMaxPadding)
3405 append(Str, "0.0E+0");
3417 // Decompose the number into an APInt and an exponent.
// exp is the power of two that scales the integer significand: the stored
// exponent refers to the integer bit, which sits precision - 1 bits up.
3418 int exp = exponent - ((int) semantics->precision - 1);
3419 APInt significand(semantics->precision,
3420 makeArrayRef(significandParts(),
3421 partCountForBits(semantics->precision)));
3423 // Set FormatPrecision if zero. We want to do this before we
3424 // truncate trailing zeros, as those are part of the precision.
3425 if (!FormatPrecision) {
3426 // It's an interesting question whether to use the nominal
3427 // precision or the active precision here for denormals.
3429 // FormatPrecision = ceil(significandBits / lg_2(10))
3430 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3433 // Ignore trailing binary zeros.
3434 int trailingZeros = significand.countTrailingZeros();
3435 exp += trailingZeros;
3436 significand = significand.lshr(trailingZeros);
3438 // Change the exponent from 2^e to 10^e.
3441 } else if (exp > 0) {
// Positive binary exponent: widen the integer by exp bits and fold the
// power of two into it with a left shift.
3443 significand = significand.zext(semantics->precision + exp);
3444 significand <<= exp;
3446 } else { /* exp < 0 */
3449 // We transform this using the identity:
3450 // (N)(2^-e) == (N)(5^e)(10^-e)
3451 // This means we have to multiply N (the significand) by 5^e.
3452 // To avoid overflow, we have to operate on numbers large
3453 // enough to store N * 5^e:
3454 // log2(N * 5^e) == log2(N) + e * log2(5)
3455 // <= semantics->precision + e * 137 / 59
3456 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3458 unsigned precision = semantics->precision + 137 * texp / 59;
3460 // Multiply significand by 5^e.
3461 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3462 significand = significand.zext(precision);
3463 APInt five_to_the_i(precision, 5);
3465 if (texp & 1) significand *= five_to_the_i;
3469 five_to_the_i *= five_to_the_i;
// First reduction, on the integer, before digits are generated.
3473 AdjustToPrecision(significand, exp, FormatPrecision);
3475 llvm::SmallVector<char, 256> buffer;
3478 unsigned precision = significand.getBitWidth();
3479 APInt ten(precision, 10);
3480 APInt digit(precision, 0);
// Digits are produced least significant first by repeated division by ten.
3482 bool inTrail = true;
3483 while (significand != 0) {
3484 // digit <- significand % 10
3485 // significand <- significand / 10
3486 APInt::udivrem(significand, ten, significand, digit);
3488 unsigned d = digit.getZExtValue();
3490 // Drop trailing zeros.
3491 if (inTrail && !d) exp++;
3493 buffer.push_back((char) ('0' + d));
3498 assert(!buffer.empty() && "no characters in buffer!");
3500 // Drop down to FormatPrecision.
3501 // TODO: don't do more precise calculations above than are required.
3502 AdjustToPrecision(buffer, exp, FormatPrecision);
3504 unsigned NDigits = buffer.size();
3506 // Check whether we should use scientific notation.
3507 bool FormatScientific;
3508 if (!FormatMaxPadding)
3509 FormatScientific = true;
3514 // But we shouldn't make the number look more precise than it is.
3515 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3516 NDigits + (unsigned) exp > FormatPrecision);
3518 // Power of the most significant digit.
3519 int MSD = exp + (int) (NDigits - 1);
3522 FormatScientific = false;
3524 // 765e-5 == 0.00765
3526 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3531 // Scientific formatting is pretty straightforward.
3532 if (FormatScientific) {
// Re-express exp relative to the leading digit.
3533 exp += (NDigits - 1);
// The buffer is least significant first, so it is read from the back.
3535 Str.push_back(buffer[NDigits-1]);
3540 for (unsigned I = 1; I != NDigits; ++I)
3541 Str.push_back(buffer[NDigits-1-I]);
3544 Str.push_back(exp >= 0 ? '+' : '-');
3545 if (exp < 0) exp = -exp;
// Exponent digits are collected in reverse and then emitted back to front.
3546 SmallVector<char, 6> expbuf;
3548 expbuf.push_back((char) ('0' + (exp % 10)));
3551 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3552 Str.push_back(expbuf[E-1-I]);
3556 // Non-scientific, positive exponents.
3558 for (unsigned I = 0; I != NDigits; ++I)
3559 Str.push_back(buffer[NDigits-1-I]);
3560 for (unsigned I = 0; I != (unsigned) exp; ++I)
3565 // Non-scientific, negative exponents.
3567 // The number of digits to the left of the decimal point.
3568 int NWholeDigits = exp + (int) NDigits;
3571 if (NWholeDigits > 0) {
3572 for (; I != (unsigned) NWholeDigits; ++I)
3573 Str.push_back(buffer[NDigits-I-1]);
3576 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3580 for (unsigned Z = 1; Z != NZeros; ++Z)
// Remaining digits, continuing from index I.
3584 for (; I != NDigits; ++I)
3585 Str.push_back(buffer[NDigits-I-1]);
// Checks whether this value has an exactly representable reciprocal. The
// visible checks are: the semantics is one of the four IEEE formats, the
// category is fcNormal, the significand has only its integer bit set, the
// division 1 / *this reports opOK, and the quotient is not a denormal.
// NOTE(review): the statements guarded by these checks and the end of the
// function are not among the lines shown here; each failed check presumably
// returns false and inv presumably receives the reciprocal - confirm.
3588 bool APFloat::getExactInverse(APFloat *inv) const {
3589 // We can only guarantee the existence of an exact inverse for IEEE floats.
3590 if (semantics != &IEEEhalf && semantics != &IEEEsingle &&
3591 semantics != &IEEEdouble && semantics != &IEEEquad)
3594 // Special floats and denormals have no exact inverse.
3595 if (category != fcNormal)
3598 // Check that the number is a power of two by making sure that only the
3599 // integer bit is set in the significand.
3600 if (significandLSB() != semantics->precision - 1)
// Compute 1 / *this in the same semantics, rounding to nearest even.
3604 APFloat reciprocal(*semantics, 1ULL);
3605 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3608 // Avoid multiplication with a denormal, it is not safe on all platforms and
3609 // may be slower than a normal division.
3610 if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
// The quotient of one by a power of two must itself be a normal power of
// two.
3613 assert(reciprocal.category == fcNormal &&
3614 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);