1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/MathExtras.h"
/* Combine two small enumerator values into a single integer, LHS * 4 + RHS,
   so that a pair can be matched by one case label (this file uses it in
   switch statements over pairs of categories).  The encoding is unique
   only while RHS stays in the range 0..3. */
28 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
30 /* Assumed in hexadecimal significand parsing, and conversion to
31 hexadecimal strings. */
/* Compile-time check: when COND is false the declared array has size -1,
   which the compiler rejects; when COND is true this is only an extern
   array declaration. */
32 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
/* Require integerPartWidth to be a whole number of 4-bit hex digits. */
33 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
37 /* Represents floating point arithmetic semantics. */
39 /* The largest E such that 2^E is representable; this matches the
40 definition of IEEE 754. */
41 exponent_t maxExponent;
43 /* The smallest E such that 2^E is a normalized number; this
44 matches the definition of IEEE 754. */
45 exponent_t minExponent;
47 /* Number of bits in the significand. This includes the integer
49 unsigned int precision;
52 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 };
53 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 };
54 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 };
55 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 };
56 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 };
57 const fltSemantics APFloat::Bogus = { 0, 0, 0 };
59 /* The PowerPC format consists of two doubles. It does not map cleanly
60 onto the usual format above. It is approximated using twice the
61 mantissa bits. Note that for exponents near the double minimum,
62 we no longer can represent the full 106 mantissa bits, so those
63 will be treated as denormal numbers.
65 FIXME: While this approximation is equivalent to what GCC uses for
66 compile-time arithmetic on PPC double-double numbers, it is not able
67 to represent all possible values held by a PPC double-double number,
68 for example: (long double) 1.0 + (long double) 0x1p-106
69 Should this be replaced by a full emulation of PPC double-double? */
70 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 };
72 /* A tight upper bound on number of parts required to hold the value
75 power * 815 / (351 * integerPartWidth) + 1
77 However, whilst the result may require only this many parts,
78 because we are multiplying two values to get it, the
79 multiplication may require an extra part with the excess part
80 being zero (consider the trivial case of 1 * 1, tcFullMultiply
81 requires two parts to hold the single-part result). So we add an
82 extra one to guarantee enough space whilst multiplying. */
83 const unsigned int maxExponent = 16383;
84 const unsigned int maxPrecision = 113;
85 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
86 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
87 / (351 * integerPartWidth));
90 /* A bunch of private, handy routines. */
/* Number of integerPartWidth-bit parts needed to hold BITS bits: BITS
   divided by integerPartWidth, rounded up. */
92 static inline unsigned int
93 partCountForBits(unsigned int bits)
95 return ((bits) + integerPartWidth - 1) / integerPartWidth;
98 /* Returns 0U-9U. Return values >= 10U are not digits. */
99 static inline unsigned int
100 decDigitValue(unsigned int c)
105 /* Return the value of a decimal exponent of the form
108 If the exponent overflows, returns a large exponent with the
111 readExponent(StringRef::iterator begin, StringRef::iterator end)
114 unsigned int absExponent;
115 const unsigned int overlargeExponent = 24000; /* FIXME. */
116 StringRef::iterator p = begin;
118 assert(p != end && "Exponent has no digits");
120 isNegative = (*p == '-');
121 if (*p == '-' || *p == '+') {
123 assert(p != end && "Exponent has no digits");
126 absExponent = decDigitValue(*p++);
127 assert(absExponent < 10U && "Invalid character in exponent");
129 for (; p != end; ++p) {
132 value = decDigitValue(*p);
133 assert(value < 10U && "Invalid character in exponent");
135 value += absExponent * 10;
136 if (absExponent >= overlargeExponent) {
137 absExponent = overlargeExponent;
138 p = end; /* outwit assert below */
144 assert(p == end && "Invalid exponent in exponent");
147 return -(int) absExponent;
149 return (int) absExponent;
152 /* This is ugly and needs cleaning up, but I don't immediately see
153 how whilst remaining safe. */
155 totalExponent(StringRef::iterator p, StringRef::iterator end,
156 int exponentAdjustment)
158 int unsignedExponent;
159 bool negative, overflow;
162 assert(p != end && "Exponent has no digits");
164 negative = *p == '-';
165 if (*p == '-' || *p == '+') {
167 assert(p != end && "Exponent has no digits");
170 unsignedExponent = 0;
172 for (; p != end; ++p) {
175 value = decDigitValue(*p);
176 assert(value < 10U && "Invalid character in exponent");
178 unsignedExponent = unsignedExponent * 10 + value;
179 if (unsignedExponent > 32767) {
185 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
189 exponent = unsignedExponent;
191 exponent = -exponent;
192 exponent += exponentAdjustment;
193 if (exponent > 32767 || exponent < -32768)
198 exponent = negative ? -32768: 32767;
/* Scan forward from BEGIN over '0' characters.  DOT is an out-parameter,
   presumably receiving the position of a decimal point that is passed
   over -- confirm against the full definition.
   NOTE(review): both loop conditions below read *p before testing
   p != end, so the character at END is examined once the scan reaches
   it; testing p != end first would avoid reading outside the range. */
203 static StringRef::iterator
204 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
205 StringRef::iterator *dot)
207 StringRef::iterator p = begin;
209 while (*p == '0' && p != end)
215 assert(end - begin != 1 && "Significand has no digits");
217 while (*p == '0' && p != end)
224 /* Given a normal decimal floating point number of the form
228 where the decimal point and exponent are optional, fill out the
229 structure D. Exponent is appropriate if the significand is
230 treated as an integer, and normalizedExponent if the significand
231 is taken to have the decimal point after a single leading
234 If the value is zero, D->firstSigDigit points to a non-digit, and
235 the return exponent is zero.
238 const char *firstSigDigit;
239 const char *lastSigDigit;
241 int normalizedExponent;
245 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
248 StringRef::iterator dot = end;
249 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
251 D->firstSigDigit = p;
253 D->normalizedExponent = 0;
255 for (; p != end; ++p) {
257 assert(dot == end && "String contains multiple dots");
262 if (decDigitValue(*p) >= 10U)
267 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
268 assert(p != begin && "Significand has no digits");
269 assert((dot == end || p - begin != 1) && "Significand has no digits");
271 /* p points to the first non-digit in the string */
272 D->exponent = readExponent(p + 1, end);
274 /* Implied decimal point? */
279 /* If number is all zeroes accept any exponent. */
280 if (p != D->firstSigDigit) {
281 /* Drop insignificant trailing zeroes. */
286 while (p != begin && *p == '0');
287 while (p != begin && *p == '.');
290 /* Adjust the exponents for any decimal point. */
291 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
292 D->normalizedExponent = (D->exponent +
293 static_cast<exponent_t>((p - D->firstSigDigit)
294 - (dot > D->firstSigDigit && dot < p)));
300 /* Return the trailing fraction of a hexadecimal number.
301 DIGITVALUE is the first hex digit of the fraction, P points to
304 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
305 unsigned int digitValue)
307 unsigned int hexDigit;
309 /* If the first trailing digit isn't 0 or 8 we can work out the
310 fraction immediately. */
312 return lfMoreThanHalf;
313 else if (digitValue < 8 && digitValue > 0)
314 return lfLessThanHalf;
316 /* Otherwise we need to find the first non-zero digit. */
320 assert(p != end && "Invalid trailing hexadecimal fraction!");
322 hexDigit = hexDigitValue(*p);
324 /* If we ran off the end it is exactly zero or one-half, otherwise
327 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
329 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
332 /* Return the fraction lost were a bignum truncated losing the least
333 significant BITS bits. */
335 lostFractionThroughTruncation(const integerPart *parts,
336 unsigned int partCount,
341 lsb = APInt::tcLSB(parts, partCount);
343 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
345 return lfExactlyZero;
347 return lfExactlyHalf;
348 if (bits <= partCount * integerPartWidth &&
349 APInt::tcExtractBit(parts, bits - 1))
350 return lfMoreThanHalf;
352 return lfLessThanHalf;
355 /* Shift DST right BITS bits noting lost fraction. */
357 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
359 lostFraction lost_fraction;
361 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
363 APInt::tcShiftRight(dst, parts, bits);
365 return lost_fraction;
368 /* Combine the effect of two lost fractions. */
/* MORESIGNIFICANT is the fraction lost at the higher bit positions and
   LESSSIGNIFICANT the fraction lost below it.  Any non-zero
   LESSSIGNIFICANT nudges an exact value upwards: exactly-zero becomes
   less-than-half and exactly-half becomes more-than-half.  The other
   two values of MORESIGNIFICANT are returned unchanged. */
370 combineLostFractions(lostFraction moreSignificant,
371 lostFraction lessSignificant)
373 if (lessSignificant != lfExactlyZero) {
374 if (moreSignificant == lfExactlyZero)
375 moreSignificant = lfLessThanHalf;
376 else if (moreSignificant == lfExactlyHalf)
377 moreSignificant = lfMoreThanHalf;
380 return moreSignificant;
383 /* The error from the true value, in half-ulps, on multiplying two
384 floating point numbers, which differ from the value they
385 approximate by at most HUERR1 and HUERR2 half-ulps, is strictly less
386 than the returned value.
388 See "How to Read Floating Point Numbers Accurately" by William D
391 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
393 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
395 if (HUerr1 + HUerr2 == 0)
396 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
398 return inexactMultiply + 2 * (HUerr1 + HUerr2);
401 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
402 when the least significant BITS are truncated. BITS cannot be
405 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
407 unsigned int count, partBits;
408 integerPart part, boundary;
413 count = bits / integerPartWidth;
414 partBits = bits % integerPartWidth + 1;
416 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
419 boundary = (integerPart) 1 << (partBits - 1);
424 if (part - boundary <= boundary - part)
425 return part - boundary;
427 return boundary - part;
430 if (part == boundary) {
433 return ~(integerPart) 0; /* A lot. */
436 } else if (part == boundary - 1) {
439 return ~(integerPart) 0; /* A lot. */
444 return ~(integerPart) 0; /* A lot. */
447 /* Place pow(5, power) in DST, and return the number of parts used.
448 DST must be at least one part larger than size of the answer. */
450 powerOf5(integerPart *dst, unsigned int power)
452 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
454 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
455 pow5s[0] = 78125 * 5;
457 unsigned int partsCount[16] = { 1 };
458 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
460 assert(power <= maxExponent);
465 *p1 = firstEightPowers[power & 7];
471 for (unsigned int n = 0; power; power >>= 1, n++) {
476 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
478 pc = partsCount[n - 1];
479 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
481 if (pow5[pc - 1] == 0)
489 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
491 if (p2[result - 1] == 0)
494 /* Now result is in p1 with partsCount parts and p2 is scratch
496 tmp = p1, p1 = p2, p2 = tmp;
503 APInt::tcAssign(dst, p1, result);
508 /* Zero at the end to avoid modular arithmetic when adding one; used
509 when rounding up during hexadecimal output. */
510 static const char hexDigitsLower[] = "0123456789abcdef0";
511 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
512 static const char infinityL[] = "infinity";
513 static const char infinityU[] = "INFINITY";
514 static const char NaNL[] = "nan";
515 static const char NaNU[] = "NAN";
517 /* Write out an integerPart in hexadecimal, starting with the most
518 significant nibble. Write out exactly COUNT hexdigits, return
521 partAsHex (char *dst, integerPart part, unsigned int count,
522 const char *hexDigitChars)
524 unsigned int result = count;
/* COUNT must be at least 1 and no more than the number of nibbles in an
   integerPart. */
526 assert(count != 0 && count <= integerPartWidth / 4);
/* Drop the unwanted low nibbles so that the COUNT most significant
   nibbles occupy the low end of PART. */
528 part >>= (integerPartWidth - 4 * count);
/* Emit the lowest remaining nibble using the caller-supplied digit
   table. */
530 dst[count] = hexDigitChars[part & 0xf];
537 /* Write out an unsigned decimal integer. */
539 writeUnsignedDecimal (char *dst, unsigned int n)
555 /* Write out a signed decimal integer. */
557 writeSignedDecimal (char *dst, int value)
/* Convert to unsigned before negating: unsigned negation is well defined
   for every input, including the most negative int. */
561 dst = writeUnsignedDecimal(dst, -(unsigned) value);
563 dst = writeUnsignedDecimal(dst, value);
570 APFloat::initialize(const fltSemantics *ourSemantics)
574 semantics = ourSemantics;
577 significand.parts = new integerPart[count];
581 APFloat::freeSignificand()
584 delete [] significand.parts;
588 APFloat::assign(const APFloat &rhs)
590 assert(semantics == rhs.semantics);
593 category = rhs.category;
594 exponent = rhs.exponent;
595 if (category == fcNormal || category == fcNaN)
596 copySignificand(rhs);
600 APFloat::copySignificand(const APFloat &rhs)
602 assert(category == fcNormal || category == fcNaN);
603 assert(rhs.partCount() >= partCount());
605 APInt::tcAssign(significandParts(), rhs.significandParts(),
609 /* Make this number a NaN. The payload is taken from FILL when one is
610 given (zero otherwise) and trimmed to fit the significand. The quiet
611 bit is cleared for a signalling NaN and set for a quiet NaN. */
612 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
617 integerPart *significand = significandParts();
618 unsigned numParts = partCount();
620 // Set the significand bits to the fill.
// Pre-clear every part when there is no fill, or when the fill has fewer
// words than the significand and so cannot overwrite all of it.
621 if (!fill || fill->getNumWords() < numParts)
622 APInt::tcSet(significand, 0, numParts);
624 APInt::tcAssign(significand, fill->getRawData(),
625 std::min(fill->getNumWords(), numParts));
627 // Zero out the excess bits of the significand.
// Only the low (precision - 1) bits are kept; everything above is cleared.
628 unsigned bitsToPreserve = semantics->precision - 1;
// NOTE(review): 64 and 1ULL are hard-coded here although the rest of the
// file works in terms of integerPartWidth; this looks correct only when
// integerPart is 64 bits wide -- confirm.
629 unsigned part = bitsToPreserve / 64;
630 bitsToPreserve %= 64;
631 significand[part] &= ((1ULL << bitsToPreserve) - 1);
632 for (part++; part != numParts; ++part)
633 significand[part] = 0;
// The quiet/signalling flag is the topmost of the preserved bits.
636 unsigned QNaNBit = semantics->precision - 2;
639 // We always have to clear the QNaN bit to make it an SNaN.
640 APInt::tcClearBit(significand, QNaNBit);
642 // If there are no bits set in the payload, we have to set
643 // *something* to make it a NaN instead of an infinity;
644 // conventionally, this is the next bit down from the QNaN bit.
645 if (APInt::tcIsZero(significand, numParts))
646 APInt::tcSetBit(significand, QNaNBit - 1);
648 // We always have to set the QNaN bit to make it a QNaN.
649 APInt::tcSetBit(significand, QNaNBit);
652 // For x87 extended precision, we want to make a NaN, not a
653 // pseudo-NaN. Maybe we should expose the ability to make
// Bit QNaNBit + 1 is bit (precision - 1), the top bit of the significand.
655 if (semantics == &APFloat::x87DoubleExtended)
656 APInt::tcSetBit(significand, QNaNBit + 1);
659 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
661 APFloat value(Sem, uninitialized);
662 value.makeNaN(SNaN, Negative, fill);
667 APFloat::operator=(const APFloat &rhs)
670 if (semantics != rhs.semantics) {
672 initialize(rhs.semantics);
/* True when isNormal() holds, the exponent equals the format's minimum
   exponent, and the top significand bit (bit precision - 1) is clear. */
681 APFloat::isDenormal() const {
682 return isNormal() && (exponent == semantics->minExponent) &&
683 (APInt::tcExtractBit(significandParts(),
684 semantics->precision - 1) == 0);
688 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
691 if (semantics != rhs.semantics ||
692 category != rhs.category ||
695 if (category==fcZero || category==fcInfinity)
697 else if (category==fcNormal && exponent!=rhs.exponent)
701 const integerPart* p=significandParts();
702 const integerPart* q=rhs.significandParts();
703 for (; i>0; i--, p++, q++) {
/* Construct from a single integerPart VALUE: VALUE is stored in the lowest
   significand part with the exponent set to precision - 1, and the result
   is then passed through normalize() with round-to-nearest-even and no
   lost fraction. */
711 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
712 initialize(&ourSemantics);
715 exponent = ourSemantics.precision - 1;
716 significandParts()[0] = value;
717 normalize(rmNearestTiesToEven, lfExactlyZero);
720 APFloat::APFloat(const fltSemantics &ourSemantics) {
721 initialize(&ourSemantics);
726 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
727 // Allocates storage if necessary but does not initialize it.
728 initialize(&ourSemantics);
731 APFloat::APFloat(const fltSemantics &ourSemantics,
732 fltCategory ourCategory, bool negative) {
733 initialize(&ourSemantics);
734 category = ourCategory;
736 if (category == fcNormal)
738 else if (ourCategory == fcNaN)
742 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
743 initialize(&ourSemantics);
744 convertFromString(text, rmNearestTiesToEven);
747 APFloat::APFloat(const APFloat &rhs) {
748 initialize(rhs.semantics);
757 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
758 void APFloat::Profile(FoldingSetNodeID& ID) const {
759 ID.Add(bitcastToAPInt());
/* Number of parts in the significand: enough to hold precision + 1 bits,
   i.e. one bit more than the format's precision. */
763 APFloat::partCount() const
765 return partCountForBits(semantics->precision + 1);
769 APFloat::semanticsPrecision(const fltSemantics &semantics)
771 return semantics.precision;
/* Const overload: forwards to the non-const significandParts() through a
   const_cast on this. */
775 APFloat::significandParts() const
777 return const_cast<APFloat *>(this)->significandParts();
781 APFloat::significandParts()
783 assert(category == fcNormal || category == fcNaN);
786 return significand.parts;
788 return &significand.part;
792 APFloat::zeroSignificand()
795 APInt::tcSet(significandParts(), 0, partCount());
798 /* Increment an fcNormal floating point number's significand. */
800 APFloat::incrementSignificand()
804 carry = APInt::tcIncrement(significandParts(), partCount());
806 /* Our callers should never cause us to overflow. */
811 /* Add the significand of the RHS. Returns the carry flag. */
813 APFloat::addSignificand(const APFloat &rhs)
817 parts = significandParts();
819 assert(semantics == rhs.semantics);
820 assert(exponent == rhs.exponent);
822 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
825 /* Subtract the significand of the RHS with a borrow flag. Returns
828 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
832 parts = significandParts();
834 assert(semantics == rhs.semantics);
835 assert(exponent == rhs.exponent);
837 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
841 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
842 on to the full-precision result of the multiplication. Returns the
845 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
847 unsigned int omsb; // One, not zero, based MSB.
848 unsigned int partsCount, newPartsCount, precision;
849 integerPart *lhsSignificand;
/* Stack buffer for the double-width product; used when newPartsCount is
   at most 4, otherwise the product buffer is heap-allocated below. */
850 integerPart scratch[4];
851 integerPart *fullSignificand;
852 lostFraction lost_fraction;
855 assert(semantics == rhs.semantics);
857 precision = semantics->precision;
/* The product of two PRECISION-bit significands needs up to
   2 * PRECISION bits. */
858 newPartsCount = partCountForBits(precision * 2);
860 if (newPartsCount > 4)
861 fullSignificand = new integerPart[newPartsCount];
863 fullSignificand = scratch;
865 lhsSignificand = significandParts();
866 partsCount = partCount();
/* NOTE(review): multiplying two partsCount-part operands presumably
   fills 2 * partsCount parts of the destination, while the buffer holds
   newPartsCount parts (or 4 for scratch); confirm these agree for every
   precision this is used with. */
868 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
869 rhs.significandParts(), partsCount, partsCount);
871 lost_fraction = lfExactlyZero;
872 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
873 exponent += rhs.exponent;
/* Addend path: temporarily swap in the double-width product and a
   widened copy of the semantics so the addend can be combined at full
   precision; the saved significand and semantics are restored after. */
876 Significand savedSignificand = significand;
877 const fltSemantics *savedSemantics = semantics;
878 fltSemantics extendedSemantics;
880 unsigned int extendedPrecision;
882 /* Normalize our MSB. */
883 extendedPrecision = precision + precision - 1;
884 if (omsb != extendedPrecision) {
885 APInt::tcShiftLeft(fullSignificand, newPartsCount,
886 extendedPrecision - omsb);
887 exponent -= extendedPrecision - omsb;
890 /* Create new semantics. */
891 extendedSemantics = *semantics;
892 extendedSemantics.precision = extendedPrecision;
894 if (newPartsCount == 1)
895 significand.part = fullSignificand[0];
897 significand.parts = fullSignificand;
898 semantics = &extendedSemantics;
900 APFloat extendedAddend(*addend);
/* Widening the addend is expected to be exact, hence the assert that
   the conversion reports opOK. */
901 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
902 assert(status == opOK);
904 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
906 /* Restore our state. */
907 if (newPartsCount == 1)
908 fullSignificand[0] = significand.part;
909 significand = savedSignificand;
910 semantics = savedSemantics;
912 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
915 exponent -= (precision - 1);
/* Shift any bits above PRECISION out of the product, folding what is
   shifted out into the lost fraction. */
917 if (omsb > precision) {
918 unsigned int bits, significantParts;
921 bits = omsb - precision;
922 significantParts = partCountForBits(omsb);
923 lf = shiftRight(fullSignificand, significantParts, bits);
924 lost_fraction = combineLostFractions(lf, lost_fraction);
/* Copy the low partsCount parts of the product back into our own
   significand. */
928 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
/* Release the heap buffer if one was allocated above. */
930 if (newPartsCount > 4)
931 delete [] fullSignificand;
933 return lost_fraction;
936 /* Divide our significand by that of the RHS. Returns the lost fraction. */
938 APFloat::divideSignificand(const APFloat &rhs)
940 unsigned int bit, i, partsCount;
941 const integerPart *rhsSignificand;
942 integerPart *lhsSignificand, *dividend, *divisor;
943 integerPart scratch[4];
944 lostFraction lost_fraction;
946 assert(semantics == rhs.semantics);
948 lhsSignificand = significandParts();
949 rhsSignificand = rhs.significandParts();
950 partsCount = partCount();
953 dividend = new integerPart[partsCount * 2];
957 divisor = dividend + partsCount;
959 /* Copy the dividend and divisor as they will be modified in-place. */
960 for (i = 0; i < partsCount; i++) {
961 dividend[i] = lhsSignificand[i];
962 divisor[i] = rhsSignificand[i];
963 lhsSignificand[i] = 0;
966 exponent -= rhs.exponent;
968 unsigned int precision = semantics->precision;
970 /* Normalize the divisor. */
971 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
974 APInt::tcShiftLeft(divisor, partsCount, bit);
977 /* Normalize the dividend. */
978 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
981 APInt::tcShiftLeft(dividend, partsCount, bit);
984 /* Ensure the dividend >= divisor initially for the loop below.
985 Incidentally, this means that the division loop below is
986 guaranteed to set the integer bit to one. */
987 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
989 APInt::tcShiftLeft(dividend, partsCount, 1);
990 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
994 for (bit = precision; bit; bit -= 1) {
995 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
996 APInt::tcSubtract(dividend, divisor, 0, partsCount);
997 APInt::tcSetBit(lhsSignificand, bit - 1);
1000 APInt::tcShiftLeft(dividend, partsCount, 1);
1003 /* Figure out the lost fraction. */
1004 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1007 lost_fraction = lfMoreThanHalf;
1009 lost_fraction = lfExactlyHalf;
1010 else if (APInt::tcIsZero(dividend, partsCount))
1011 lost_fraction = lfExactlyZero;
1013 lost_fraction = lfLessThanHalf;
1018 return lost_fraction;
1022 APFloat::significandMSB() const
1024 return APInt::tcMSB(significandParts(), partCount());
1028 APFloat::significandLSB() const
1030 return APInt::tcLSB(significandParts(), partCount());
1033 /* Note that a zero result is NOT normalized to fcZero. */
1035 APFloat::shiftSignificandRight(unsigned int bits)
1037 /* Our exponent should not overflow. */
1038 assert((exponent_t) (exponent + bits) >= exponent);
1042 return shiftRight(significandParts(), partCount(), bits);
1045 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1047 APFloat::shiftSignificandLeft(unsigned int bits)
1049 assert(bits < semantics->precision);
1052 unsigned int partsCount = partCount();
1054 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1057 assert(!APInt::tcIsZero(significandParts(), partsCount));
1062 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1066 assert(semantics == rhs.semantics);
1067 assert(category == fcNormal);
1068 assert(rhs.category == fcNormal);
1070 compare = exponent - rhs.exponent;
1072 /* If exponents are equal, do an unsigned bignum comparison of the
1075 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1079 return cmpGreaterThan;
1080 else if (compare < 0)
1086 /* Handle overflow. Sign is preserved. We either become infinity or
1087 the largest finite number. */
1089 APFloat::handleOverflow(roundingMode rounding_mode)
/* Become infinity for both round-to-nearest modes, and for the directed
   mode that points away from zero for our sign (toward positive when
   non-negative, toward negative when negative). */
1092 if (rounding_mode == rmNearestTiesToEven ||
1093 rounding_mode == rmNearestTiesToAway ||
1094 (rounding_mode == rmTowardPositive && !sign) ||
1095 (rounding_mode == rmTowardNegative && sign)) {
1096 category = fcInfinity;
1097 return (opStatus) (opOverflow | opInexact);
1100 /* Otherwise we become the largest finite number. */
1101 category = fcNormal;
1102 exponent = semantics->maxExponent;
/* Maximum exponent with the low PRECISION significand bits set (as the
   name tcSetLeastSignificantBits suggests). */
1103 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1104 semantics->precision);
1109 /* Returns TRUE if, when truncating the current number, with BIT the
1110 new LSB, with the given lost fraction and rounding mode, the result
1111 would need to be rounded away from zero (i.e., by increasing the
1112 significand). This routine must work for fcZero of both signs, and
1113 fcNormal numbers. */
1115 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1116 lostFraction lost_fraction,
1117 unsigned int bit) const
1119 /* NaNs and infinities should not have lost fractions. */
1120 assert(category == fcNormal || category == fcZero);
1122 /* Current callers never pass this so we don't handle it. */
1123 assert(lost_fraction != lfExactlyZero);
1125 switch (rounding_mode) {
1126 case rmNearestTiesToAway:
1127 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1129 case rmNearestTiesToEven:
1130 if (lost_fraction == lfMoreThanHalf)
1133 /* Our zeroes don't have a significand to test. */
1134 if (lost_fraction == lfExactlyHalf && category != fcZero)
1135 return APInt::tcExtractBit(significandParts(), bit);
1142 case rmTowardPositive:
1143 return sign == false;
1145 case rmTowardNegative:
1146 return sign == true;
1148 llvm_unreachable("Invalid rounding mode found");
1152 APFloat::normalize(roundingMode rounding_mode,
1153 lostFraction lost_fraction)
1155 unsigned int omsb; /* One, not zero, based MSB. */
1158 if (category != fcNormal)
1161 /* Before rounding normalize the exponent of fcNormal numbers. */
1162 omsb = significandMSB() + 1;
1165 /* OMSB is numbered from 1. We want to place it in the integer
1166 bit numbered PRECISION if possible, with a compensating change in
1168 exponentChange = omsb - semantics->precision;
1170 /* If the resulting exponent is too high, overflow according to
1171 the rounding mode. */
1172 if (exponent + exponentChange > semantics->maxExponent)
1173 return handleOverflow(rounding_mode);
1175 /* Subnormal numbers have exponent minExponent, and their MSB
1176 is forced based on that. */
1177 if (exponent + exponentChange < semantics->minExponent)
1178 exponentChange = semantics->minExponent - exponent;
1180 /* Shifting left is easy as we don't lose precision. */
1181 if (exponentChange < 0) {
1182 assert(lost_fraction == lfExactlyZero);
1184 shiftSignificandLeft(-exponentChange);
1189 if (exponentChange > 0) {
1192 /* Shift right and capture any new lost fraction. */
1193 lf = shiftSignificandRight(exponentChange);
1195 lost_fraction = combineLostFractions(lf, lost_fraction);
1197 /* Keep OMSB up-to-date. */
1198 if (omsb > (unsigned) exponentChange)
1199 omsb -= exponentChange;
1205 /* Now round the number according to rounding_mode given the lost
1208 /* As specified in IEEE 754, since we do not trap we do not report
1209 underflow for exact results. */
1210 if (lost_fraction == lfExactlyZero) {
1211 /* Canonicalize zeroes. */
1218 /* Increment the significand if we're rounding away from zero. */
1219 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1221 exponent = semantics->minExponent;
1223 incrementSignificand();
1224 omsb = significandMSB() + 1;
1226 /* Did the significand increment overflow? */
1227 if (omsb == (unsigned) semantics->precision + 1) {
1228 /* Renormalize by incrementing the exponent and shifting our
1229 significand right one. However if we already have the
1230 maximum exponent we overflow to infinity. */
1231 if (exponent == semantics->maxExponent) {
1232 category = fcInfinity;
1234 return (opStatus) (opOverflow | opInexact);
1237 shiftSignificandRight(1);
1243 /* The normal case - we were and are not denormal, and any
1244 significand increment above didn't overflow. */
1245 if (omsb == semantics->precision)
1248 /* We have a non-zero denormal. */
1249 assert(omsb < semantics->precision);
1251 /* Canonicalize zeroes. */
1255 /* The fcZero case is a denormal that underflowed to zero. */
1256 return (opStatus) (opUnderflow | opInexact);
1260 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1262 switch (convolve(category, rhs.category)) {
1264 llvm_unreachable(0);
1266 case convolve(fcNaN, fcZero):
1267 case convolve(fcNaN, fcNormal):
1268 case convolve(fcNaN, fcInfinity):
1269 case convolve(fcNaN, fcNaN):
1270 case convolve(fcNormal, fcZero):
1271 case convolve(fcInfinity, fcNormal):
1272 case convolve(fcInfinity, fcZero):
1275 case convolve(fcZero, fcNaN):
1276 case convolve(fcNormal, fcNaN):
1277 case convolve(fcInfinity, fcNaN):
1279 copySignificand(rhs);
1282 case convolve(fcNormal, fcInfinity):
1283 case convolve(fcZero, fcInfinity):
1284 category = fcInfinity;
1285 sign = rhs.sign ^ subtract;
1288 case convolve(fcZero, fcNormal):
1290 sign = rhs.sign ^ subtract;
1293 case convolve(fcZero, fcZero):
1294 /* Sign depends on rounding mode; handled by caller. */
1297 case convolve(fcInfinity, fcInfinity):
1298 /* Differently signed infinities can only be validly
1300 if (((sign ^ rhs.sign)!=0) != subtract) {
1307 case convolve(fcNormal, fcNormal):
1312 /* Add or subtract two normal numbers. */
1314 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1317 lostFraction lost_fraction;
1320 /* Determine if the operation on the absolute values is effectively
1321 an addition or subtraction. */
1322 subtract ^= (sign ^ rhs.sign) ? true : false;
1324 /* Are we bigger exponent-wise than the RHS? */
1325 bits = exponent - rhs.exponent;
1327 /* Subtraction is more subtle than one might naively expect. */
1329 APFloat temp_rhs(rhs);
1333 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1334 lost_fraction = lfExactlyZero;
1335 } else if (bits > 0) {
1336 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1337 shiftSignificandLeft(1);
1340 lost_fraction = shiftSignificandRight(-bits - 1);
1341 temp_rhs.shiftSignificandLeft(1);
1346 carry = temp_rhs.subtractSignificand
1347 (*this, lost_fraction != lfExactlyZero);
1348 copySignificand(temp_rhs);
1351 carry = subtractSignificand
1352 (temp_rhs, lost_fraction != lfExactlyZero);
1355 /* Invert the lost fraction - it was on the RHS and
1357 if (lost_fraction == lfLessThanHalf)
1358 lost_fraction = lfMoreThanHalf;
1359 else if (lost_fraction == lfMoreThanHalf)
1360 lost_fraction = lfLessThanHalf;
1362 /* The code above is intended to ensure that no borrow is
1368 APFloat temp_rhs(rhs);
1370 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1371 carry = addSignificand(temp_rhs);
1373 lost_fraction = shiftSignificandRight(-bits);
1374 carry = addSignificand(rhs);
1377 /* We have a guard bit; generating a carry cannot happen. */
1382 return lost_fraction;
1386 APFloat::multiplySpecials(const APFloat &rhs)
1388 switch (convolve(category, rhs.category)) {
1390 llvm_unreachable(0);
1392 case convolve(fcNaN, fcZero):
1393 case convolve(fcNaN, fcNormal):
1394 case convolve(fcNaN, fcInfinity):
1395 case convolve(fcNaN, fcNaN):
1398 case convolve(fcZero, fcNaN):
1399 case convolve(fcNormal, fcNaN):
1400 case convolve(fcInfinity, fcNaN):
1402 copySignificand(rhs);
1405 case convolve(fcNormal, fcInfinity):
1406 case convolve(fcInfinity, fcNormal):
1407 case convolve(fcInfinity, fcInfinity):
1408 category = fcInfinity;
1411 case convolve(fcZero, fcNormal):
1412 case convolve(fcNormal, fcZero):
1413 case convolve(fcZero, fcZero):
1417 case convolve(fcZero, fcInfinity):
1418 case convolve(fcInfinity, fcZero):
1422 case convolve(fcNormal, fcNormal):
1428 APFloat::divideSpecials(const APFloat &rhs)
1430 switch (convolve(category, rhs.category)) {
1432 llvm_unreachable(0);
1434 case convolve(fcNaN, fcZero):
1435 case convolve(fcNaN, fcNormal):
1436 case convolve(fcNaN, fcInfinity):
1437 case convolve(fcNaN, fcNaN):
1438 case convolve(fcInfinity, fcZero):
1439 case convolve(fcInfinity, fcNormal):
1440 case convolve(fcZero, fcInfinity):
1441 case convolve(fcZero, fcNormal):
1444 case convolve(fcZero, fcNaN):
1445 case convolve(fcNormal, fcNaN):
1446 case convolve(fcInfinity, fcNaN):
1448 copySignificand(rhs);
1451 case convolve(fcNormal, fcInfinity):
1455 case convolve(fcNormal, fcZero):
1456 category = fcInfinity;
1459 case convolve(fcInfinity, fcInfinity):
1460 case convolve(fcZero, fcZero):
1464 case convolve(fcNormal, fcNormal):
1470 APFloat::modSpecials(const APFloat &rhs)
1472 switch (convolve(category, rhs.category)) {
1474 llvm_unreachable(0);
1476 case convolve(fcNaN, fcZero):
1477 case convolve(fcNaN, fcNormal):
1478 case convolve(fcNaN, fcInfinity):
1479 case convolve(fcNaN, fcNaN):
1480 case convolve(fcZero, fcInfinity):
1481 case convolve(fcZero, fcNormal):
1482 case convolve(fcNormal, fcInfinity):
1485 case convolve(fcZero, fcNaN):
1486 case convolve(fcNormal, fcNaN):
1487 case convolve(fcInfinity, fcNaN):
1489 copySignificand(rhs);
1492 case convolve(fcNormal, fcZero):
1493 case convolve(fcInfinity, fcZero):
1494 case convolve(fcInfinity, fcNormal):
1495 case convolve(fcInfinity, fcInfinity):
1496 case convolve(fcZero, fcZero):
1500 case convolve(fcNormal, fcNormal):
1507 APFloat::changeSign()
1509 /* Look mummy, this one's easy. */
1514 APFloat::clearSign()
1516 /* So is this one. */
1521 APFloat::copySign(const APFloat &rhs)
1527 /* Normalized addition or subtraction. */
1529 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1534 fs = addOrSubtractSpecials(rhs, subtract);
1536 /* This return code means it was not a simple case. */
1537 if (fs == opDivByZero) {
1538 lostFraction lost_fraction;
1540 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1541 fs = normalize(rounding_mode, lost_fraction);
1543 /* Can only be zero if we lost no fraction. */
1544 assert(category != fcZero || lost_fraction == lfExactlyZero);
1547 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1548 positive zero unless rounding to minus infinity, except that
1549 adding two like-signed zeroes gives that zero. */
1550 if (category == fcZero) {
1551 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1552 sign = (rounding_mode == rmTowardNegative);
1558 /* Normalized addition. */
1560 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1562 return addOrSubtract(rhs, rounding_mode, false);
1565 /* Normalized subtraction. */
1567 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1569 return addOrSubtract(rhs, rounding_mode, true);
1572 /* Normalized multiply. */
1574 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1579 fs = multiplySpecials(rhs);
1581 if (category == fcNormal) {
1582 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1583 fs = normalize(rounding_mode, lost_fraction);
1584 if (lost_fraction != lfExactlyZero)
1585 fs = (opStatus) (fs | opInexact);
1591 /* Normalized divide. */
1593 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1598 fs = divideSpecials(rhs);
1600 if (category == fcNormal) {
1601 lostFraction lost_fraction = divideSignificand(rhs);
1602 fs = normalize(rounding_mode, lost_fraction);
1603 if (lost_fraction != lfExactlyZero)
1604 fs = (opStatus) (fs | opInexact);
1610 /* Normalized remainder. This is not currently correct in all cases. */
1612 APFloat::remainder(const APFloat &rhs)
1616 unsigned int origSign = sign;
1618 fs = V.divide(rhs, rmNearestTiesToEven);
1619 if (fs == opDivByZero)
1622 int parts = partCount();
1623 integerPart *x = new integerPart[parts];
1625 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1626 rmNearestTiesToEven, &ignored);
1627 if (fs==opInvalidOp)
1630 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1631 rmNearestTiesToEven);
1632 assert(fs==opOK); // should always work
1634 fs = V.multiply(rhs, rmNearestTiesToEven);
1635 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1637 fs = subtract(V, rmNearestTiesToEven);
1638 assert(fs==opOK || fs==opInexact); // likewise
1641 sign = origSign; // IEEE754 requires this
1646 /* Normalized llvm frem (C fmod).
1647 This is not currently correct in all cases. */
1649 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1652 fs = modSpecials(rhs);
1654 if (category == fcNormal && rhs.category == fcNormal) {
1656 unsigned int origSign = sign;
1658 fs = V.divide(rhs, rmNearestTiesToEven);
1659 if (fs == opDivByZero)
1662 int parts = partCount();
1663 integerPart *x = new integerPart[parts];
1665 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1666 rmTowardZero, &ignored);
1667 if (fs==opInvalidOp)
1670 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1671 rmNearestTiesToEven);
1672 assert(fs==opOK); // should always work
1674 fs = V.multiply(rhs, rounding_mode);
1675 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1677 fs = subtract(V, rounding_mode);
1678 assert(fs==opOK || fs==opInexact); // likewise
1681 sign = origSign; // IEEE754 requires this
1687 /* Normalized fused-multiply-add. */
1689 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1690 const APFloat &addend,
1691 roundingMode rounding_mode)
1695 /* Post-multiplication sign, before addition. */
1696 sign ^= multiplicand.sign;
1698 /* If and only if all arguments are normal do we need to do an
1699 extended-precision calculation. */
1700 if (category == fcNormal &&
1701 multiplicand.category == fcNormal &&
1702 addend.category == fcNormal) {
1703 lostFraction lost_fraction;
1705 lost_fraction = multiplySignificand(multiplicand, &addend);
1706 fs = normalize(rounding_mode, lost_fraction);
1707 if (lost_fraction != lfExactlyZero)
1708 fs = (opStatus) (fs | opInexact);
1710 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1711 positive zero unless rounding to minus infinity, except that
1712 adding two like-signed zeroes gives that zero. */
1713 if (category == fcZero && sign != addend.sign)
1714 sign = (rounding_mode == rmTowardNegative);
1716 fs = multiplySpecials(multiplicand);
1718 /* FS can only be opOK or opInvalidOp. There is no more work
1719 to do in the latter case. The IEEE-754R standard says it is
1720 implementation-defined in this case whether, if ADDEND is a
1721 quiet NaN, we raise invalid op; this implementation does so.
1723 If we need to do the addition we can do so with normal
1726 fs = addOrSubtract(addend, rounding_mode, false);
1732 /* Round to an integral value, correctly for the given rounding mode. */
1733 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1736 // If the exponent is large enough, we know that this value is already
1737 // integral, and the arithmetic below would potentially cause it to saturate
1738 // to +/-Inf. Bail out early instead.
1739 if (category == fcNormal && exponent+1 >= (int)semanticsPrecision(*semantics))
1742 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1743 // precision of our format, and then subtract it back off again. The choice
1744 // of rounding modes for the addition/subtraction determines the rounding mode
1745 // for our integral rounding as well.
1746 // NOTE: When the input value is negative, we do subtraction followed by
1747 // addition instead.
1748 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1749 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1750 APFloat MagicConstant(*semantics);
1751 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1752 rmNearestTiesToEven);
1753 MagicConstant.copySign(*this);
1758 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1759 bool inputSign = isNegative();
1761 fs = add(MagicConstant, rounding_mode);
1762 if (fs != opOK && fs != opInexact)
1765 fs = subtract(MagicConstant, rounding_mode);
1767 // Restore the input sign.
1768 if (inputSign != isNegative())
1775 /* Comparison requires normalized numbers. */
1777 APFloat::compare(const APFloat &rhs) const
1781 assert(semantics == rhs.semantics);
1783 switch (convolve(category, rhs.category)) {
1785 llvm_unreachable(0);
1787 case convolve(fcNaN, fcZero):
1788 case convolve(fcNaN, fcNormal):
1789 case convolve(fcNaN, fcInfinity):
1790 case convolve(fcNaN, fcNaN):
1791 case convolve(fcZero, fcNaN):
1792 case convolve(fcNormal, fcNaN):
1793 case convolve(fcInfinity, fcNaN):
1794 return cmpUnordered;
1796 case convolve(fcInfinity, fcNormal):
1797 case convolve(fcInfinity, fcZero):
1798 case convolve(fcNormal, fcZero):
1802 return cmpGreaterThan;
1804 case convolve(fcNormal, fcInfinity):
1805 case convolve(fcZero, fcInfinity):
1806 case convolve(fcZero, fcNormal):
1808 return cmpGreaterThan;
1812 case convolve(fcInfinity, fcInfinity):
1813 if (sign == rhs.sign)
1818 return cmpGreaterThan;
1820 case convolve(fcZero, fcZero):
1823 case convolve(fcNormal, fcNormal):
1827 /* Two normal numbers. Do they have the same sign? */
1828 if (sign != rhs.sign) {
1830 result = cmpLessThan;
1832 result = cmpGreaterThan;
1834 /* Compare absolute values; invert result if negative. */
1835 result = compareAbsoluteValue(rhs);
1838 if (result == cmpLessThan)
1839 result = cmpGreaterThan;
1840 else if (result == cmpGreaterThan)
1841 result = cmpLessThan;
1848 /// APFloat::convert - convert a value of one floating point type to another.
1849 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1850 /// records whether the transformation lost information, i.e. whether
1851 /// converting the result back to the original type would fail to reproduce
1852 /// the original value (this is almost the same as return value != opOK, but
1853 /// there are edge cases where this is not so).
1856 APFloat::convert(const fltSemantics &toSemantics,
1857 roundingMode rounding_mode, bool *losesInfo)
1859 lostFraction lostFraction;
1860 unsigned int newPartCount, oldPartCount;
1863 const fltSemantics &fromSemantics = *semantics;
1865 lostFraction = lfExactlyZero;
1866 newPartCount = partCountForBits(toSemantics.precision + 1);
1867 oldPartCount = partCount();
1868 shift = toSemantics.precision - fromSemantics.precision;
1870 bool X86SpecialNan = false;
1871 if (&fromSemantics == &APFloat::x87DoubleExtended &&
1872 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1873 (!(*significandParts() & 0x8000000000000000ULL) ||
1874 !(*significandParts() & 0x4000000000000000ULL))) {
1875 // x86 has some unusual NaNs which cannot be represented in any other
1876 // format; note them here.
1877 X86SpecialNan = true;
1880 // If this is a truncation, perform the shift before we narrow the storage.
1881 if (shift < 0 && (category==fcNormal || category==fcNaN))
1882 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1884 // Fix the storage so it can hold the new value.
1885 if (newPartCount > oldPartCount) {
1886 // The new type requires more storage; make it available.
1887 integerPart *newParts;
1888 newParts = new integerPart[newPartCount];
1889 APInt::tcSet(newParts, 0, newPartCount);
1890 if (category==fcNormal || category==fcNaN)
1891 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1893 significand.parts = newParts;
1894 } else if (newPartCount == 1 && oldPartCount != 1) {
1895 // Switch to built-in storage for a single part.
1896 integerPart newPart = 0;
1897 if (category==fcNormal || category==fcNaN)
1898 newPart = significandParts()[0];
1900 significand.part = newPart;
1903 // Now that we have the right storage, switch the semantics.
1904 semantics = &toSemantics;
1906 // If this is an extension, perform the shift now that the storage is
1908 if (shift > 0 && (category==fcNormal || category==fcNaN))
1909 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1911 if (category == fcNormal) {
1912 fs = normalize(rounding_mode, lostFraction);
1913 *losesInfo = (fs != opOK);
1914 } else if (category == fcNaN) {
1915 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
1917 // For x87 extended precision, we want to make a NaN, not a special NaN if
1918 // the input wasn't special either.
1919 if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended)
1920 APInt::tcSetBit(significandParts(), semantics->precision - 1);
1922 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1923 // does not give you back the same bits. This is dubious, and we
1924 // don't currently do it. You're really supposed to get
1925 // an invalid operation signal at runtime, but nobody does that.
1935 /* Convert a floating point number to an integer according to the
1936 rounding mode. If the rounded integer value is out of range this
1937 returns an invalid operation exception and the contents of the
1938 destination parts are unspecified. If the rounded value is in
1939 range but the floating point number is not the exact integer, the C
1940 standard doesn't require an inexact exception to be raised. IEEE
1941 854 does require it so we do that.
1943 Note that for conversions to integer type the C standard requires
1944 round-to-zero to always be used. */
1946 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1948 roundingMode rounding_mode,
1949 bool *isExact) const
1951 lostFraction lost_fraction;
1952 const integerPart *src;
1953 unsigned int dstPartsCount, truncatedBits;
1957 /* Handle the three special cases first. */
1958 if (category == fcInfinity || category == fcNaN)
1961 dstPartsCount = partCountForBits(width);
1963 if (category == fcZero) {
1964 APInt::tcSet(parts, 0, dstPartsCount);
1965 // Negative zero can't be represented as an int.
1970 src = significandParts();
1972 /* Step 1: place our absolute value, with any fraction truncated, in
1975 /* Our absolute value is less than one; truncate everything. */
1976 APInt::tcSet(parts, 0, dstPartsCount);
1977 /* For exponent -1 the integer bit represents .5, look at that.
1978 For smaller exponents leftmost truncated bit is 0. */
1979 truncatedBits = semantics->precision -1U - exponent;
1981 /* We want the most significant (exponent + 1) bits; the rest are
1983 unsigned int bits = exponent + 1U;
1985 /* Hopelessly large in magnitude? */
1989 if (bits < semantics->precision) {
1990 /* We truncate (semantics->precision - bits) bits. */
1991 truncatedBits = semantics->precision - bits;
1992 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1994 /* We want at least as many bits as are available. */
1995 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1996 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2001 /* Step 2: work out any lost fraction, and increment the absolute
2002 value if we would round away from zero. */
2003 if (truncatedBits) {
2004 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2006 if (lost_fraction != lfExactlyZero &&
2007 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2008 if (APInt::tcIncrement(parts, dstPartsCount))
2009 return opInvalidOp; /* Overflow. */
2012 lost_fraction = lfExactlyZero;
2015 /* Step 3: check if we fit in the destination. */
2016 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2020 /* Negative numbers cannot be represented as unsigned. */
2024 /* It takes omsb bits to represent the unsigned integer value.
2025 We lose a bit for the sign, but care is needed as the
2026 maximally negative integer is a special case. */
2027 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2030 /* This case can happen because of rounding. */
2035 APInt::tcNegate (parts, dstPartsCount);
2037 if (omsb >= width + !isSigned)
2041 if (lost_fraction == lfExactlyZero) {
2048 /* Same as convertToSignExtendedInteger, except we provide
2049 deterministic values in case of an invalid operation exception,
2050 namely zero for NaNs and the minimal or maximal value respectively
2051 for underflow or overflow.
2052 The *isExact output tells whether the result is exact, in the sense
2053 that converting it back to the original floating point type produces
2054 the original value. This is almost equivalent to result==opOK,
2055 except for negative zeroes.
2058 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2060 roundingMode rounding_mode, bool *isExact) const
2064 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2067 if (fs == opInvalidOp) {
2068 unsigned int bits, dstPartsCount;
2070 dstPartsCount = partCountForBits(width);
2072 if (category == fcNaN)
2077 bits = width - isSigned;
2079 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2080 if (sign && isSigned)
2081 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2087 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2088 an APSInt, whose initial bit-width and signed-ness are used to determine the
2089 precision of the conversion.
2092 APFloat::convertToInteger(APSInt &result,
2093 roundingMode rounding_mode, bool *isExact) const
2095 unsigned bitWidth = result.getBitWidth();
2096 SmallVector<uint64_t, 4> parts(result.getNumWords());
2097 opStatus status = convertToInteger(
2098 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2099 // Keeps the original signed-ness.
2100 result = APInt(bitWidth, parts);
2104 /* Convert an unsigned integer SRC to a floating point number,
2105 rounding according to ROUNDING_MODE. The sign of the floating
2106 point number is not modified. */
2108 APFloat::convertFromUnsignedParts(const integerPart *src,
2109 unsigned int srcCount,
2110 roundingMode rounding_mode)
2112 unsigned int omsb, precision, dstCount;
2114 lostFraction lost_fraction;
2116 category = fcNormal;
2117 omsb = APInt::tcMSB(src, srcCount) + 1;
2118 dst = significandParts();
2119 dstCount = partCount();
2120 precision = semantics->precision;
2122 /* We want the most significant PRECISION bits of SRC. There may not
2123 be that many; extract what we can. */
2124 if (precision <= omsb) {
2125 exponent = omsb - 1;
2126 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2128 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2130 exponent = precision - 1;
2131 lost_fraction = lfExactlyZero;
2132 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2135 return normalize(rounding_mode, lost_fraction);
2139 APFloat::convertFromAPInt(const APInt &Val,
2141 roundingMode rounding_mode)
2143 unsigned int partCount = Val.getNumWords();
2147 if (isSigned && api.isNegative()) {
2152 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2155 /* Convert a two's complement integer SRC to a floating point number,
2156 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2157 integer is signed, in which case it must be sign-extended. */
2159 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2160 unsigned int srcCount,
2162 roundingMode rounding_mode)
2167 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2170 /* If we're signed and negative negate a copy. */
2172 copy = new integerPart[srcCount];
2173 APInt::tcAssign(copy, src, srcCount);
2174 APInt::tcNegate(copy, srcCount);
2175 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2179 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2185 /* FIXME: should this just take a const APInt reference? */
2187 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2188 unsigned int width, bool isSigned,
2189 roundingMode rounding_mode)
2191 unsigned int partCount = partCountForBits(width);
2192 APInt api = APInt(width, makeArrayRef(parts, partCount));
2195 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2200 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2204 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2206 lostFraction lost_fraction = lfExactlyZero;
2207 integerPart *significand;
2208 unsigned int bitPos, partsCount;
2209 StringRef::iterator dot, firstSignificantDigit;
2213 category = fcNormal;
2215 significand = significandParts();
2216 partsCount = partCount();
2217 bitPos = partsCount * integerPartWidth;
2219 /* Skip leading zeroes and any (hexa)decimal point. */
2220 StringRef::iterator begin = s.begin();
2221 StringRef::iterator end = s.end();
2222 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2223 firstSignificantDigit = p;
2226 integerPart hex_value;
2229 assert(dot == end && "String contains multiple dots");
2236 hex_value = hexDigitValue(*p);
2237 if (hex_value == -1U) {
2246 /* Store the number whilst 4-bit nibbles remain. */
2249 hex_value <<= bitPos % integerPartWidth;
2250 significand[bitPos / integerPartWidth] |= hex_value;
2252 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2253 while (p != end && hexDigitValue(*p) != -1U)
2260 /* Hex floats require an exponent but not a hexadecimal point. */
2261 assert(p != end && "Hex strings require an exponent");
2262 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2263 assert(p != begin && "Significand has no digits");
2264 assert((dot == end || p - begin != 1) && "Significand has no digits");
2266 /* Ignore the exponent if we are zero. */
2267 if (p != firstSignificantDigit) {
2270 /* Implicit hexadecimal point? */
2274 /* Calculate the exponent adjustment implicit in the number of
2275 significant digits. */
2276 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2277 if (expAdjustment < 0)
2279 expAdjustment = expAdjustment * 4 - 1;
2281 /* Adjust for writing the significand starting at the most
2282 significant nibble. */
2283 expAdjustment += semantics->precision;
2284 expAdjustment -= partsCount * integerPartWidth;
2286 /* Adjust for the given exponent. */
2287 exponent = totalExponent(p + 1, end, expAdjustment);
2290 return normalize(rounding_mode, lost_fraction);
2294 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2295 unsigned sigPartCount, int exp,
2296 roundingMode rounding_mode)
2298 unsigned int parts, pow5PartCount;
2299 fltSemantics calcSemantics = { 32767, -32767, 0 };
2300 integerPart pow5Parts[maxPowerOfFiveParts];
2303 isNearest = (rounding_mode == rmNearestTiesToEven ||
2304 rounding_mode == rmNearestTiesToAway);
2306 parts = partCountForBits(semantics->precision + 11);
2308 /* Calculate pow(5, abs(exp)). */
2309 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2311 for (;; parts *= 2) {
2312 opStatus sigStatus, powStatus;
2313 unsigned int excessPrecision, truncatedBits;
2315 calcSemantics.precision = parts * integerPartWidth - 1;
2316 excessPrecision = calcSemantics.precision - semantics->precision;
2317 truncatedBits = excessPrecision;
2319 APFloat decSig(calcSemantics, fcZero, sign);
2320 APFloat pow5(calcSemantics, fcZero, false);
2322 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2323 rmNearestTiesToEven);
2324 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2325 rmNearestTiesToEven);
2326 /* Add exp, as 10^n = 5^n * 2^n. */
2327 decSig.exponent += exp;
2329 lostFraction calcLostFraction;
2330 integerPart HUerr, HUdistance;
2331 unsigned int powHUerr;
2334 /* multiplySignificand leaves the precision-th bit set to 1. */
2335 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2336 powHUerr = powStatus != opOK;
2338 calcLostFraction = decSig.divideSignificand(pow5);
2339 /* Denormal numbers have less precision. */
2340 if (decSig.exponent < semantics->minExponent) {
2341 excessPrecision += (semantics->minExponent - decSig.exponent);
2342 truncatedBits = excessPrecision;
2343 if (excessPrecision > calcSemantics.precision)
2344 excessPrecision = calcSemantics.precision;
2346 /* Extra half-ulp lost in reciprocal of exponent. */
2347 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2350 /* Both multiplySignificand and divideSignificand return the
2351 result with the integer bit set. */
2352 assert(APInt::tcExtractBit
2353 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2355 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2357 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2358 excessPrecision, isNearest);
2360 /* Are we guaranteed to round correctly if we truncate? */
2361 if (HUdistance >= HUerr) {
2362 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2363 calcSemantics.precision - excessPrecision,
2365 /* Take the exponent of decSig. If we tcExtract-ed fewer bits
2366 above we must adjust our exponent to compensate for the
2367 implicit right shift. */
2368 exponent = (decSig.exponent + semantics->precision
2369 - (calcSemantics.precision - excessPrecision));
2370 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2373 return normalize(rounding_mode, calcLostFraction);
2379 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2384 /* Scan the text. */
2385 StringRef::iterator p = str.begin();
2386 interpretDecimal(p, str.end(), &D);
2388 /* Handle the quick cases. First the case of no significant digits,
2389 i.e. zero, and then exponents that are obviously too large or too
2390 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2391 definitely overflows if
2393 (exp - 1) * L >= maxExponent
2395 and definitely underflows to zero where
2397 (exp + 1) * L <= minExponent - precision
2399 With integer arithmetic the tightest bounds for L are
2401 93/28 < L < 196/59 [ numerator <= 256 ]
2402 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2405 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2409 /* Check whether the normalized exponent is high enough to overflow
2410 max during the log-rebasing in the max-exponent check below. */
2411 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2412 fs = handleOverflow(rounding_mode);
2414 /* If it wasn't, then it also wasn't high enough to overflow max
2415 during the log-rebasing in the min-exponent check. Check that it
2416 won't overflow min in either check, then perform the min-exponent
2418 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2419 (D.normalizedExponent + 1) * 28738 <=
2420 8651 * (semantics->minExponent - (int) semantics->precision)) {
2421 /* Underflow to zero and round. */
2423 fs = normalize(rounding_mode, lfLessThanHalf);
2425 /* We can finally safely perform the max-exponent check. */
2426 } else if ((D.normalizedExponent - 1) * 42039
2427 >= 12655 * semantics->maxExponent) {
2428 /* Overflow and round. */
2429 fs = handleOverflow(rounding_mode);
2431 integerPart *decSignificand;
2432 unsigned int partCount;
2434 /* A tight upper bound on number of bits required to hold an
2435 N-digit decimal integer is N * 196 / 59. Allocate enough space
2436 to hold the full significand, and an extra part required by
2438 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2439 partCount = partCountForBits(1 + 196 * partCount / 59);
2440 decSignificand = new integerPart[partCount + 1];
2443 /* Convert to binary efficiently - we do almost all multiplication
2444 in an integerPart. Only when this would overflow do we do a single
2445 bignum multiplication, and then revert again to multiplication
2446 in an integerPart. */
2448 integerPart decValue, val, multiplier;
2456 if (p == str.end()) {
2460 decValue = decDigitValue(*p++);
2461 assert(decValue < 10U && "Invalid character in significand");
2463 val = val * 10 + decValue;
2464 /* The maximum number that can be multiplied by ten with any
2465 digit added without overflowing an integerPart. */
2466 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2468 /* Multiply out the current part. */
2469 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2470 partCount, partCount + 1, false);
2472 /* If we used another part (likely but not guaranteed), increase
2474 if (decSignificand[partCount])
2476 } while (p <= D.lastSigDigit);
2478 category = fcNormal;
2479 fs = roundSignificandWithExponent(decSignificand, partCount,
2480 D.exponent, rounding_mode);
2482 delete [] decSignificand;
2489 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2491 assert(!str.empty() && "Invalid string length");
2493 /* Handle a leading minus sign. */
2494 StringRef::iterator p = str.begin();
2495 size_t slen = str.size();
2496 sign = *p == '-' ? 1 : 0;
2497 if (*p == '-' || *p == '+') {
2500 assert(slen && "String has no digits");
2503 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2504 assert(slen - 2 && "Invalid string");
2505 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2509 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2512 /* Write out a hexadecimal representation of the floating point value
2513 to DST, which must be of sufficient size, in the C99 form
2514 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2515 excluding the terminating NUL.
2517 If UPPERCASE, the output is in upper case, otherwise in lower case.
2519 HEXDIGITS digits appear altogether, rounding the value if
2520 necessary. If HEXDIGITS is 0, the minimal precision to display the
2521 number precisely is used instead. If nothing would appear after
2522 the decimal point it is suppressed.
2524 The decimal exponent is always printed and has at least one digit.
2525 Zero values display an exponent of zero. Infinities and NaNs
2526 appear as "infinity" or "nan" respectively.
2528 The above rules are as specified by C99. There is ambiguity about
2529 what the leading hexadecimal digit should be. This implementation
2530 uses whatever is necessary so that the exponent is displayed as
2531 stored. This implies the exponent will fall within the IEEE format
2532 range, and the leading hexadecimal digit will be 0 (for denormals),
2533 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2534 any other digits zero).
2537 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2538 bool upperCase, roundingMode rounding_mode) const
2548 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2549 dst += sizeof infinityL - 1;
2553 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2554 dst += sizeof NaNU - 1;
2559 *dst++ = upperCase ? 'X': 'x';
2561 if (hexDigits > 1) {
2563 memset (dst, '0', hexDigits - 1);
2564 dst += hexDigits - 1;
2566 *dst++ = upperCase ? 'P': 'p';
2571 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2577 return static_cast<unsigned int>(dst - p);
2580 /* Does the hard work of outputting the correctly rounded hexadecimal
2581 form of a normal floating point number with the specified number of
2582 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2583 digits necessary to print the value precisely is output. */
// The result is whatever writeSignedDecimal yields after the exponent has
// been written; convertToHexString stores it back into its write pointer.
2585 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2587 roundingMode rounding_mode) const
2589 unsigned int count, valueBits, shift, partsCount, outputDigits;
2590 const char *hexDigitChars;
2591 const integerPart *significand;
2596 *dst++ = upperCase ? 'X': 'x';
// Digit table matching the requested case.
2599 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2601 significand = significandParts();
2602 partsCount = partCount();
2604 /* +3 because the first digit only uses the single integer bit, so
2605 we have 3 virtual zero most-significant-bits. */
2606 valueBits = semantics->precision + 3;
// Left-shift applied to each part when integerPartWidth-bit groups are
// extracted in the loop below.
2607 shift = integerPartWidth - valueBits % integerPartWidth;
2609 /* The natural number of digits required ignoring trailing
2610 insignificant zeroes. */
2611 outputDigits = (valueBits - significandLSB () + 3) / 4;
2613 /* hexDigits of zero means use the required number for the
2614 precision. Otherwise, see if we are truncating. If we are,
2615 find out if we need to round away from zero. */
2617 if (hexDigits < outputDigits) {
2618 /* We are dropping non-zero bits, so need to check how to round.
2619 "bits" is the number of dropped bits. */
2621 lostFraction fraction;
2623 bits = valueBits - hexDigits * 4;
2624 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2625 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2627 outputDigits = hexDigits;
2630 /* Write the digits consecutively, and start writing in the location
2631 of the hexadecimal point. We move the most significant digit
2632 left and add the hexadecimal point later. */
// Number of integerPart-sized groups needed to cover valueBits bits.
2635 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2637 while (outputDigits && count) {
2640 /* Put the most significant integerPartWidth bits in "part". */
2641 if (--count == partsCount)
2642 part = 0; /* An imaginary higher zero part. */
2644 part = significand[count] << shift;
2647 part |= significand[count - 1] >> (integerPartWidth - shift);
2649 /* Convert as much of "part" to hexdigits as we can. */
2650 unsigned int curDigits = integerPartWidth / 4;
2652 if (curDigits > outputDigits)
2653 curDigits = outputDigits;
2654 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2655 outputDigits -= curDigits;
// Rounding up: each visited digit is replaced by its successor in the digit
// table; a digit that wraps to '0' carries, so the loop keeps going.
2661 /* Note that hexDigitChars has a trailing '0'. */
2664 *q = hexDigitChars[hexDigitValue (*q) + 1];
2665 } while (*q == '0');
2668 /* Add trailing zeroes. */
2669 memset (dst, '0', outputDigits);
2670 dst += outputDigits;
2673 /* Move the most significant digit to before the point, and if there
2674 is something after the hexadecimal point add it. This must come
2675 after rounding above. */
2682 /* Finally output the exponent. */
2683 *dst++ = upperCase ? 'P': 'p';
2685 return writeSignedDecimal (dst, exponent);
// Computes a hash_code for Arg.  Values whose category is not fcNormal are
// hashed from category, sign and precision only; fcNormal values additionally
// hash the exponent and the significand parts.
2688 hash_code llvm::hash_value(const APFloat &Arg) {
2689 if (Arg.category != APFloat::fcNormal)
2690 return hash_combine((uint8_t)Arg.category,
2691 // NaN has no sign, fix it at zero.
2692 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2693 Arg.semantics->precision);
2695 // Normal floats need their exponent and significand hashed.
2696 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2697 Arg.semantics->precision, Arg.exponent,
2699 Arg.significandParts(),
2700 Arg.significandParts() + Arg.partCount()));
2703 // Conversion from APFloat to/from host float/double. It may eventually be
2704 // possible to eliminate these and have everybody deal with APFloats, but that
2705 // will take a while. This approach will not easily extend to long double.
2706 // Current implementation requires integerPartWidth==64, which is correct at
2707 // the moment but could be made more general.
2709 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2710 // the actual IEEE representations. We compensate for that here.
// Produces the 80-bit bit pattern of this x87DoubleExtended value.
// words[0] receives the 64-bit significand; words[1] receives the sign in
// bit 15 above the 15-bit biased exponent.
2713 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2715 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2716 assert(partCount()==2);
2718 uint64_t myexponent, mysignificand;
2720 if (category==fcNormal) {
2721 myexponent = exponent+16383; //bias
2722 mysignificand = significandParts()[0];
// A biased exponent of 1 with the top significand bit clear is a denormal;
// it is encoded with an exponent field of 0.
2723 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2724 myexponent = 0; // denormal
2725 } else if (category==fcZero) {
2728 } else if (category==fcInfinity) {
// Infinity: all-ones exponent field and only the top significand bit set.
2729 myexponent = 0x7fff;
2730 mysignificand = 0x8000000000000000ULL;
2732 assert(category == fcNaN && "Unknown category");
// NaN: all-ones exponent field; the stored significand is passed through.
2733 myexponent = 0x7fff;
2734 mysignificand = significandParts()[0];
2738 words[0] = mysignificand;
2739 words[1] = ((uint64_t)(sign & 1) << 15) |
2740 (myexponent & 0x7fffLL);
2741 return APInt(80, words);
// Produces the 128-bit bit pattern of this PPCDoubleDouble value as two IEEE
// double patterns: words[0] is this value rounded to double, and words[1] is
// the double holding what that rounding lost (see the comments below).
2745 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2747 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2748 assert(partCount()==2);
2754 // Convert number to double. To avoid spurious underflows, we re-
2755 // normalize against the "double" minExponent first, and only *then*
2756 // truncate the mantissa. The result of that second conversion
2757 // may be inexact, but should never underflow.
2758 // Declare fltSemantics before APFloat that uses it (and
2759 // saves pointer to it) to ensure correct destruction order.
2760 fltSemantics extendedSemantics = *semantics;
2761 extendedSemantics.minExponent = IEEEdouble.minExponent;
2762 APFloat extended(*this);
2763 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2764 assert(fs == opOK && !losesInfo);
// u is the value rounded to the nearest double; its bits form the first word.
2767 APFloat u(extended);
2768 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2769 assert(fs == opOK || fs == opInexact);
2771 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2773 // If conversion was exact or resulted in a special case, we're done;
2774 // just set the second double to zero. Otherwise, re-convert back to
2775 // the extended format and compute the difference. This now should
2776 // convert exactly to double.
2777 if (u.category == fcNormal && losesInfo) {
2778 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2779 assert(fs == opOK && !losesInfo);
// v = extended - u, i.e. the part the first double could not represent.
2782 APFloat v(extended);
2783 v.subtract(u, rmNearestTiesToEven);
2784 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2785 assert(fs == opOK && !losesInfo);
2787 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2792 return APInt(128, words);
// Produces the 128-bit bit pattern of this IEEEquad value.  words[0] holds
// the low 64 significand bits; words[1] holds the sign in bit 63, the 15-bit
// biased exponent starting at bit 48, and the low 48 bits of the upper
// significand part (the integer bit is not stored).
2796 APFloat::convertQuadrupleAPFloatToAPInt() const
2798 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2799 assert(partCount()==2);
2801 uint64_t myexponent, mysignificand, mysignificand2;
2803 if (category==fcNormal) {
2804 myexponent = exponent+16383; //bias
2805 mysignificand = significandParts()[0];
2806 mysignificand2 = significandParts()[1];
// A biased exponent of 1 with the integer bit clear is a denormal; it is
// encoded with an exponent field of 0.
2807 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2808 myexponent = 0; // denormal
2809 } else if (category==fcZero) {
2811 mysignificand = mysignificand2 = 0;
2812 } else if (category==fcInfinity) {
// Infinity: all-ones exponent field and a zero significand.
2813 myexponent = 0x7fff;
2814 mysignificand = mysignificand2 = 0;
2816 assert(category == fcNaN && "Unknown category!");
// NaN: all-ones exponent field; the stored significand is passed through.
2817 myexponent = 0x7fff;
2818 mysignificand = significandParts()[0];
2819 mysignificand2 = significandParts()[1];
2823 words[0] = mysignificand;
2824 words[1] = ((uint64_t)(sign & 1) << 63) |
2825 ((myexponent & 0x7fff) << 48) |
2826 (mysignificand2 & 0xffffffffffffLL);
2828 return APInt(128, words);
// Produces the 64-bit bit pattern of this IEEEdouble value: sign in bit 63,
// the 11-bit biased exponent starting at bit 52, and the low 52 significand
// bits (the integer bit is not stored).
2832 APFloat::convertDoubleAPFloatToAPInt() const
2834 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2835 assert(partCount()==1);
2837 uint64_t myexponent, mysignificand;
2839 if (category==fcNormal) {
2840 myexponent = exponent+1023; //bias
2841 mysignificand = *significandParts();
// A biased exponent of 1 with the integer bit clear is a denormal; it is
// encoded with an exponent field of 0.
2842 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2843 myexponent = 0; // denormal
2844 } else if (category==fcZero) {
2847 } else if (category==fcInfinity) {
2851 assert(category == fcNaN && "Unknown category!");
// NaN: the stored significand is passed through.
2853 mysignificand = *significandParts();
2856 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2857 ((myexponent & 0x7ff) << 52) |
2858 (mysignificand & 0xfffffffffffffLL))));
// Produces the 32-bit bit pattern of this IEEEsingle value: sign in bit 31,
// the 8-bit biased exponent starting at bit 23, and the low 23 significand
// bits (the integer bit is not stored).
2862 APFloat::convertFloatAPFloatToAPInt() const
2864 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2865 assert(partCount()==1);
2867 uint32_t myexponent, mysignificand;
2869 if (category==fcNormal) {
2870 myexponent = exponent+127; //bias
2871 mysignificand = (uint32_t)*significandParts();
// A biased exponent of 1 with the integer bit clear is a denormal; it is
// encoded with an exponent field of 0.
2872 if (myexponent == 1 && !(mysignificand & 0x800000))
2873 myexponent = 0; // denormal
2874 } else if (category==fcZero) {
2877 } else if (category==fcInfinity) {
2881 assert(category == fcNaN && "Unknown category!");
// NaN: the stored significand is passed through.
2883 mysignificand = (uint32_t)*significandParts();
2886 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2887 (mysignificand & 0x7fffff)));
// Produces the 16-bit bit pattern of this IEEEhalf value: sign in bit 15,
// the 5-bit biased exponent starting at bit 10, and the low 10 significand
// bits (the integer bit is not stored).
2891 APFloat::convertHalfAPFloatToAPInt() const
2893 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2894 assert(partCount()==1);
2896 uint32_t myexponent, mysignificand;
2898 if (category==fcNormal) {
2899 myexponent = exponent+15; //bias
2900 mysignificand = (uint32_t)*significandParts();
// A biased exponent of 1 with the integer bit clear is a denormal; it is
// encoded with an exponent field of 0.
2901 if (myexponent == 1 && !(mysignificand & 0x400))
2902 myexponent = 0; // denormal
2903 } else if (category==fcZero) {
2906 } else if (category==fcInfinity) {
2910 assert(category == fcNaN && "Unknown category!");
// NaN: the stored significand is passed through.
2912 mysignificand = (uint32_t)*significandParts();
2915 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2916 (mysignificand & 0x3ff)));
2919 // This function creates an APInt that is just a bit map of the floating
2920 // point constant as it would appear in memory. It is not a conversion,
2921 // and treating the result as a normal integer is unlikely to be useful.
// The semantics pointer selects the format-specific converter; the last
// case is guarded by an assert that the format is x87DoubleExtended.
2924 APFloat::bitcastToAPInt() const
2926 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2927 return convertHalfAPFloatToAPInt();
2929 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2930 return convertFloatAPFloatToAPInt();
2932 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2933 return convertDoubleAPFloatToAPInt();
2935 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2936 return convertQuadrupleAPFloatToAPInt();
2938 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2939 return convertPPCDoubleDoubleAPFloatToAPInt();
2941 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2943 return convertF80LongDoubleAPFloatToAPInt();
// Returns this value as a host float by taking its bit pattern and
// reinterpreting it.  Only valid for IEEEsingle semantics (asserted).
2947 APFloat::convertToFloat() const
2949 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2950 "Float semantics are not IEEEsingle");
2951 APInt api = bitcastToAPInt();
2952 return api.bitsToFloat();
// Returns this value as a host double by taking its bit pattern and
// reinterpreting it.  Only valid for IEEEdouble semantics (asserted).
2956 APFloat::convertToDouble() const
2958 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2959 "Float semantics are not IEEEdouble");
2960 APInt api = bitcastToAPInt();
2961 return api.bitsToDouble();
2964 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2965 /// does not support these bit patterns:
2966 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2967 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2968 /// exponent = 0, integer bit 1 ("pseudodenormal")
2969 /// exponent neither 0 nor all 1's, integer bit 0 ("unnormal")
2970 /// At the moment, the first two are treated as NaNs, the last two as Normal.
///
/// Initializes this object with x87DoubleExtended semantics from the 80-bit
/// pattern in api: raw word 0 is the significand, and raw word 1 carries the
/// 15-bit biased exponent in its low bits with the sign above it.
2972 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2974 assert(api.getBitWidth()==80);
2975 uint64_t i1 = api.getRawData()[0];
2976 uint64_t i2 = api.getRawData()[1];
2977 uint64_t myexponent = (i2 & 0x7fff);
2978 uint64_t mysignificand = i1;
2980 initialize(&APFloat::x87DoubleExtended);
2981 assert(partCount()==2);
2983 sign = static_cast<unsigned int>(i2>>15);
2984 if (myexponent==0 && mysignificand==0) {
2985 // exponent, significand meaningless
2987 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2988 // exponent, significand meaningless
2989 category = fcInfinity;
2990 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2991 // exponent meaningless
2993 significandParts()[0] = mysignificand;
2994 significandParts()[1] = 0;
// Everything else is stored as fcNormal with the bias removed.
2996 category = fcNormal;
2997 exponent = myexponent - 16383;
2998 significandParts()[0] = mysignificand;
2999 significandParts()[1] = 0;
3000 if (myexponent==0) // denormal
// Initializes this object with PPCDoubleDouble semantics from a 128-bit
// pattern made of two IEEE doubles: raw word 0 is loaded and converted
// first, then raw word 1 is added to it when the first double is fcNormal.
3006 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3008 assert(api.getBitWidth()==128);
3009 uint64_t i1 = api.getRawData()[0];
3010 uint64_t i2 = api.getRawData()[1];
3014 // Get the first double and convert to our format.
3015 initFromDoubleAPInt(APInt(64, i1));
3016 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3017 assert(fs == opOK && !losesInfo);
3020 // Unless we have a special case, add in second double.
3021 if (category == fcNormal) {
3022 APFloat v(IEEEdouble, APInt(64, i2));
3023 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3024 assert(fs == opOK && !losesInfo);
3027 add(v, rmNearestTiesToEven);
// Initializes this object with IEEEquad semantics from the 128-bit pattern
// in api.  Raw word 0 is the low significand part; raw word 1 carries the
// sign in bit 63, the 15-bit biased exponent starting at bit 48, and the
// upper 48 significand bits.
3032 APFloat::initFromQuadrupleAPInt(const APInt &api)
3034 assert(api.getBitWidth()==128);
3035 uint64_t i1 = api.getRawData()[0];
3036 uint64_t i2 = api.getRawData()[1];
3037 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3038 uint64_t mysignificand = i1;
3039 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3041 initialize(&APFloat::IEEEquad);
3042 assert(partCount()==2);
3044 sign = static_cast<unsigned int>(i2>>63);
3045 if (myexponent==0 &&
3046 (mysignificand==0 && mysignificand2==0)) {
3047 // exponent, significand meaningless
3049 } else if (myexponent==0x7fff &&
3050 (mysignificand==0 && mysignificand2==0)) {
3051 // exponent, significand meaningless
3052 category = fcInfinity;
3053 } else if (myexponent==0x7fff &&
3054 (mysignificand!=0 || mysignificand2 !=0)) {
3055 // exponent meaningless
3057 significandParts()[0] = mysignificand;
3058 significandParts()[1] = mysignificand2;
// Everything else is stored as fcNormal with the bias removed.
3060 category = fcNormal;
3061 exponent = myexponent - 16383;
3062 significandParts()[0] = mysignificand;
3063 significandParts()[1] = mysignificand2;
3064 if (myexponent==0) // denormal
3067 significandParts()[1] |= 0x1000000000000LL; // integer bit
// Initializes this object with IEEEdouble semantics from the 64-bit pattern
// in api: sign in bit 63, the 11-bit biased exponent starting at bit 52,
// and the low 52 bits as the stored significand.
3072 APFloat::initFromDoubleAPInt(const APInt &api)
3074 assert(api.getBitWidth()==64);
3075 uint64_t i = *api.getRawData();
3076 uint64_t myexponent = (i >> 52) & 0x7ff;
3077 uint64_t mysignificand = i & 0xfffffffffffffLL;
3079 initialize(&APFloat::IEEEdouble);
3080 assert(partCount()==1);
3082 sign = static_cast<unsigned int>(i>>63);
3083 if (myexponent==0 && mysignificand==0) {
3084 // exponent, significand meaningless
3086 } else if (myexponent==0x7ff && mysignificand==0) {
3087 // exponent, significand meaningless
3088 category = fcInfinity;
3089 } else if (myexponent==0x7ff && mysignificand!=0) {
3090 // exponent meaningless
3092 *significandParts() = mysignificand;
// Everything else is stored as fcNormal with the bias removed.
3094 category = fcNormal;
3095 exponent = myexponent - 1023;
3096 *significandParts() = mysignificand;
3097 if (myexponent==0) // denormal
3100 *significandParts() |= 0x10000000000000LL; // integer bit
// Initializes this object with IEEEsingle semantics from the 32-bit pattern
// in api: the 8-bit biased exponent starts at bit 23 and the low 23 bits
// are the stored significand.
3105 APFloat::initFromFloatAPInt(const APInt & api)
3107 assert(api.getBitWidth()==32);
3108 uint32_t i = (uint32_t)*api.getRawData();
3109 uint32_t myexponent = (i >> 23) & 0xff;
3110 uint32_t mysignificand = i & 0x7fffff;
3112 initialize(&APFloat::IEEEsingle);
3113 assert(partCount()==1);
3116 if (myexponent==0 && mysignificand==0) {
3117 // exponent, significand meaningless
3119 } else if (myexponent==0xff && mysignificand==0) {
3120 // exponent, significand meaningless
3121 category = fcInfinity;
3122 } else if (myexponent==0xff && mysignificand!=0) {
3123 // exponent meaningless; the significand bits are kept
3125 *significandParts() = mysignificand;
// Everything else is stored as fcNormal with the bias removed.
3127 category = fcNormal;
3128 exponent = myexponent - 127; //bias
3129 *significandParts() = mysignificand;
3130 if (myexponent==0) // denormal
3133 *significandParts() |= 0x800000; // integer bit
// Initializes this object with IEEEhalf semantics from the 16-bit pattern
// in api: the 5-bit biased exponent starts at bit 10 and the low 10 bits
// are the stored significand.
3138 APFloat::initFromHalfAPInt(const APInt & api)
3140 assert(api.getBitWidth()==16);
3141 uint32_t i = (uint32_t)*api.getRawData();
3142 uint32_t myexponent = (i >> 10) & 0x1f;
3143 uint32_t mysignificand = i & 0x3ff;
3145 initialize(&APFloat::IEEEhalf);
3146 assert(partCount()==1);
3149 if (myexponent==0 && mysignificand==0) {
3150 // exponent, significand meaningless
3152 } else if (myexponent==0x1f && mysignificand==0) {
3153 // exponent, significand meaningless
3154 category = fcInfinity;
3155 } else if (myexponent==0x1f && mysignificand!=0) {
3156 // exponent meaningless; the significand bits are kept
3158 *significandParts() = mysignificand;
// Everything else is stored as fcNormal with the bias removed.
3160 category = fcNormal;
3161 exponent = myexponent - 15; //bias
3162 *significandParts() = mysignificand;
3163 if (myexponent==0) // denormal
3166 *significandParts() |= 0x400; // integer bit
3170 /// Treat api as containing the bits of a floating point number. The
3171 /// format is selected by Sem, which must be one of the semantics objects
3172 /// tested below; each format-specific initializer asserts that api has
3173 /// the bit width that format requires.
3175 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api)
3177 if (Sem == &IEEEhalf)
3178 return initFromHalfAPInt(api);
3179 if (Sem == &IEEEsingle)
3180 return initFromFloatAPInt(api);
3181 if (Sem == &IEEEdouble)
3182 return initFromDoubleAPInt(api);
3183 if (Sem == &x87DoubleExtended)
3184 return initFromF80LongDoubleAPInt(api);
3185 if (Sem == &IEEEquad)
3186 return initFromQuadrupleAPInt(api);
3187 if (Sem == &PPCDoubleDouble)
3188 return initFromPPCDoubleDoubleAPInt(api);
// Any other semantics object is a caller error.
3190 llvm_unreachable(0);
// Builds an APFloat whose bit pattern is all ones, for one of the formats
// listed below; an unsupported BitWidth reaches llvm_unreachable.
// NOTE(review): the conditions that pick a format from BitWidth are not
// visible here; isIEEE presumably chooses IEEEquad over PPCDoubleDouble for
// the 128-bit case -- confirm.
3194 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3198 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth));
3200 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth));
3202 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth));
3204 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth));
3207 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth));
3208 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
3210 llvm_unreachable("Unknown floating bit width");
// Builds the fcNormal value of format Sem that has the maximum exponent and
// every significand bit set; Negative selects its sign.
3214 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3215 APFloat Val(Sem, fcNormal, Negative);
3217 // We want (in interchange format):
3218 // sign = {Negative}
3220 // significand = 1..1
3222 Val.exponent = Sem.maxExponent; // unbiased
3224 // 1-initialize all bits....
3225 Val.zeroSignificand();
3226 integerPart *significand = Val.significandParts();
3227 unsigned N = partCountForBits(Sem.precision);
3228 for (unsigned i = 0; i != N; ++i)
3229 significand[i] = ~((integerPart) 0);
3231 // ...and then clear the top bits for internal consistency.
// Only needed when the precision does not fill the last part exactly; the
// mask keeps the low (precision % integerPartWidth) bits.
3232 if (Sem.precision % integerPartWidth != 0)
3234 (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1;
// Builds the value of format Sem that has the minimum exponent and only the
// least significant significand bit set; Negative selects its sign.
3239 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3240 APFloat Val(Sem, fcNormal, Negative);
3242 // We want (in interchange format):
3243 // sign = {Negative}
3245 // significand = 0..01
3247 Val.exponent = Sem.minExponent; // unbiased
3248 Val.zeroSignificand();
3249 Val.significandParts()[0] = 1;
// Builds the value of format Sem that has the minimum exponent and only the
// top (integer) bit of the significand set; Negative selects its sign.
3253 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3254 APFloat Val(Sem, fcNormal, Negative);
3256 // We want (in interchange format):
3257 // sign = {Negative}
3259 // significand = 10..0
3261 Val.exponent = Sem.minExponent;
3262 Val.zeroSignificand();
// Bit (precision - 1) lives in the last significand part.
3263 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3264 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
// Constructs a value of format Sem from the raw bit pattern held in API.
3269 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) {
3270 initFromAPInt(&Sem, API);
// Constructs an IEEEsingle value from the bit pattern of the host float f.
3273 APFloat::APFloat(float f) {
3274 initFromAPInt(&IEEEsingle, APInt::floatToBits(f));
// Constructs an IEEEdouble value from the bit pattern of the host double d.
3277 APFloat::APFloat(double d) {
3278 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d));
// Appends the characters of Str to the end of Buffer.
3282 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3283 Buffer.append(Str.begin(), Str.end());
3286 /// Removes data from the given significand until it is no more
3287 /// precise than is required for the desired precision.
/// FormatPrecision is a count of decimal digits.  exp is increased by the
/// number of powers of ten that are divided out of significand, and
/// significand is finally truncated to its active bit count (at least 32).
3288 void AdjustToPrecision(APInt &significand,
3289 int &exp, unsigned FormatPrecision) {
3290 unsigned bits = significand.getActiveBits();
3292 // 196/59 is a very slight overestimate of lg_2(10).
3293 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
// Nothing to remove if the value already fits in the required bits.
3295 if (bits <= bitsRequired) return;
// Convert the surplus bits back into a whole number of decimal digits.
3297 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3298 if (!tensRemovable) return;
3300 exp += tensRemovable;
// NOTE(review): the loop below walks the bits of tensRemovable, presumably
// building divisor = 10^tensRemovable by square-and-multiply -- confirm.
3302 APInt divisor(significand.getBitWidth(), 1);
3303 APInt powten(significand.getBitWidth(), 10);
3305 if (tensRemovable & 1)
3307 tensRemovable >>= 1;
3308 if (!tensRemovable) break;
3312 significand = significand.udiv(divisor);
3314 // Truncate the significand down to its active bit count, but
3315 // don't try to drop below 32.
3316 unsigned newPrecision = std::max(32U, significand.getActiveBits());
3317 significand = significand.trunc(newPrecision);
/// Digit-buffer overload: reduces buffer, which holds ASCII decimal digits
/// with the least significant digit first, to at most FormatPrecision
/// digits.  The decision to round up or down is taken from the first dropped
/// digit, and exp is increased by the number of digits removed.
3321 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3322 int &exp, unsigned FormatPrecision) {
3323 unsigned N = buffer.size();
// Already short enough: leave buffer and exp untouched.
3324 if (N <= FormatPrecision) return;
3326 // The most significant figures are the last ones in the buffer.
3327 unsigned FirstSignificant = N - FormatPrecision;
3330 // FIXME: this probably shouldn't use 'round half up'.
3332 // Rounding down is just a truncation, except we also want to drop
3333 // trailing zeros from the new result.
3334 if (buffer[FirstSignificant - 1] < '5') {
3335 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3338 exp += FirstSignificant;
3339 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3343 // Rounding up requires a decimal add-with-carry. If we continue
3344 // the carry, the newly-introduced zeros will just be truncated.
3345 for (unsigned I = FirstSignificant; I != N; ++I) {
3346 if (buffer[I] == '9') {
3354 // If we carried through, we have exactly one digit of precision.
3355 if (FirstSignificant == N) {
3356 exp += FirstSignificant;
3358 buffer.push_back('1');
// Otherwise drop everything below the first kept digit.
3362 exp += FirstSignificant;
3363 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
// Appends a decimal rendering of this value to Str.
//   FormatPrecision  - maximum number of significant decimal digits; 0
//                      selects a default derived from the format's precision.
//   FormatMaxPadding - 0 forces scientific notation; otherwise it bounds how
//                      far plain notation may be padded before scientific
//                      notation is chosen instead.
3367 void APFloat::toString(SmallVectorImpl<char> &Str,
3368 unsigned FormatPrecision,
3369 unsigned FormatMaxPadding) const {
// Infinities and NaN have fixed spellings.
3373 return append(Str, "-Inf");
3375 return append(Str, "+Inf");
3377 case fcNaN: return append(Str, "NaN");
// Fixed text used when scientific notation is forced.
3383 if (!FormatMaxPadding)
3384 append(Str, "0.0E+0");
3396 // Decompose the number into an APInt and an exponent.
3397 int exp = exponent - ((int) semantics->precision - 1);
3398 APInt significand(semantics->precision,
3399 makeArrayRef(significandParts(),
3400 partCountForBits(semantics->precision)));
3402 // Set FormatPrecision if zero. We want to do this before we
3403 // truncate trailing zeros, as those are part of the precision.
3404 if (!FormatPrecision) {
3405 // It's an interesting question whether to use the nominal
3406 // precision or the active precision here for denormals.
3408 // FormatPrecision = ceil(significandBits / lg_2(10))
3409 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3412 // Ignore trailing binary zeros.
3413 int trailingZeros = significand.countTrailingZeros();
3414 exp += trailingZeros;
3415 significand = significand.lshr(trailingZeros);
3417 // Change the exponent from 2^e to 10^e.
3420 } else if (exp > 0) {
// Positive binary exponent: widen the integer and shift the exponent in.
3422 significand = significand.zext(semantics->precision + exp);
3423 significand <<= exp;
3425 } else { /* exp < 0 */
3428 // We transform this using the identity:
3429 // (N)(2^-e) == (N)(5^e)(10^-e)
3430 // This means we have to multiply N (the significand) by 5^e.
3431 // To avoid overflow, we have to operate on numbers large
3432 // enough to store N * 5^e:
3433 // log2(N * 5^e) == log2(N) + e * log2(5)
3434 // <= semantics->precision + e * 137 / 59
3435 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3437 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3439 // Multiply significand by 5^e.
3440 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3441 significand = significand.zext(precision);
3442 APInt five_to_the_i(precision, 5);
3444 if (texp & 1) significand *= five_to_the_i;
3448 five_to_the_i *= five_to_the_i;
// Shed bits that exceed the requested decimal precision before the
// digit-by-digit division below.
3452 AdjustToPrecision(significand, exp, FormatPrecision);
// Decimal digits are collected least significant first.
3454 SmallVector<char, 256> buffer;
3457 unsigned precision = significand.getBitWidth();
3458 APInt ten(precision, 10);
3459 APInt digit(precision, 0);
3461 bool inTrail = true;
3462 while (significand != 0) {
3463 // digit <- significand % 10
3464 // significand <- significand / 10
3465 APInt::udivrem(significand, ten, significand, digit);
3467 unsigned d = digit.getZExtValue();
3469 // Drop trailing zeros.
3470 if (inTrail && !d) exp++;
3472 buffer.push_back((char) ('0' + d));
3477 assert(!buffer.empty() && "no characters in buffer!");
3479 // Drop down to FormatPrecision.
3480 // TODO: don't do more precise calculations above than are required.
3481 AdjustToPrecision(buffer, exp, FormatPrecision);
3483 unsigned NDigits = buffer.size();
3485 // Check whether we should use scientific notation.
3486 bool FormatScientific;
3487 if (!FormatMaxPadding)
3488 FormatScientific = true;
3493 // But we shouldn't make the number look more precise than it is.
3494 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3495 NDigits + (unsigned) exp > FormatPrecision);
3497 // Power of the most significant digit.
3498 int MSD = exp + (int) (NDigits - 1);
3501 FormatScientific = false;
3503 // 765e-5 == 0.00765
3505 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3510 // Scientific formatting is pretty straightforward.
3511 if (FormatScientific) {
// Re-express exp as the power of the leading digit.
3512 exp += (NDigits - 1);
// buffer is least significant first, so it is emitted back to front.
3514 Str.push_back(buffer[NDigits-1]);
3519 for (unsigned I = 1; I != NDigits; ++I)
3520 Str.push_back(buffer[NDigits-1-I]);
3523 Str.push_back(exp >= 0 ? '+' : '-');
3524 if (exp < 0) exp = -exp;
// Exponent digits are produced in reverse and then emitted back to front.
3525 SmallVector<char, 6> expbuf;
3527 expbuf.push_back((char) ('0' + (exp % 10)));
3530 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3531 Str.push_back(expbuf[E-1-I]);
3535 // Non-scientific, positive exponents.
3537 for (unsigned I = 0; I != NDigits; ++I)
3538 Str.push_back(buffer[NDigits-1-I]);
3539 for (unsigned I = 0; I != (unsigned) exp; ++I)
3544 // Non-scientific, negative exponents.
3546 // The number of digits to the left of the decimal point.
3547 int NWholeDigits = exp + (int) NDigits;
3550 if (NWholeDigits > 0) {
3551 for (; I != (unsigned) NWholeDigits; ++I)
3552 Str.push_back(buffer[NDigits-I-1]);
3555 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3559 for (unsigned Z = 1; Z != NZeros; ++Z)
// Remaining digits, most significant first.
3563 for (; I != NDigits; ++I)
3564 Str.push_back(buffer[NDigits-I-1]);
3567 bool APFloat::getExactInverse(APFloat *inv) const {
3568 // Special floats and denormals have no exact inverse.
3569 if (category != fcNormal)
3572 // Check that the number is a power of two by making sure that only the
3573 // integer bit is set in the significand.
3574 if (significandLSB() != semantics->precision - 1)
3578 APFloat reciprocal(*semantics, 1ULL);
3579 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3582 // Avoid multiplication with a denormal, it is not safe on all platforms and
3583 // may be slower than a normal division.
3584 if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
3587 assert(reciprocal.category == fcNormal &&
3588 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);