1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/MathExtras.h"
28 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
30 /* Assumed in hexadecimal significand parsing, and conversion to
31 hexadecimal strings. */
32 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
33 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
37 /* Represents floating point arithmetic semantics. */
39 /* The largest E such that 2^E is representable; this matches the
40 definition of IEEE 754. */
41 exponent_t maxExponent;
43 /* The smallest E such that 2^E is a normalized number; this
44 matches the definition of IEEE 754. */
45 exponent_t minExponent;
47 /* Number of bits in the significand. This includes the integer
49 unsigned int precision;
52 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 };
53 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 };
54 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 };
55 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 };
56 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 };
57 const fltSemantics APFloat::Bogus = { 0, 0, 0 };
59 /* The PowerPC format consists of two doubles. It does not map cleanly
60 onto the usual format above. It is approximated using twice the
61 mantissa bits. Note that for exponents near the double minimum,
62 we no longer can represent the full 106 mantissa bits, so those
63 will be treated as denormal numbers.
65 FIXME: While this approximation is equivalent to what GCC uses for
66 compile-time arithmetic on PPC double-double numbers, it is not able
67 to represent all possible values held by a PPC double-double number,
68 for example: (long double) 1.0 + (long double) 0x1p-106
69 Should this be replaced by a full emulation of PPC double-double? */
70 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 };
72 /* A tight upper bound on number of parts required to hold the value
75 power * 815 / (351 * integerPartWidth) + 1
77 However, whilst the result may require only this many parts,
78 because we are multiplying two values to get it, the
79 multiplication may require an extra part with the excess part
80 being zero (consider the trivial case of 1 * 1, tcFullMultiply
81 requires two parts to hold the single-part result). So we add an
82 extra one to guarantee enough space whilst multiplying. */
83 const unsigned int maxExponent = 16383;
84 const unsigned int maxPrecision = 113;
85 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
86 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
87 / (351 * integerPartWidth));
90 /* A bunch of private, handy routines. */
92 static inline unsigned int
93 partCountForBits(unsigned int bits)
95 return ((bits) + integerPartWidth - 1) / integerPartWidth;
98 /* Returns 0U-9U. Return values >= 10U are not digits. */
99 static inline unsigned int
100 decDigitValue(unsigned int c)
105 /* Return the value of a decimal exponent of the form
108 If the exponent overflows, returns a large exponent with the
111 readExponent(StringRef::iterator begin, StringRef::iterator end)
114 unsigned int absExponent;
115 const unsigned int overlargeExponent = 24000; /* FIXME. */
116 StringRef::iterator p = begin;
118 assert(p != end && "Exponent has no digits");
120 isNegative = (*p == '-');
121 if (*p == '-' || *p == '+') {
123 assert(p != end && "Exponent has no digits");
126 absExponent = decDigitValue(*p++);
127 assert(absExponent < 10U && "Invalid character in exponent");
129 for (; p != end; ++p) {
132 value = decDigitValue(*p);
133 assert(value < 10U && "Invalid character in exponent");
135 value += absExponent * 10;
136 if (absExponent >= overlargeExponent) {
137 absExponent = overlargeExponent;
138 p = end; /* outwit assert below */
144 assert(p == end && "Invalid exponent in exponent");
147 return -(int) absExponent;
149 return (int) absExponent;
152 /* This is ugly and needs cleaning up, but I don't immediately see
153 how whilst remaining safe. */
155 totalExponent(StringRef::iterator p, StringRef::iterator end,
156 int exponentAdjustment)
158 int unsignedExponent;
159 bool negative, overflow;
162 assert(p != end && "Exponent has no digits");
164 negative = *p == '-';
165 if (*p == '-' || *p == '+') {
167 assert(p != end && "Exponent has no digits");
170 unsignedExponent = 0;
172 for (; p != end; ++p) {
175 value = decDigitValue(*p);
176 assert(value < 10U && "Invalid character in exponent");
178 unsignedExponent = unsignedExponent * 10 + value;
179 if (unsignedExponent > 32767) {
185 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
189 exponent = unsignedExponent;
191 exponent = -exponent;
192 exponent += exponentAdjustment;
193 if (exponent > 32767 || exponent < -32768)
198 exponent = negative ? -32768: 32767;
203 static StringRef::iterator
204 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
205 StringRef::iterator *dot)
207 StringRef::iterator p = begin;
209 while (*p == '0' && p != end)
215 assert(end - begin != 1 && "Significand has no digits");
217 while (*p == '0' && p != end)
224 /* Given a normal decimal floating point number of the form
228 where the decimal point and exponent are optional, fill out the
229 structure D. Exponent is appropriate if the significand is
230 treated as an integer, and normalizedExponent if the significand
231 is taken to have the decimal point after a single leading
234 If the value is zero, V->firstSigDigit points to a non-digit, and
235 the return exponent is zero.
238 const char *firstSigDigit;
239 const char *lastSigDigit;
241 int normalizedExponent;
245 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
248 StringRef::iterator dot = end;
249 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
251 D->firstSigDigit = p;
253 D->normalizedExponent = 0;
255 for (; p != end; ++p) {
257 assert(dot == end && "String contains multiple dots");
262 if (decDigitValue(*p) >= 10U)
267 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
268 assert(p != begin && "Significand has no digits");
269 assert((dot == end || p - begin != 1) && "Significand has no digits");
271 /* p points to the first non-digit in the string */
272 D->exponent = readExponent(p + 1, end);
274 /* Implied decimal point? */
279 /* If number is all zeroes accept any exponent. */
280 if (p != D->firstSigDigit) {
281 /* Drop insignificant trailing zeroes. */
286 while (p != begin && *p == '0');
287 while (p != begin && *p == '.');
290 /* Adjust the exponents for any decimal point. */
291 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
292 D->normalizedExponent = (D->exponent +
293 static_cast<exponent_t>((p - D->firstSigDigit)
294 - (dot > D->firstSigDigit && dot < p)));
300 /* Return the trailing fraction of a hexadecimal number.
301 DIGITVALUE is the first hex digit of the fraction, P points to
304 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
305 unsigned int digitValue)
307 unsigned int hexDigit;
309 /* If the first trailing digit isn't 0 or 8 we can work out the
310 fraction immediately. */
312 return lfMoreThanHalf;
313 else if (digitValue < 8 && digitValue > 0)
314 return lfLessThanHalf;
316 /* Otherwise we need to find the first non-zero digit. */
320 assert(p != end && "Invalid trailing hexadecimal fraction!");
322 hexDigit = hexDigitValue(*p);
324 /* If we ran off the end it is exactly zero or one-half, otherwise
327 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
329 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
332 /* Return the fraction lost were a bignum truncated losing the least
333 significant BITS bits. */
335 lostFractionThroughTruncation(const integerPart *parts,
336 unsigned int partCount,
341 lsb = APInt::tcLSB(parts, partCount);
343 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
345 return lfExactlyZero;
347 return lfExactlyHalf;
348 if (bits <= partCount * integerPartWidth &&
349 APInt::tcExtractBit(parts, bits - 1))
350 return lfMoreThanHalf;
352 return lfLessThanHalf;
355 /* Shift DST right BITS bits noting lost fraction. */
357 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
359 lostFraction lost_fraction;
361 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
363 APInt::tcShiftRight(dst, parts, bits);
365 return lost_fraction;
368 /* Combine the effect of two lost fractions. */
370 combineLostFractions(lostFraction moreSignificant,
371 lostFraction lessSignificant)
373 if (lessSignificant != lfExactlyZero) {
374 if (moreSignificant == lfExactlyZero)
375 moreSignificant = lfLessThanHalf;
376 else if (moreSignificant == lfExactlyHalf)
377 moreSignificant = lfMoreThanHalf;
380 return moreSignificant;
383 /* The error from the true value, in half-ulps, on multiplying two
384 floating point numbers, which differ from the value they
385 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
386 than the returned value.
388 See "How to Read Floating Point Numbers Accurately" by William D
391 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
393 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
395 if (HUerr1 + HUerr2 == 0)
396 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
398 return inexactMultiply + 2 * (HUerr1 + HUerr2);
401 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
402 when the least significant BITS are truncated. BITS cannot be
405 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
407 unsigned int count, partBits;
408 integerPart part, boundary;
413 count = bits / integerPartWidth;
414 partBits = bits % integerPartWidth + 1;
416 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
419 boundary = (integerPart) 1 << (partBits - 1);
424 if (part - boundary <= boundary - part)
425 return part - boundary;
427 return boundary - part;
430 if (part == boundary) {
433 return ~(integerPart) 0; /* A lot. */
436 } else if (part == boundary - 1) {
439 return ~(integerPart) 0; /* A lot. */
444 return ~(integerPart) 0; /* A lot. */
447 /* Place pow(5, power) in DST, and return the number of parts used.
448 DST must be at least one part larger than size of the answer. */
450 powerOf5(integerPart *dst, unsigned int power)
452 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
454 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
455 pow5s[0] = 78125 * 5;
457 unsigned int partsCount[16] = { 1 };
458 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
460 assert(power <= maxExponent);
465 *p1 = firstEightPowers[power & 7];
471 for (unsigned int n = 0; power; power >>= 1, n++) {
476 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
478 pc = partsCount[n - 1];
479 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
481 if (pow5[pc - 1] == 0)
489 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
491 if (p2[result - 1] == 0)
494 /* Now result is in p1 with partsCount parts and p2 is scratch
496 tmp = p1, p1 = p2, p2 = tmp;
503 APInt::tcAssign(dst, p1, result);
508 /* Zero at the end to avoid modular arithmetic when adding one; used
509 when rounding up during hexadecimal output. */
510 static const char hexDigitsLower[] = "0123456789abcdef0";
511 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
512 static const char infinityL[] = "infinity";
513 static const char infinityU[] = "INFINITY";
514 static const char NaNL[] = "nan";
515 static const char NaNU[] = "NAN";
517 /* Write out an integerPart in hexadecimal, starting with the most
518 significant nibble. Write out exactly COUNT hexdigits, return
521 partAsHex (char *dst, integerPart part, unsigned int count,
522 const char *hexDigitChars)
524 unsigned int result = count;
526 assert(count != 0 && count <= integerPartWidth / 4);
528 part >>= (integerPartWidth - 4 * count);
530 dst[count] = hexDigitChars[part & 0xf];
537 /* Write out an unsigned decimal integer. */
539 writeUnsignedDecimal (char *dst, unsigned int n)
555 /* Write out a signed decimal integer. */
557 writeSignedDecimal (char *dst, int value)
561 dst = writeUnsignedDecimal(dst, -(unsigned) value);
563 dst = writeUnsignedDecimal(dst, value);
570 APFloat::initialize(const fltSemantics *ourSemantics)
574 semantics = ourSemantics;
577 significand.parts = new integerPart[count];
581 APFloat::freeSignificand()
584 delete [] significand.parts;
588 APFloat::assign(const APFloat &rhs)
590 assert(semantics == rhs.semantics);
593 category = rhs.category;
594 exponent = rhs.exponent;
595 if (category == fcNormal || category == fcNaN)
596 copySignificand(rhs);
600 APFloat::copySignificand(const APFloat &rhs)
602 assert(category == fcNormal || category == fcNaN);
603 assert(rhs.partCount() >= partCount());
605 APInt::tcAssign(significandParts(), rhs.significandParts(),
609 /* Make this number a NaN, with an arbitrary but deterministic value
610 for the significand. If double or longer, this is a signalling NaN,
611 which may not be ideal. If float, this is QNaN(0). */
612 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
617 integerPart *significand = significandParts();
618 unsigned numParts = partCount();
620 // Set the significand bits to the fill.
621 if (!fill || fill->getNumWords() < numParts)
622 APInt::tcSet(significand, 0, numParts);
624 APInt::tcAssign(significand, fill->getRawData(),
625 std::min(fill->getNumWords(), numParts));
627 // Zero out the excess bits of the significand.
628 unsigned bitsToPreserve = semantics->precision - 1;
629 unsigned part = bitsToPreserve / 64;
630 bitsToPreserve %= 64;
631 significand[part] &= ((1ULL << bitsToPreserve) - 1);
632 for (part++; part != numParts; ++part)
633 significand[part] = 0;
636 unsigned QNaNBit = semantics->precision - 2;
639 // We always have to clear the QNaN bit to make it an SNaN.
640 APInt::tcClearBit(significand, QNaNBit);
642 // If there are no bits set in the payload, we have to set
643 // *something* to make it a NaN instead of an infinity;
644 // conventionally, this is the next bit down from the QNaN bit.
645 if (APInt::tcIsZero(significand, numParts))
646 APInt::tcSetBit(significand, QNaNBit - 1);
648 // We always have to set the QNaN bit to make it a QNaN.
649 APInt::tcSetBit(significand, QNaNBit);
652 // For x87 extended precision, we want to make a NaN, not a
653 // pseudo-NaN. Maybe we should expose the ability to make
655 if (semantics == &APFloat::x87DoubleExtended)
656 APInt::tcSetBit(significand, QNaNBit + 1);
659 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
661 APFloat value(Sem, uninitialized);
662 value.makeNaN(SNaN, Negative, fill);
667 APFloat::operator=(const APFloat &rhs)
670 if (semantics != rhs.semantics) {
672 initialize(rhs.semantics);
681 APFloat::isDenormal() const {
682 return isNormal() && (exponent == semantics->minExponent) &&
683 (APInt::tcExtractBit(significandParts(),
684 semantics->precision - 1) == 0);
688 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
691 if (semantics != rhs.semantics ||
692 category != rhs.category ||
695 if (category==fcZero || category==fcInfinity)
697 else if (category==fcNormal && exponent!=rhs.exponent)
701 const integerPart* p=significandParts();
702 const integerPart* q=rhs.significandParts();
703 for (; i>0; i--, p++, q++) {
711 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
712 initialize(&ourSemantics);
715 exponent = ourSemantics.precision - 1;
716 significandParts()[0] = value;
717 normalize(rmNearestTiesToEven, lfExactlyZero);
720 APFloat::APFloat(const fltSemantics &ourSemantics) {
721 initialize(&ourSemantics);
726 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
727 // Allocates storage if necessary but does not initialize it.
728 initialize(&ourSemantics);
731 APFloat::APFloat(const fltSemantics &ourSemantics,
732 fltCategory ourCategory, bool negative) {
733 initialize(&ourSemantics);
734 category = ourCategory;
736 if (category == fcNormal)
738 else if (ourCategory == fcNaN)
742 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
743 initialize(&ourSemantics);
744 convertFromString(text, rmNearestTiesToEven);
747 APFloat::APFloat(const APFloat &rhs) {
748 initialize(rhs.semantics);
757 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
758 void APFloat::Profile(FoldingSetNodeID& ID) const {
759 ID.Add(bitcastToAPInt());
763 APFloat::partCount() const
765 return partCountForBits(semantics->precision + 1);
769 APFloat::semanticsPrecision(const fltSemantics &semantics)
771 return semantics.precision;
775 APFloat::significandParts() const
777 return const_cast<APFloat *>(this)->significandParts();
781 APFloat::significandParts()
783 assert(category == fcNormal || category == fcNaN);
786 return significand.parts;
788 return &significand.part;
792 APFloat::zeroSignificand()
795 APInt::tcSet(significandParts(), 0, partCount());
798 /* Increment an fcNormal floating point number's significand. */
800 APFloat::incrementSignificand()
804 carry = APInt::tcIncrement(significandParts(), partCount());
806 /* Our callers should never cause us to overflow. */
811 /* Add the significand of the RHS. Returns the carry flag. */
813 APFloat::addSignificand(const APFloat &rhs)
817 parts = significandParts();
819 assert(semantics == rhs.semantics);
820 assert(exponent == rhs.exponent);
822 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
825 /* Subtract the significand of the RHS with a borrow flag. Returns
828 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
832 parts = significandParts();
834 assert(semantics == rhs.semantics);
835 assert(exponent == rhs.exponent);
837 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
841 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
842 on to the full-precision result of the multiplication. Returns the
845 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
847 unsigned int omsb; // One, not zero, based MSB.
848 unsigned int partsCount, newPartsCount, precision;
849 integerPart *lhsSignificand;
850 integerPart scratch[4];
851 integerPart *fullSignificand;
852 lostFraction lost_fraction;
855 assert(semantics == rhs.semantics);
857 precision = semantics->precision;
858 newPartsCount = partCountForBits(precision * 2);
860 if (newPartsCount > 4)
861 fullSignificand = new integerPart[newPartsCount];
863 fullSignificand = scratch;
865 lhsSignificand = significandParts();
866 partsCount = partCount();
868 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
869 rhs.significandParts(), partsCount, partsCount);
871 lost_fraction = lfExactlyZero;
872 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
873 exponent += rhs.exponent;
876 Significand savedSignificand = significand;
877 const fltSemantics *savedSemantics = semantics;
878 fltSemantics extendedSemantics;
880 unsigned int extendedPrecision;
882 /* Normalize our MSB. */
883 extendedPrecision = precision + precision - 1;
884 if (omsb != extendedPrecision) {
885 APInt::tcShiftLeft(fullSignificand, newPartsCount,
886 extendedPrecision - omsb);
887 exponent -= extendedPrecision - omsb;
890 /* Create new semantics. */
891 extendedSemantics = *semantics;
892 extendedSemantics.precision = extendedPrecision;
894 if (newPartsCount == 1)
895 significand.part = fullSignificand[0];
897 significand.parts = fullSignificand;
898 semantics = &extendedSemantics;
900 APFloat extendedAddend(*addend);
901 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
902 assert(status == opOK);
904 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
906 /* Restore our state. */
907 if (newPartsCount == 1)
908 fullSignificand[0] = significand.part;
909 significand = savedSignificand;
910 semantics = savedSemantics;
912 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
915 exponent -= (precision - 1);
917 if (omsb > precision) {
918 unsigned int bits, significantParts;
921 bits = omsb - precision;
922 significantParts = partCountForBits(omsb);
923 lf = shiftRight(fullSignificand, significantParts, bits);
924 lost_fraction = combineLostFractions(lf, lost_fraction);
928 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
930 if (newPartsCount > 4)
931 delete [] fullSignificand;
933 return lost_fraction;
936 /* Multiply the significands of LHS and RHS to DST. */
938 APFloat::divideSignificand(const APFloat &rhs)
940 unsigned int bit, i, partsCount;
941 const integerPart *rhsSignificand;
942 integerPart *lhsSignificand, *dividend, *divisor;
943 integerPart scratch[4];
944 lostFraction lost_fraction;
946 assert(semantics == rhs.semantics);
948 lhsSignificand = significandParts();
949 rhsSignificand = rhs.significandParts();
950 partsCount = partCount();
953 dividend = new integerPart[partsCount * 2];
957 divisor = dividend + partsCount;
959 /* Copy the dividend and divisor as they will be modified in-place. */
960 for (i = 0; i < partsCount; i++) {
961 dividend[i] = lhsSignificand[i];
962 divisor[i] = rhsSignificand[i];
963 lhsSignificand[i] = 0;
966 exponent -= rhs.exponent;
968 unsigned int precision = semantics->precision;
970 /* Normalize the divisor. */
971 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
974 APInt::tcShiftLeft(divisor, partsCount, bit);
977 /* Normalize the dividend. */
978 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
981 APInt::tcShiftLeft(dividend, partsCount, bit);
984 /* Ensure the dividend >= divisor initially for the loop below.
985 Incidentally, this means that the division loop below is
986 guaranteed to set the integer bit to one. */
987 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
989 APInt::tcShiftLeft(dividend, partsCount, 1);
990 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
994 for (bit = precision; bit; bit -= 1) {
995 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
996 APInt::tcSubtract(dividend, divisor, 0, partsCount);
997 APInt::tcSetBit(lhsSignificand, bit - 1);
1000 APInt::tcShiftLeft(dividend, partsCount, 1);
1003 /* Figure out the lost fraction. */
1004 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1007 lost_fraction = lfMoreThanHalf;
1009 lost_fraction = lfExactlyHalf;
1010 else if (APInt::tcIsZero(dividend, partsCount))
1011 lost_fraction = lfExactlyZero;
1013 lost_fraction = lfLessThanHalf;
1018 return lost_fraction;
1022 APFloat::significandMSB() const
1024 return APInt::tcMSB(significandParts(), partCount());
1028 APFloat::significandLSB() const
1030 return APInt::tcLSB(significandParts(), partCount());
1033 /* Note that a zero result is NOT normalized to fcZero. */
1035 APFloat::shiftSignificandRight(unsigned int bits)
1037 /* Our exponent should not overflow. */
1038 assert((exponent_t) (exponent + bits) >= exponent);
1042 return shiftRight(significandParts(), partCount(), bits);
1045 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1047 APFloat::shiftSignificandLeft(unsigned int bits)
1049 assert(bits < semantics->precision);
1052 unsigned int partsCount = partCount();
1054 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1057 assert(!APInt::tcIsZero(significandParts(), partsCount));
1062 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1066 assert(semantics == rhs.semantics);
1067 assert(category == fcNormal);
1068 assert(rhs.category == fcNormal);
1070 compare = exponent - rhs.exponent;
1072 /* If exponents are equal, do an unsigned bignum comparison of the
1075 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1079 return cmpGreaterThan;
1080 else if (compare < 0)
1086 /* Handle overflow. Sign is preserved. We either become infinity or
1087 the largest finite number. */
1089 APFloat::handleOverflow(roundingMode rounding_mode)
1092 if (rounding_mode == rmNearestTiesToEven ||
1093 rounding_mode == rmNearestTiesToAway ||
1094 (rounding_mode == rmTowardPositive && !sign) ||
1095 (rounding_mode == rmTowardNegative && sign)) {
1096 category = fcInfinity;
1097 return (opStatus) (opOverflow | opInexact);
1100 /* Otherwise we become the largest finite number. */
1101 category = fcNormal;
1102 exponent = semantics->maxExponent;
1103 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1104 semantics->precision);
1109 /* Returns TRUE if, when truncating the current number, with BIT the
1110 new LSB, with the given lost fraction and rounding mode, the result
1111 would need to be rounded away from zero (i.e., by increasing the
1112 signficand). This routine must work for fcZero of both signs, and
1113 fcNormal numbers. */
1115 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1116 lostFraction lost_fraction,
1117 unsigned int bit) const
1119 /* NaNs and infinities should not have lost fractions. */
1120 assert(category == fcNormal || category == fcZero);
1122 /* Current callers never pass this so we don't handle it. */
1123 assert(lost_fraction != lfExactlyZero);
1125 switch (rounding_mode) {
1126 case rmNearestTiesToAway:
1127 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1129 case rmNearestTiesToEven:
1130 if (lost_fraction == lfMoreThanHalf)
1133 /* Our zeroes don't have a significand to test. */
1134 if (lost_fraction == lfExactlyHalf && category != fcZero)
1135 return APInt::tcExtractBit(significandParts(), bit);
1142 case rmTowardPositive:
1143 return sign == false;
1145 case rmTowardNegative:
1146 return sign == true;
1148 llvm_unreachable("Invalid rounding mode found");
1152 APFloat::normalize(roundingMode rounding_mode,
1153 lostFraction lost_fraction)
1155 unsigned int omsb; /* One, not zero, based MSB. */
1158 if (category != fcNormal)
1161 /* Before rounding normalize the exponent of fcNormal numbers. */
1162 omsb = significandMSB() + 1;
1165 /* OMSB is numbered from 1. We want to place it in the integer
1166 bit numbered PRECISION if possible, with a compensating change in
1168 exponentChange = omsb - semantics->precision;
1170 /* If the resulting exponent is too high, overflow according to
1171 the rounding mode. */
1172 if (exponent + exponentChange > semantics->maxExponent)
1173 return handleOverflow(rounding_mode);
1175 /* Subnormal numbers have exponent minExponent, and their MSB
1176 is forced based on that. */
1177 if (exponent + exponentChange < semantics->minExponent)
1178 exponentChange = semantics->minExponent - exponent;
1180 /* Shifting left is easy as we don't lose precision. */
1181 if (exponentChange < 0) {
1182 assert(lost_fraction == lfExactlyZero);
1184 shiftSignificandLeft(-exponentChange);
1189 if (exponentChange > 0) {
1192 /* Shift right and capture any new lost fraction. */
1193 lf = shiftSignificandRight(exponentChange);
1195 lost_fraction = combineLostFractions(lf, lost_fraction);
1197 /* Keep OMSB up-to-date. */
1198 if (omsb > (unsigned) exponentChange)
1199 omsb -= exponentChange;
1205 /* Now round the number according to rounding_mode given the lost
1208 /* As specified in IEEE 754, since we do not trap we do not report
1209 underflow for exact results. */
1210 if (lost_fraction == lfExactlyZero) {
1211 /* Canonicalize zeroes. */
1218 /* Increment the significand if we're rounding away from zero. */
1219 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1221 exponent = semantics->minExponent;
1223 incrementSignificand();
1224 omsb = significandMSB() + 1;
1226 /* Did the significand increment overflow? */
1227 if (omsb == (unsigned) semantics->precision + 1) {
1228 /* Renormalize by incrementing the exponent and shifting our
1229 significand right one. However if we already have the
1230 maximum exponent we overflow to infinity. */
1231 if (exponent == semantics->maxExponent) {
1232 category = fcInfinity;
1234 return (opStatus) (opOverflow | opInexact);
1237 shiftSignificandRight(1);
1243 /* The normal case - we were and are not denormal, and any
1244 significand increment above didn't overflow. */
1245 if (omsb == semantics->precision)
1248 /* We have a non-zero denormal. */
1249 assert(omsb < semantics->precision);
1251 /* Canonicalize zeroes. */
1255 /* The fcZero case is a denormal that underflowed to zero. */
1256 return (opStatus) (opUnderflow | opInexact);
1260 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1262 switch (convolve(category, rhs.category)) {
1264 llvm_unreachable(0);
1266 case convolve(fcNaN, fcZero):
1267 case convolve(fcNaN, fcNormal):
1268 case convolve(fcNaN, fcInfinity):
1269 case convolve(fcNaN, fcNaN):
1270 case convolve(fcNormal, fcZero):
1271 case convolve(fcInfinity, fcNormal):
1272 case convolve(fcInfinity, fcZero):
1275 case convolve(fcZero, fcNaN):
1276 case convolve(fcNormal, fcNaN):
1277 case convolve(fcInfinity, fcNaN):
1279 copySignificand(rhs);
1282 case convolve(fcNormal, fcInfinity):
1283 case convolve(fcZero, fcInfinity):
1284 category = fcInfinity;
1285 sign = rhs.sign ^ subtract;
1288 case convolve(fcZero, fcNormal):
1290 sign = rhs.sign ^ subtract;
1293 case convolve(fcZero, fcZero):
1294 /* Sign depends on rounding mode; handled by caller. */
1297 case convolve(fcInfinity, fcInfinity):
1298 /* Differently signed infinities can only be validly
1300 if (((sign ^ rhs.sign)!=0) != subtract) {
1307 case convolve(fcNormal, fcNormal):
1312 /* Add or subtract two normal numbers. */
1314 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1317 lostFraction lost_fraction;
1320 /* Determine if the operation on the absolute values is effectively
1321 an addition or subtraction. */
1322 subtract ^= (sign ^ rhs.sign) ? true : false;
1324 /* Are we bigger exponent-wise than the RHS? */
1325 bits = exponent - rhs.exponent;
1327 /* Subtraction is more subtle than one might naively expect. */
1329 APFloat temp_rhs(rhs);
1333 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1334 lost_fraction = lfExactlyZero;
1335 } else if (bits > 0) {
1336 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1337 shiftSignificandLeft(1);
1340 lost_fraction = shiftSignificandRight(-bits - 1);
1341 temp_rhs.shiftSignificandLeft(1);
1346 carry = temp_rhs.subtractSignificand
1347 (*this, lost_fraction != lfExactlyZero);
1348 copySignificand(temp_rhs);
1351 carry = subtractSignificand
1352 (temp_rhs, lost_fraction != lfExactlyZero);
1355 /* Invert the lost fraction - it was on the RHS and
1357 if (lost_fraction == lfLessThanHalf)
1358 lost_fraction = lfMoreThanHalf;
1359 else if (lost_fraction == lfMoreThanHalf)
1360 lost_fraction = lfLessThanHalf;
1362 /* The code above is intended to ensure that no borrow is
1368 APFloat temp_rhs(rhs);
1370 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1371 carry = addSignificand(temp_rhs);
1373 lost_fraction = shiftSignificandRight(-bits);
1374 carry = addSignificand(rhs);
1377 /* We have a guard bit; generating a carry cannot happen. */
1382 return lost_fraction;
1386 APFloat::multiplySpecials(const APFloat &rhs)
1388 switch (convolve(category, rhs.category)) {
1390 llvm_unreachable(0);
1392 case convolve(fcNaN, fcZero):
1393 case convolve(fcNaN, fcNormal):
1394 case convolve(fcNaN, fcInfinity):
1395 case convolve(fcNaN, fcNaN):
1398 case convolve(fcZero, fcNaN):
1399 case convolve(fcNormal, fcNaN):
1400 case convolve(fcInfinity, fcNaN):
1402 copySignificand(rhs);
1405 case convolve(fcNormal, fcInfinity):
1406 case convolve(fcInfinity, fcNormal):
1407 case convolve(fcInfinity, fcInfinity):
1408 category = fcInfinity;
1411 case convolve(fcZero, fcNormal):
1412 case convolve(fcNormal, fcZero):
1413 case convolve(fcZero, fcZero):
1417 case convolve(fcZero, fcInfinity):
1418 case convolve(fcInfinity, fcZero):
1422 case convolve(fcNormal, fcNormal):
1428 APFloat::divideSpecials(const APFloat &rhs)
1430 switch (convolve(category, rhs.category)) {
1432 llvm_unreachable(0);
1434 case convolve(fcNaN, fcZero):
1435 case convolve(fcNaN, fcNormal):
1436 case convolve(fcNaN, fcInfinity):
1437 case convolve(fcNaN, fcNaN):
1438 case convolve(fcInfinity, fcZero):
1439 case convolve(fcInfinity, fcNormal):
1440 case convolve(fcZero, fcInfinity):
1441 case convolve(fcZero, fcNormal):
1444 case convolve(fcZero, fcNaN):
1445 case convolve(fcNormal, fcNaN):
1446 case convolve(fcInfinity, fcNaN):
1448 copySignificand(rhs);
1451 case convolve(fcNormal, fcInfinity):
1455 case convolve(fcNormal, fcZero):
1456 category = fcInfinity;
1459 case convolve(fcInfinity, fcInfinity):
1460 case convolve(fcZero, fcZero):
1464 case convolve(fcNormal, fcNormal):
1470 APFloat::modSpecials(const APFloat &rhs)
1472 switch (convolve(category, rhs.category)) {
1474 llvm_unreachable(0);
1476 case convolve(fcNaN, fcZero):
1477 case convolve(fcNaN, fcNormal):
1478 case convolve(fcNaN, fcInfinity):
1479 case convolve(fcNaN, fcNaN):
1480 case convolve(fcZero, fcInfinity):
1481 case convolve(fcZero, fcNormal):
1482 case convolve(fcNormal, fcInfinity):
1485 case convolve(fcZero, fcNaN):
1486 case convolve(fcNormal, fcNaN):
1487 case convolve(fcInfinity, fcNaN):
1489 copySignificand(rhs);
1492 case convolve(fcNormal, fcZero):
1493 case convolve(fcInfinity, fcZero):
1494 case convolve(fcInfinity, fcNormal):
1495 case convolve(fcInfinity, fcInfinity):
1496 case convolve(fcZero, fcZero):
1500 case convolve(fcNormal, fcNormal):
1507 APFloat::changeSign()
1509 /* Look mummy, this one's easy. */
1514 APFloat::clearSign()
1516 /* So is this one. */
1521 APFloat::copySign(const APFloat &rhs)
1527 /* Normalized addition or subtraction. */
1529 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1534 fs = addOrSubtractSpecials(rhs, subtract);
1536 /* This return code means it was not a simple case. */
1537 if (fs == opDivByZero) {
1538 lostFraction lost_fraction;
1540 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1541 fs = normalize(rounding_mode, lost_fraction);
1543 /* Can only be zero if we lost no fraction. */
1544 assert(category != fcZero || lost_fraction == lfExactlyZero);
1547 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1548 positive zero unless rounding to minus infinity, except that
1549 adding two like-signed zeroes gives that zero. */
1550 if (category == fcZero) {
1551 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1552 sign = (rounding_mode == rmTowardNegative);
1558 /* Normalized addition. */
1560 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1562 return addOrSubtract(rhs, rounding_mode, false);
1565 /* Normalized subtraction. */
1567 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1569 return addOrSubtract(rhs, rounding_mode, true);
1572 /* Normalized multiply. */
1574 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1579 fs = multiplySpecials(rhs);
1581 if (category == fcNormal) {
1582 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1583 fs = normalize(rounding_mode, lost_fraction);
1584 if (lost_fraction != lfExactlyZero)
1585 fs = (opStatus) (fs | opInexact);
1591 /* Normalized divide. */
1593 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1598 fs = divideSpecials(rhs);
1600 if (category == fcNormal) {
1601 lostFraction lost_fraction = divideSignificand(rhs);
1602 fs = normalize(rounding_mode, lost_fraction);
1603 if (lost_fraction != lfExactlyZero)
1604 fs = (opStatus) (fs | opInexact);
1610 /* Normalized remainder. This is not currently correct in all cases. */
1612 APFloat::remainder(const APFloat &rhs)
1616 unsigned int origSign = sign;
1618 fs = V.divide(rhs, rmNearestTiesToEven);
1619 if (fs == opDivByZero)
1622 int parts = partCount();
1623 integerPart *x = new integerPart[parts];
1625 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1626 rmNearestTiesToEven, &ignored);
1627 if (fs==opInvalidOp)
1630 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1631 rmNearestTiesToEven);
1632 assert(fs==opOK); // should always work
1634 fs = V.multiply(rhs, rmNearestTiesToEven);
1635 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1637 fs = subtract(V, rmNearestTiesToEven);
1638 assert(fs==opOK || fs==opInexact); // likewise
1641 sign = origSign; // IEEE754 requires this
1646 /* Normalized llvm frem (C fmod).
1647 This is not currently correct in all cases. */
1649 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1652 fs = modSpecials(rhs);
1654 if (category == fcNormal && rhs.category == fcNormal) {
1656 unsigned int origSign = sign;
1658 fs = V.divide(rhs, rmNearestTiesToEven);
1659 if (fs == opDivByZero)
1662 int parts = partCount();
1663 integerPart *x = new integerPart[parts];
1665 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1666 rmTowardZero, &ignored);
1667 if (fs==opInvalidOp)
1670 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1671 rmNearestTiesToEven);
1672 assert(fs==opOK); // should always work
1674 fs = V.multiply(rhs, rounding_mode);
1675 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1677 fs = subtract(V, rounding_mode);
1678 assert(fs==opOK || fs==opInexact); // likewise
1681 sign = origSign; // IEEE754 requires this
1687 /* Normalized fused-multiply-add. */
1689 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1690 const APFloat &addend,
1691 roundingMode rounding_mode)
1695 /* Post-multiplication sign, before addition. */
1696 sign ^= multiplicand.sign;
1698 /* If and only if all arguments are normal do we need to do an
1699 extended-precision calculation. */
1700 if (category == fcNormal &&
1701 multiplicand.category == fcNormal &&
1702 addend.category == fcNormal) {
1703 lostFraction lost_fraction;
1705 lost_fraction = multiplySignificand(multiplicand, &addend);
1706 fs = normalize(rounding_mode, lost_fraction);
1707 if (lost_fraction != lfExactlyZero)
1708 fs = (opStatus) (fs | opInexact);
1710 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1711 positive zero unless rounding to minus infinity, except that
1712 adding two like-signed zeroes gives that zero. */
1713 if (category == fcZero && sign != addend.sign)
1714 sign = (rounding_mode == rmTowardNegative);
1716 fs = multiplySpecials(multiplicand);
1718 /* FS can only be opOK or opInvalidOp. There is no more work
1719 to do in the latter case. The IEEE-754R standard says it is
1720 implementation-defined in this case whether, if ADDEND is a
1721 quiet NaN, we raise invalid op; this implementation does so.
1723 If we need to do the addition we can do so with normal
1726 fs = addOrSubtract(addend, rounding_mode, false);
1732 /* Rounding-mode corrrect round to integral value. */
1733 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1736 // If the exponent is large enough, we know that this value is already
1737 // integral, and the arithmetic below would potentially cause it to saturate
1738 // to +/-Inf. Bail out early instead.
1739 if (category == fcNormal && exponent+1 >= (int)semanticsPrecision(*semantics))
1742 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1743 // precision of our format, and then subtract it back off again. The choice
1744 // of rounding modes for the addition/subtraction determines the rounding mode
1745 // for our integral rounding as well.
1746 // NOTE: When the input value is negative, we do subtraction followed by
1747 // addition instead.
1748 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1749 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1750 APFloat MagicConstant(*semantics);
1751 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1752 rmNearestTiesToEven);
1753 MagicConstant.copySign(*this);
1758 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1759 bool inputSign = isNegative();
1761 fs = add(MagicConstant, rounding_mode);
1762 if (fs != opOK && fs != opInexact)
1765 fs = subtract(MagicConstant, rounding_mode);
1767 // Restore the input sign.
1768 if (inputSign != isNegative())
1775 /* Comparison requires normalized numbers. */
1777 APFloat::compare(const APFloat &rhs) const
1781 assert(semantics == rhs.semantics);
1783 switch (convolve(category, rhs.category)) {
1785 llvm_unreachable(0);
1787 case convolve(fcNaN, fcZero):
1788 case convolve(fcNaN, fcNormal):
1789 case convolve(fcNaN, fcInfinity):
1790 case convolve(fcNaN, fcNaN):
1791 case convolve(fcZero, fcNaN):
1792 case convolve(fcNormal, fcNaN):
1793 case convolve(fcInfinity, fcNaN):
1794 return cmpUnordered;
1796 case convolve(fcInfinity, fcNormal):
1797 case convolve(fcInfinity, fcZero):
1798 case convolve(fcNormal, fcZero):
1802 return cmpGreaterThan;
1804 case convolve(fcNormal, fcInfinity):
1805 case convolve(fcZero, fcInfinity):
1806 case convolve(fcZero, fcNormal):
1808 return cmpGreaterThan;
1812 case convolve(fcInfinity, fcInfinity):
1813 if (sign == rhs.sign)
1818 return cmpGreaterThan;
1820 case convolve(fcZero, fcZero):
1823 case convolve(fcNormal, fcNormal):
1827 /* Two normal numbers. Do they have the same sign? */
1828 if (sign != rhs.sign) {
1830 result = cmpLessThan;
1832 result = cmpGreaterThan;
1834 /* Compare absolute values; invert result if negative. */
1835 result = compareAbsoluteValue(rhs);
1838 if (result == cmpLessThan)
1839 result = cmpGreaterThan;
1840 else if (result == cmpGreaterThan)
1841 result = cmpLessThan;
1848 /// APFloat::convert - convert a value of one floating point type to another.
1849 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1850 /// records whether the transformation lost information, i.e. whether
1851 /// converting the result back to the original type will produce the
1852 /// original value (this is almost the same as return value==fsOK, but there
1853 /// are edge cases where this is not so).
1856 APFloat::convert(const fltSemantics &toSemantics,
1857 roundingMode rounding_mode, bool *losesInfo)
1859 lostFraction lostFraction;
1860 unsigned int newPartCount, oldPartCount;
1863 const fltSemantics &fromSemantics = *semantics;
1865 lostFraction = lfExactlyZero;
1866 newPartCount = partCountForBits(toSemantics.precision + 1);
1867 oldPartCount = partCount();
1868 shift = toSemantics.precision - fromSemantics.precision;
1870 bool X86SpecialNan = false;
1871 if (&fromSemantics == &APFloat::x87DoubleExtended &&
1872 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1873 (!(*significandParts() & 0x8000000000000000ULL) ||
1874 !(*significandParts() & 0x4000000000000000ULL))) {
1875 // x86 has some unusual NaNs which cannot be represented in any other
1876 // format; note them here.
1877 X86SpecialNan = true;
1880 // If this is a truncation, perform the shift before we narrow the storage.
1881 if (shift < 0 && (category==fcNormal || category==fcNaN))
1882 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1884 // Fix the storage so it can hold to new value.
1885 if (newPartCount > oldPartCount) {
1886 // The new type requires more storage; make it available.
1887 integerPart *newParts;
1888 newParts = new integerPart[newPartCount];
1889 APInt::tcSet(newParts, 0, newPartCount);
1890 if (category==fcNormal || category==fcNaN)
1891 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1893 significand.parts = newParts;
1894 } else if (newPartCount == 1 && oldPartCount != 1) {
1895 // Switch to built-in storage for a single part.
1896 integerPart newPart = 0;
1897 if (category==fcNormal || category==fcNaN)
1898 newPart = significandParts()[0];
1900 significand.part = newPart;
1903 // Now that we have the right storage, switch the semantics.
1904 semantics = &toSemantics;
1906 // If this is an extension, perform the shift now that the storage is
1908 if (shift > 0 && (category==fcNormal || category==fcNaN))
1909 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1911 if (category == fcNormal) {
1912 fs = normalize(rounding_mode, lostFraction);
1913 *losesInfo = (fs != opOK);
1914 } else if (category == fcNaN) {
1915 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
1916 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1917 // does not give you back the same bits. This is dubious, and we
1918 // don't currently do it. You're really supposed to get
1919 // an invalid operation signal at runtime, but nobody does that.
1929 /* Convert a floating point number to an integer according to the
1930 rounding mode. If the rounded integer value is out of range this
1931 returns an invalid operation exception and the contents of the
1932 destination parts are unspecified. If the rounded value is in
1933 range but the floating point number is not the exact integer, the C
1934 standard doesn't require an inexact exception to be raised. IEEE
1935 854 does require it so we do that.
1937 Note that for conversions to integer type the C standard requires
1938 round-to-zero to always be used. */
1940 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1942 roundingMode rounding_mode,
1943 bool *isExact) const
1945 lostFraction lost_fraction;
1946 const integerPart *src;
1947 unsigned int dstPartsCount, truncatedBits;
1951 /* Handle the three special cases first. */
1952 if (category == fcInfinity || category == fcNaN)
1955 dstPartsCount = partCountForBits(width);
1957 if (category == fcZero) {
1958 APInt::tcSet(parts, 0, dstPartsCount);
1959 // Negative zero can't be represented as an int.
1964 src = significandParts();
1966 /* Step 1: place our absolute value, with any fraction truncated, in
1969 /* Our absolute value is less than one; truncate everything. */
1970 APInt::tcSet(parts, 0, dstPartsCount);
1971 /* For exponent -1 the integer bit represents .5, look at that.
1972 For smaller exponents leftmost truncated bit is 0. */
1973 truncatedBits = semantics->precision -1U - exponent;
1975 /* We want the most significant (exponent + 1) bits; the rest are
1977 unsigned int bits = exponent + 1U;
1979 /* Hopelessly large in magnitude? */
1983 if (bits < semantics->precision) {
1984 /* We truncate (semantics->precision - bits) bits. */
1985 truncatedBits = semantics->precision - bits;
1986 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1988 /* We want at least as many bits as are available. */
1989 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1990 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1995 /* Step 2: work out any lost fraction, and increment the absolute
1996 value if we would round away from zero. */
1997 if (truncatedBits) {
1998 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2000 if (lost_fraction != lfExactlyZero &&
2001 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2002 if (APInt::tcIncrement(parts, dstPartsCount))
2003 return opInvalidOp; /* Overflow. */
2006 lost_fraction = lfExactlyZero;
2009 /* Step 3: check if we fit in the destination. */
2010 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2014 /* Negative numbers cannot be represented as unsigned. */
2018 /* It takes omsb bits to represent the unsigned integer value.
2019 We lose a bit for the sign, but care is needed as the
2020 maximally negative integer is a special case. */
2021 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2024 /* This case can happen because of rounding. */
2029 APInt::tcNegate (parts, dstPartsCount);
2031 if (omsb >= width + !isSigned)
2035 if (lost_fraction == lfExactlyZero) {
2042 /* Same as convertToSignExtendedInteger, except we provide
2043 deterministic values in case of an invalid operation exception,
2044 namely zero for NaNs and the minimal or maximal value respectively
2045 for underflow or overflow.
2046 The *isExact output tells whether the result is exact, in the sense
2047 that converting it back to the original floating point type produces
2048 the original value. This is almost equivalent to result==opOK,
2049 except for negative zeroes.
2052 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2054 roundingMode rounding_mode, bool *isExact) const
2058 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2061 if (fs == opInvalidOp) {
2062 unsigned int bits, dstPartsCount;
2064 dstPartsCount = partCountForBits(width);
2066 if (category == fcNaN)
2071 bits = width - isSigned;
2073 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2074 if (sign && isSigned)
2075 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2081 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2082 an APSInt, whose initial bit-width and signed-ness are used to determine the
2083 precision of the conversion.
2086 APFloat::convertToInteger(APSInt &result,
2087 roundingMode rounding_mode, bool *isExact) const
2089 unsigned bitWidth = result.getBitWidth();
2090 SmallVector<uint64_t, 4> parts(result.getNumWords());
2091 opStatus status = convertToInteger(
2092 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2093 // Keeps the original signed-ness.
2094 result = APInt(bitWidth, parts);
2098 /* Convert an unsigned integer SRC to a floating point number,
2099 rounding according to ROUNDING_MODE. The sign of the floating
2100 point number is not modified. */
2102 APFloat::convertFromUnsignedParts(const integerPart *src,
2103 unsigned int srcCount,
2104 roundingMode rounding_mode)
2106 unsigned int omsb, precision, dstCount;
2108 lostFraction lost_fraction;
2110 category = fcNormal;
2111 omsb = APInt::tcMSB(src, srcCount) + 1;
2112 dst = significandParts();
2113 dstCount = partCount();
2114 precision = semantics->precision;
2116 /* We want the most significant PRECISION bits of SRC. There may not
2117 be that many; extract what we can. */
2118 if (precision <= omsb) {
2119 exponent = omsb - 1;
2120 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2122 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2124 exponent = precision - 1;
2125 lost_fraction = lfExactlyZero;
2126 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2129 return normalize(rounding_mode, lost_fraction);
2133 APFloat::convertFromAPInt(const APInt &Val,
2135 roundingMode rounding_mode)
2137 unsigned int partCount = Val.getNumWords();
2141 if (isSigned && api.isNegative()) {
2146 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2149 /* Convert a two's complement integer SRC to a floating point number,
2150 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2151 integer is signed, in which case it must be sign-extended. */
2153 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2154 unsigned int srcCount,
2156 roundingMode rounding_mode)
2161 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2164 /* If we're signed and negative negate a copy. */
2166 copy = new integerPart[srcCount];
2167 APInt::tcAssign(copy, src, srcCount);
2168 APInt::tcNegate(copy, srcCount);
2169 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2173 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2179 /* FIXME: should this just take a const APInt reference? */
2181 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2182 unsigned int width, bool isSigned,
2183 roundingMode rounding_mode)
2185 unsigned int partCount = partCountForBits(width);
2186 APInt api = APInt(width, makeArrayRef(parts, partCount));
2189 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2194 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2198 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2200 lostFraction lost_fraction = lfExactlyZero;
2201 integerPart *significand;
2202 unsigned int bitPos, partsCount;
2203 StringRef::iterator dot, firstSignificantDigit;
2207 category = fcNormal;
2209 significand = significandParts();
2210 partsCount = partCount();
2211 bitPos = partsCount * integerPartWidth;
2213 /* Skip leading zeroes and any (hexa)decimal point. */
2214 StringRef::iterator begin = s.begin();
2215 StringRef::iterator end = s.end();
2216 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2217 firstSignificantDigit = p;
2220 integerPart hex_value;
2223 assert(dot == end && "String contains multiple dots");
2230 hex_value = hexDigitValue(*p);
2231 if (hex_value == -1U) {
2240 /* Store the number whilst 4-bit nibbles remain. */
2243 hex_value <<= bitPos % integerPartWidth;
2244 significand[bitPos / integerPartWidth] |= hex_value;
2246 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2247 while (p != end && hexDigitValue(*p) != -1U)
2254 /* Hex floats require an exponent but not a hexadecimal point. */
2255 assert(p != end && "Hex strings require an exponent");
2256 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2257 assert(p != begin && "Significand has no digits");
2258 assert((dot == end || p - begin != 1) && "Significand has no digits");
2260 /* Ignore the exponent if we are zero. */
2261 if (p != firstSignificantDigit) {
2264 /* Implicit hexadecimal point? */
2268 /* Calculate the exponent adjustment implicit in the number of
2269 significant digits. */
2270 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2271 if (expAdjustment < 0)
2273 expAdjustment = expAdjustment * 4 - 1;
2275 /* Adjust for writing the significand starting at the most
2276 significant nibble. */
2277 expAdjustment += semantics->precision;
2278 expAdjustment -= partsCount * integerPartWidth;
2280 /* Adjust for the given exponent. */
2281 exponent = totalExponent(p + 1, end, expAdjustment);
2284 return normalize(rounding_mode, lost_fraction);
2288 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2289 unsigned sigPartCount, int exp,
2290 roundingMode rounding_mode)
2292 unsigned int parts, pow5PartCount;
2293 fltSemantics calcSemantics = { 32767, -32767, 0 };
2294 integerPart pow5Parts[maxPowerOfFiveParts];
2297 isNearest = (rounding_mode == rmNearestTiesToEven ||
2298 rounding_mode == rmNearestTiesToAway);
2300 parts = partCountForBits(semantics->precision + 11);
2302 /* Calculate pow(5, abs(exp)). */
2303 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2305 for (;; parts *= 2) {
2306 opStatus sigStatus, powStatus;
2307 unsigned int excessPrecision, truncatedBits;
2309 calcSemantics.precision = parts * integerPartWidth - 1;
2310 excessPrecision = calcSemantics.precision - semantics->precision;
2311 truncatedBits = excessPrecision;
2313 APFloat decSig(calcSemantics, fcZero, sign);
2314 APFloat pow5(calcSemantics, fcZero, false);
2316 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2317 rmNearestTiesToEven);
2318 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2319 rmNearestTiesToEven);
2320 /* Add exp, as 10^n = 5^n * 2^n. */
2321 decSig.exponent += exp;
2323 lostFraction calcLostFraction;
2324 integerPart HUerr, HUdistance;
2325 unsigned int powHUerr;
2328 /* multiplySignificand leaves the precision-th bit set to 1. */
2329 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2330 powHUerr = powStatus != opOK;
2332 calcLostFraction = decSig.divideSignificand(pow5);
2333 /* Denormal numbers have less precision. */
2334 if (decSig.exponent < semantics->minExponent) {
2335 excessPrecision += (semantics->minExponent - decSig.exponent);
2336 truncatedBits = excessPrecision;
2337 if (excessPrecision > calcSemantics.precision)
2338 excessPrecision = calcSemantics.precision;
2340 /* Extra half-ulp lost in reciprocal of exponent. */
2341 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2344 /* Both multiplySignificand and divideSignificand return the
2345 result with the integer bit set. */
2346 assert(APInt::tcExtractBit
2347 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2349 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2351 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2352 excessPrecision, isNearest);
2354 /* Are we guaranteed to round correctly if we truncate? */
2355 if (HUdistance >= HUerr) {
2356 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2357 calcSemantics.precision - excessPrecision,
2359 /* Take the exponent of decSig. If we tcExtract-ed less bits
2360 above we must adjust our exponent to compensate for the
2361 implicit right shift. */
2362 exponent = (decSig.exponent + semantics->precision
2363 - (calcSemantics.precision - excessPrecision));
2364 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2367 return normalize(rounding_mode, calcLostFraction);
2373 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2378 /* Scan the text. */
2379 StringRef::iterator p = str.begin();
2380 interpretDecimal(p, str.end(), &D);
2382 /* Handle the quick cases. First the case of no significant digits,
2383 i.e. zero, and then exponents that are obviously too large or too
2384 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2385 definitely overflows if
2387 (exp - 1) * L >= maxExponent
2389 and definitely underflows to zero where
2391 (exp + 1) * L <= minExponent - precision
2393 With integer arithmetic the tightest bounds for L are
2395 93/28 < L < 196/59 [ numerator <= 256 ]
2396 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2399 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2403 /* Check whether the normalized exponent is high enough to overflow
2404 max during the log-rebasing in the max-exponent check below. */
2405 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2406 fs = handleOverflow(rounding_mode);
2408 /* If it wasn't, then it also wasn't high enough to overflow max
2409 during the log-rebasing in the min-exponent check. Check that it
2410 won't overflow min in either check, then perform the min-exponent
2412 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2413 (D.normalizedExponent + 1) * 28738 <=
2414 8651 * (semantics->minExponent - (int) semantics->precision)) {
2415 /* Underflow to zero and round. */
2417 fs = normalize(rounding_mode, lfLessThanHalf);
2419 /* We can finally safely perform the max-exponent check. */
2420 } else if ((D.normalizedExponent - 1) * 42039
2421 >= 12655 * semantics->maxExponent) {
2422 /* Overflow and round. */
2423 fs = handleOverflow(rounding_mode);
2425 integerPart *decSignificand;
2426 unsigned int partCount;
2428 /* A tight upper bound on number of bits required to hold an
2429 N-digit decimal integer is N * 196 / 59. Allocate enough space
2430 to hold the full significand, and an extra part required by
2432 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2433 partCount = partCountForBits(1 + 196 * partCount / 59);
2434 decSignificand = new integerPart[partCount + 1];
2437 /* Convert to binary efficiently - we do almost all multiplication
2438 in an integerPart. When this would overflow do we do a single
2439 bignum multiplication, and then revert again to multiplication
2440 in an integerPart. */
2442 integerPart decValue, val, multiplier;
2450 if (p == str.end()) {
2454 decValue = decDigitValue(*p++);
2455 assert(decValue < 10U && "Invalid character in significand");
2457 val = val * 10 + decValue;
2458 /* The maximum number that can be multiplied by ten with any
2459 digit added without overflowing an integerPart. */
2460 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2462 /* Multiply out the current part. */
2463 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2464 partCount, partCount + 1, false);
2466 /* If we used another part (likely but not guaranteed), increase
2468 if (decSignificand[partCount])
2470 } while (p <= D.lastSigDigit);
2472 category = fcNormal;
2473 fs = roundSignificandWithExponent(decSignificand, partCount,
2474 D.exponent, rounding_mode);
2476 delete [] decSignificand;
2483 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2485 assert(!str.empty() && "Invalid string length");
2487 /* Handle a leading minus sign. */
2488 StringRef::iterator p = str.begin();
2489 size_t slen = str.size();
2490 sign = *p == '-' ? 1 : 0;
2491 if (*p == '-' || *p == '+') {
2494 assert(slen && "String has no digits");
2497 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2498 assert(slen - 2 && "Invalid string");
2499 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2503 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2506 /* Write out a hexadecimal representation of the floating point value
2507 to DST, which must be of sufficient size, in the C99 form
2508 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2509 excluding the terminating NUL.
2511 If UPPERCASE, the output is in upper case, otherwise in lower case.
2513 HEXDIGITS digits appear altogether, rounding the value if
2514 necessary. If HEXDIGITS is 0, the minimal precision to display the
2515 number precisely is used instead. If nothing would appear after
2516 the decimal point it is suppressed.
2518 The decimal exponent is always printed and has at least one digit.
2519 Zero values display an exponent of zero. Infinities and NaNs
2520 appear as "infinity" or "nan" respectively.
2522 The above rules are as specified by C99. There is ambiguity about
2523 what the leading hexadecimal digit should be. This implementation
2524 uses whatever is necessary so that the exponent is displayed as
2525 stored. This implies the exponent will fall within the IEEE format
2526 range, and the leading hexadecimal digit will be 0 (for denormals),
2527 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2528 any other digits zero).
2531 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2532 bool upperCase, roundingMode rounding_mode) const
2542 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2543 dst += sizeof infinityL - 1;
2547 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2548 dst += sizeof NaNU - 1;
2553 *dst++ = upperCase ? 'X': 'x';
2555 if (hexDigits > 1) {
2557 memset (dst, '0', hexDigits - 1);
2558 dst += hexDigits - 1;
2560 *dst++ = upperCase ? 'P': 'p';
2565 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2571 return static_cast<unsigned int>(dst - p);
2574 /* Does the hard work of outputting the correctly rounded hexadecimal
2575 form of a normal floating point number with the specified number of
2576 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2577 digits necessary to print the value precisely is output. */
2579 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2581 roundingMode rounding_mode) const
2583 unsigned int count, valueBits, shift, partsCount, outputDigits;
2584 const char *hexDigitChars;
2585 const integerPart *significand;
2590 *dst++ = upperCase ? 'X': 'x';
2593 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2595 significand = significandParts();
2596 partsCount = partCount();
2598 /* +3 because the first digit only uses the single integer bit, so
2599 we have 3 virtual zero most-significant-bits. */
2600 valueBits = semantics->precision + 3;
2601 shift = integerPartWidth - valueBits % integerPartWidth;
2603 /* The natural number of digits required ignoring trailing
2604 insignificant zeroes. */
2605 outputDigits = (valueBits - significandLSB () + 3) / 4;
2607 /* hexDigits of zero means use the required number for the
2608 precision. Otherwise, see if we are truncating. If we are,
2609 find out if we need to round away from zero. */
2611 if (hexDigits < outputDigits) {
2612 /* We are dropping non-zero bits, so need to check how to round.
2613 "bits" is the number of dropped bits. */
2615 lostFraction fraction;
2617 bits = valueBits - hexDigits * 4;
2618 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2619 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2621 outputDigits = hexDigits;
2624 /* Write the digits consecutively, and start writing in the location
2625 of the hexadecimal point. We move the most significant digit
2626 left and add the hexadecimal point later. */
2629 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2631 while (outputDigits && count) {
2634 /* Put the most significant integerPartWidth bits in "part". */
2635 if (--count == partsCount)
2636 part = 0; /* An imaginary higher zero part. */
2638 part = significand[count] << shift;
2641 part |= significand[count - 1] >> (integerPartWidth - shift);
2643 /* Convert as much of "part" to hexdigits as we can. */
2644 unsigned int curDigits = integerPartWidth / 4;
2646 if (curDigits > outputDigits)
2647 curDigits = outputDigits;
2648 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2649 outputDigits -= curDigits;
2655 /* Note that hexDigitChars has a trailing '0'. */
2658 *q = hexDigitChars[hexDigitValue (*q) + 1];
2659 } while (*q == '0');
2662 /* Add trailing zeroes. */
2663 memset (dst, '0', outputDigits);
2664 dst += outputDigits;
2667 /* Move the most significant digit to before the point, and if there
2668 is something after the decimal point add it. This must come
2669 after rounding above. */
2676 /* Finally output the exponent. */
2677 *dst++ = upperCase ? 'P': 'p';
2679 return writeSignedDecimal (dst, exponent);
2682 hash_code llvm::hash_value(const APFloat &Arg) {
2683 if (Arg.category != APFloat::fcNormal)
2684 return hash_combine((uint8_t)Arg.category,
2685 // NaN has no sign, fix it at zero.
2686 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2687 Arg.semantics->precision);
2689 // Normal floats need their exponent and significand hashed.
2690 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2691 Arg.semantics->precision, Arg.exponent,
2693 Arg.significandParts(),
2694 Arg.significandParts() + Arg.partCount()));
2697 // Conversion from APFloat to/from host float/double. It may eventually be
2698 // possible to eliminate these and have everybody deal with APFloats, but that
2699 // will take a while. This approach will not easily extend to long double.
2700 // Current implementation requires integerPartWidth==64, which is correct at
2701 // the moment but could be made more general.
2703 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2704 // the actual IEEE respresentations. We compensate for that here.
2707 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2709 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2710 assert(partCount()==2);
2712 uint64_t myexponent, mysignificand;
2714 if (category==fcNormal) {
2715 myexponent = exponent+16383; //bias
2716 mysignificand = significandParts()[0];
2717 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2718 myexponent = 0; // denormal
2719 } else if (category==fcZero) {
2722 } else if (category==fcInfinity) {
2723 myexponent = 0x7fff;
2724 mysignificand = 0x8000000000000000ULL;
2726 assert(category == fcNaN && "Unknown category");
2727 myexponent = 0x7fff;
2728 mysignificand = significandParts()[0];
2732 words[0] = mysignificand;
2733 words[1] = ((uint64_t)(sign & 1) << 15) |
2734 (myexponent & 0x7fffLL);
2735 return APInt(80, words);
2739 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2741 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2742 assert(partCount()==2);
2748 // Convert number to double. To avoid spurious underflows, we re-
2749 // normalize against the "double" minExponent first, and only *then*
2750 // truncate the mantissa. The result of that second conversion
2751 // may be inexact, but should never underflow.
2752 // Declare fltSemantics before APFloat that uses it (and
2753 // saves pointer to it) to ensure correct destruction order.
2754 fltSemantics extendedSemantics = *semantics;
2755 extendedSemantics.minExponent = IEEEdouble.minExponent;
2756 APFloat extended(*this);
2757 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2758 assert(fs == opOK && !losesInfo);
2761 APFloat u(extended);
2762 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2763 assert(fs == opOK || fs == opInexact);
2765 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2767 // If conversion was exact or resulted in a special case, we're done;
2768 // just set the second double to zero. Otherwise, re-convert back to
2769 // the extended format and compute the difference. This now should
2770 // convert exactly to double.
2771 if (u.category == fcNormal && losesInfo) {
2772 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2773 assert(fs == opOK && !losesInfo);
2776 APFloat v(extended);
2777 v.subtract(u, rmNearestTiesToEven);
2778 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2779 assert(fs == opOK && !losesInfo);
2781 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2786 return APInt(128, words);
2790 APFloat::convertQuadrupleAPFloatToAPInt() const
2792 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2793 assert(partCount()==2);
2795 uint64_t myexponent, mysignificand, mysignificand2;
2797 if (category==fcNormal) {
2798 myexponent = exponent+16383; //bias
2799 mysignificand = significandParts()[0];
2800 mysignificand2 = significandParts()[1];
2801 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2802 myexponent = 0; // denormal
2803 } else if (category==fcZero) {
2805 mysignificand = mysignificand2 = 0;
2806 } else if (category==fcInfinity) {
2807 myexponent = 0x7fff;
2808 mysignificand = mysignificand2 = 0;
2810 assert(category == fcNaN && "Unknown category!");
2811 myexponent = 0x7fff;
2812 mysignificand = significandParts()[0];
2813 mysignificand2 = significandParts()[1];
2817 words[0] = mysignificand;
2818 words[1] = ((uint64_t)(sign & 1) << 63) |
2819 ((myexponent & 0x7fff) << 48) |
2820 (mysignificand2 & 0xffffffffffffLL);
2822 return APInt(128, words);
2826 APFloat::convertDoubleAPFloatToAPInt() const
2828 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2829 assert(partCount()==1);
2831 uint64_t myexponent, mysignificand;
2833 if (category==fcNormal) {
2834 myexponent = exponent+1023; //bias
2835 mysignificand = *significandParts();
2836 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2837 myexponent = 0; // denormal
2838 } else if (category==fcZero) {
2841 } else if (category==fcInfinity) {
2845 assert(category == fcNaN && "Unknown category!");
2847 mysignificand = *significandParts();
2850 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2851 ((myexponent & 0x7ff) << 52) |
2852 (mysignificand & 0xfffffffffffffLL))));
2856 APFloat::convertFloatAPFloatToAPInt() const
2858 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2859 assert(partCount()==1);
2861 uint32_t myexponent, mysignificand;
2863 if (category==fcNormal) {
2864 myexponent = exponent+127; //bias
2865 mysignificand = (uint32_t)*significandParts();
2866 if (myexponent == 1 && !(mysignificand & 0x800000))
2867 myexponent = 0; // denormal
2868 } else if (category==fcZero) {
2871 } else if (category==fcInfinity) {
2875 assert(category == fcNaN && "Unknown category!");
2877 mysignificand = (uint32_t)*significandParts();
2880 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2881 (mysignificand & 0x7fffff)));
2885 APFloat::convertHalfAPFloatToAPInt() const
2887 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2888 assert(partCount()==1);
2890 uint32_t myexponent, mysignificand;
2892 if (category==fcNormal) {
2893 myexponent = exponent+15; //bias
2894 mysignificand = (uint32_t)*significandParts();
2895 if (myexponent == 1 && !(mysignificand & 0x400))
2896 myexponent = 0; // denormal
2897 } else if (category==fcZero) {
2900 } else if (category==fcInfinity) {
2904 assert(category == fcNaN && "Unknown category!");
2906 mysignificand = (uint32_t)*significandParts();
2909 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2910 (mysignificand & 0x3ff)));
2913 // This function creates an APInt that is just a bit map of the floating
2914 // point constant as it would appear in memory. It is not a conversion,
2915 // and treating the result as a normal integer is unlikely to be useful.
2918 APFloat::bitcastToAPInt() const
2920 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2921 return convertHalfAPFloatToAPInt();
2923 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2924 return convertFloatAPFloatToAPInt();
2926 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2927 return convertDoubleAPFloatToAPInt();
2929 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2930 return convertQuadrupleAPFloatToAPInt();
2932 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2933 return convertPPCDoubleDoubleAPFloatToAPInt();
2935 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2937 return convertF80LongDoubleAPFloatToAPInt();
2941 APFloat::convertToFloat() const
2943 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2944 "Float semantics are not IEEEsingle");
2945 APInt api = bitcastToAPInt();
2946 return api.bitsToFloat();
2950 APFloat::convertToDouble() const
2952 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2953 "Float semantics are not IEEEdouble");
2954 APInt api = bitcastToAPInt();
2955 return api.bitsToDouble();
2958 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2959 /// does not support these bit patterns:
2960 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2961 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2962 /// exponent = 0, integer bit 1 ("pseudodenormal")
2963 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2964 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2966 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2968 assert(api.getBitWidth()==80);
2969 uint64_t i1 = api.getRawData()[0];
2970 uint64_t i2 = api.getRawData()[1];
2971 uint64_t myexponent = (i2 & 0x7fff);
2972 uint64_t mysignificand = i1;
2974 initialize(&APFloat::x87DoubleExtended);
2975 assert(partCount()==2);
2977 sign = static_cast<unsigned int>(i2>>15);
2978 if (myexponent==0 && mysignificand==0) {
2979 // exponent, significand meaningless
2981 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2982 // exponent, significand meaningless
2983 category = fcInfinity;
2984 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2985 // exponent meaningless
2987 significandParts()[0] = mysignificand;
2988 significandParts()[1] = 0;
2990 category = fcNormal;
2991 exponent = myexponent - 16383;
2992 significandParts()[0] = mysignificand;
2993 significandParts()[1] = 0;
2994 if (myexponent==0) // denormal
3000 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3002 assert(api.getBitWidth()==128);
3003 uint64_t i1 = api.getRawData()[0];
3004 uint64_t i2 = api.getRawData()[1];
3008 // Get the first double and convert to our format.
3009 initFromDoubleAPInt(APInt(64, i1));
3010 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3011 assert(fs == opOK && !losesInfo);
3014 // Unless we have a special case, add in second double.
3015 if (category == fcNormal) {
3016 APFloat v(IEEEdouble, APInt(64, i2));
3017 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3018 assert(fs == opOK && !losesInfo);
3021 add(v, rmNearestTiesToEven);
3026 APFloat::initFromQuadrupleAPInt(const APInt &api)
3028 assert(api.getBitWidth()==128);
3029 uint64_t i1 = api.getRawData()[0];
3030 uint64_t i2 = api.getRawData()[1];
3031 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3032 uint64_t mysignificand = i1;
3033 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3035 initialize(&APFloat::IEEEquad);
3036 assert(partCount()==2);
3038 sign = static_cast<unsigned int>(i2>>63);
3039 if (myexponent==0 &&
3040 (mysignificand==0 && mysignificand2==0)) {
3041 // exponent, significand meaningless
3043 } else if (myexponent==0x7fff &&
3044 (mysignificand==0 && mysignificand2==0)) {
3045 // exponent, significand meaningless
3046 category = fcInfinity;
3047 } else if (myexponent==0x7fff &&
3048 (mysignificand!=0 || mysignificand2 !=0)) {
3049 // exponent meaningless
3051 significandParts()[0] = mysignificand;
3052 significandParts()[1] = mysignificand2;
3054 category = fcNormal;
3055 exponent = myexponent - 16383;
3056 significandParts()[0] = mysignificand;
3057 significandParts()[1] = mysignificand2;
3058 if (myexponent==0) // denormal
3061 significandParts()[1] |= 0x1000000000000LL; // integer bit
3066 APFloat::initFromDoubleAPInt(const APInt &api)
3068 assert(api.getBitWidth()==64);
3069 uint64_t i = *api.getRawData();
3070 uint64_t myexponent = (i >> 52) & 0x7ff;
3071 uint64_t mysignificand = i & 0xfffffffffffffLL;
3073 initialize(&APFloat::IEEEdouble);
3074 assert(partCount()==1);
3076 sign = static_cast<unsigned int>(i>>63);
3077 if (myexponent==0 && mysignificand==0) {
3078 // exponent, significand meaningless
3080 } else if (myexponent==0x7ff && mysignificand==0) {
3081 // exponent, significand meaningless
3082 category = fcInfinity;
3083 } else if (myexponent==0x7ff && mysignificand!=0) {
3084 // exponent meaningless
3086 *significandParts() = mysignificand;
3088 category = fcNormal;
3089 exponent = myexponent - 1023;
3090 *significandParts() = mysignificand;
3091 if (myexponent==0) // denormal
3094 *significandParts() |= 0x10000000000000LL; // integer bit
3099 APFloat::initFromFloatAPInt(const APInt & api)
3101 assert(api.getBitWidth()==32);
3102 uint32_t i = (uint32_t)*api.getRawData();
3103 uint32_t myexponent = (i >> 23) & 0xff;
3104 uint32_t mysignificand = i & 0x7fffff;
3106 initialize(&APFloat::IEEEsingle);
3107 assert(partCount()==1);
3110 if (myexponent==0 && mysignificand==0) {
3111 // exponent, significand meaningless
3113 } else if (myexponent==0xff && mysignificand==0) {
3114 // exponent, significand meaningless
3115 category = fcInfinity;
3116 } else if (myexponent==0xff && mysignificand!=0) {
3117 // sign, exponent, significand meaningless
3119 *significandParts() = mysignificand;
3121 category = fcNormal;
3122 exponent = myexponent - 127; //bias
3123 *significandParts() = mysignificand;
3124 if (myexponent==0) // denormal
3127 *significandParts() |= 0x800000; // integer bit
3132 APFloat::initFromHalfAPInt(const APInt & api)
3134 assert(api.getBitWidth()==16);
3135 uint32_t i = (uint32_t)*api.getRawData();
3136 uint32_t myexponent = (i >> 10) & 0x1f;
3137 uint32_t mysignificand = i & 0x3ff;
3139 initialize(&APFloat::IEEEhalf);
3140 assert(partCount()==1);
3143 if (myexponent==0 && mysignificand==0) {
3144 // exponent, significand meaningless
3146 } else if (myexponent==0x1f && mysignificand==0) {
3147 // exponent, significand meaningless
3148 category = fcInfinity;
3149 } else if (myexponent==0x1f && mysignificand!=0) {
3150 // sign, exponent, significand meaningless
3152 *significandParts() = mysignificand;
3154 category = fcNormal;
3155 exponent = myexponent - 15; //bias
3156 *significandParts() = mysignificand;
3157 if (myexponent==0) // denormal
3160 *significandParts() |= 0x400; // integer bit
3164 /// Treat api as containing the bits of a floating point number. Currently
3165 /// we infer the floating point type from the size of the APInt. The
3166 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3167 /// when the size is anything else).
3169 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api)
3171 if (Sem == &IEEEhalf)
3172 return initFromHalfAPInt(api);
3173 if (Sem == &IEEEsingle)
3174 return initFromFloatAPInt(api);
3175 if (Sem == &IEEEdouble)
3176 return initFromDoubleAPInt(api);
3177 if (Sem == &x87DoubleExtended)
3178 return initFromF80LongDoubleAPInt(api);
3179 if (Sem == &IEEEquad)
3180 return initFromQuadrupleAPInt(api);
3181 if (Sem == &PPCDoubleDouble)
3182 return initFromPPCDoubleDoubleAPInt(api);
3184 llvm_unreachable(0);
3188 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3192 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth));
3194 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth));
3196 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth));
3198 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth));
3201 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth));
3202 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
3204 llvm_unreachable("Unknown floating bit width");
3208 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3209 APFloat Val(Sem, fcNormal, Negative);
3211 // We want (in interchange format):
3212 // sign = {Negative}
3214 // significand = 1..1
3216 Val.exponent = Sem.maxExponent; // unbiased
3218 // 1-initialize all bits....
3219 Val.zeroSignificand();
3220 integerPart *significand = Val.significandParts();
3221 unsigned N = partCountForBits(Sem.precision);
3222 for (unsigned i = 0; i != N; ++i)
3223 significand[i] = ~((integerPart) 0);
3225 // ...and then clear the top bits for internal consistency.
3226 if (Sem.precision % integerPartWidth != 0)
3228 (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1;
3233 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3234 APFloat Val(Sem, fcNormal, Negative);
3236 // We want (in interchange format):
3237 // sign = {Negative}
3239 // significand = 0..01
3241 Val.exponent = Sem.minExponent; // unbiased
3242 Val.zeroSignificand();
3243 Val.significandParts()[0] = 1;
3247 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3248 APFloat Val(Sem, fcNormal, Negative);
3250 // We want (in interchange format):
3251 // sign = {Negative}
3253 // significand = 10..0
3255 Val.exponent = Sem.minExponent;
3256 Val.zeroSignificand();
3257 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3258 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3263 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) {
3264 initFromAPInt(&Sem, API);
3267 APFloat::APFloat(float f) {
3268 initFromAPInt(&IEEEsingle, APInt::floatToBits(f));
3271 APFloat::APFloat(double d) {
3272 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d));
3276 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3277 Buffer.append(Str.begin(), Str.end());
3280 /// Removes data from the given significand until it is no more
3281 /// precise than is required for the desired precision.
3282 void AdjustToPrecision(APInt &significand,
3283 int &exp, unsigned FormatPrecision) {
3284 unsigned bits = significand.getActiveBits();
3286 // 196/59 is a very slight overestimate of lg_2(10).
3287 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3289 if (bits <= bitsRequired) return;
3291 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3292 if (!tensRemovable) return;
3294 exp += tensRemovable;
3296 APInt divisor(significand.getBitWidth(), 1);
3297 APInt powten(significand.getBitWidth(), 10);
3299 if (tensRemovable & 1)
3301 tensRemovable >>= 1;
3302 if (!tensRemovable) break;
3306 significand = significand.udiv(divisor);
3308 // Truncate the significand down to its active bit count, but
3309 // don't try to drop below 32.
3310 unsigned newPrecision = std::max(32U, significand.getActiveBits());
3311 significand = significand.trunc(newPrecision);
3315 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3316 int &exp, unsigned FormatPrecision) {
3317 unsigned N = buffer.size();
3318 if (N <= FormatPrecision) return;
3320 // The most significant figures are the last ones in the buffer.
3321 unsigned FirstSignificant = N - FormatPrecision;
3324 // FIXME: this probably shouldn't use 'round half up'.
3326 // Rounding down is just a truncation, except we also want to drop
3327 // trailing zeros from the new result.
3328 if (buffer[FirstSignificant - 1] < '5') {
3329 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3332 exp += FirstSignificant;
3333 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3337 // Rounding up requires a decimal add-with-carry. If we continue
3338 // the carry, the newly-introduced zeros will just be truncated.
3339 for (unsigned I = FirstSignificant; I != N; ++I) {
3340 if (buffer[I] == '9') {
3348 // If we carried through, we have exactly one digit of precision.
3349 if (FirstSignificant == N) {
3350 exp += FirstSignificant;
3352 buffer.push_back('1');
3356 exp += FirstSignificant;
3357 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3361 void APFloat::toString(SmallVectorImpl<char> &Str,
3362 unsigned FormatPrecision,
3363 unsigned FormatMaxPadding) const {
3367 return append(Str, "-Inf");
3369 return append(Str, "+Inf");
3371 case fcNaN: return append(Str, "NaN");
3377 if (!FormatMaxPadding)
3378 append(Str, "0.0E+0");
3390 // Decompose the number into an APInt and an exponent.
3391 int exp = exponent - ((int) semantics->precision - 1);
3392 APInt significand(semantics->precision,
3393 makeArrayRef(significandParts(),
3394 partCountForBits(semantics->precision)));
3396 // Set FormatPrecision if zero. We want to do this before we
3397 // truncate trailing zeros, as those are part of the precision.
3398 if (!FormatPrecision) {
3399 // It's an interesting question whether to use the nominal
3400 // precision or the active precision here for denormals.
3402 // FormatPrecision = ceil(significandBits / lg_2(10))
3403 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3406 // Ignore trailing binary zeros.
3407 int trailingZeros = significand.countTrailingZeros();
3408 exp += trailingZeros;
3409 significand = significand.lshr(trailingZeros);
3411 // Change the exponent from 2^e to 10^e.
3414 } else if (exp > 0) {
3416 significand = significand.zext(semantics->precision + exp);
3417 significand <<= exp;
3419 } else { /* exp < 0 */
3422 // We transform this using the identity:
3423 // (N)(2^-e) == (N)(5^e)(10^-e)
3424 // This means we have to multiply N (the significand) by 5^e.
3425 // To avoid overflow, we have to operate on numbers large
3426 // enough to store N * 5^e:
3427 // log2(N * 5^e) == log2(N) + e * log2(5)
3428 // <= semantics->precision + e * 137 / 59
3429 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3431 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3433 // Multiply significand by 5^e.
3434 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3435 significand = significand.zext(precision);
3436 APInt five_to_the_i(precision, 5);
3438 if (texp & 1) significand *= five_to_the_i;
3442 five_to_the_i *= five_to_the_i;
3446 AdjustToPrecision(significand, exp, FormatPrecision);
3448 SmallVector<char, 256> buffer;
3451 unsigned precision = significand.getBitWidth();
3452 APInt ten(precision, 10);
3453 APInt digit(precision, 0);
3455 bool inTrail = true;
3456 while (significand != 0) {
3457 // digit <- significand % 10
3458 // significand <- significand / 10
3459 APInt::udivrem(significand, ten, significand, digit);
3461 unsigned d = digit.getZExtValue();
3463 // Drop trailing zeros.
3464 if (inTrail && !d) exp++;
3466 buffer.push_back((char) ('0' + d));
3471 assert(!buffer.empty() && "no characters in buffer!");
3473 // Drop down to FormatPrecision.
3474 // TODO: don't do more precise calculations above than are required.
3475 AdjustToPrecision(buffer, exp, FormatPrecision);
3477 unsigned NDigits = buffer.size();
3479 // Check whether we should use scientific notation.
3480 bool FormatScientific;
3481 if (!FormatMaxPadding)
3482 FormatScientific = true;
3487 // But we shouldn't make the number look more precise than it is.
3488 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3489 NDigits + (unsigned) exp > FormatPrecision);
3491 // Power of the most significant digit.
3492 int MSD = exp + (int) (NDigits - 1);
3495 FormatScientific = false;
3497 // 765e-5 == 0.00765
3499 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3504 // Scientific formatting is pretty straightforward.
3505 if (FormatScientific) {
3506 exp += (NDigits - 1);
3508 Str.push_back(buffer[NDigits-1]);
3513 for (unsigned I = 1; I != NDigits; ++I)
3514 Str.push_back(buffer[NDigits-1-I]);
3517 Str.push_back(exp >= 0 ? '+' : '-');
3518 if (exp < 0) exp = -exp;
3519 SmallVector<char, 6> expbuf;
3521 expbuf.push_back((char) ('0' + (exp % 10)));
3524 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3525 Str.push_back(expbuf[E-1-I]);
3529 // Non-scientific, positive exponents.
3531 for (unsigned I = 0; I != NDigits; ++I)
3532 Str.push_back(buffer[NDigits-1-I]);
3533 for (unsigned I = 0; I != (unsigned) exp; ++I)
3538 // Non-scientific, negative exponents.
3540 // The number of digits to the left of the decimal point.
3541 int NWholeDigits = exp + (int) NDigits;
3544 if (NWholeDigits > 0) {
3545 for (; I != (unsigned) NWholeDigits; ++I)
3546 Str.push_back(buffer[NDigits-I-1]);
3549 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3553 for (unsigned Z = 1; Z != NZeros; ++Z)
3557 for (; I != NDigits; ++I)
3558 Str.push_back(buffer[NDigits-I-1]);
3561 bool APFloat::getExactInverse(APFloat *inv) const {
3562 // Special floats and denormals have no exact inverse.
3563 if (category != fcNormal)
3566 // Check that the number is a power of two by making sure that only the
3567 // integer bit is set in the significand.
3568 if (significandLSB() != semantics->precision - 1)
3572 APFloat reciprocal(*semantics, 1ULL);
3573 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3576 // Avoid multiplication with a denormal, it is not safe on all platforms and
3577 // may be slower than a normal division.
3578 if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
3581 assert(reciprocal.category == fcNormal &&
3582 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);