1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/MathExtras.h"
26 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
28 /* Assumed in hexadecimal significand parsing, and conversion to
29 hexadecimal strings. */
30 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
31 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
35 /* Represents floating point arithmetic semantics. */
37 /* The largest E such that 2^E is representable; this matches the
38 definition of IEEE 754. */
39 exponent_t maxExponent;
41 /* The smallest E such that 2^E is a normalized number; this
42 matches the definition of IEEE 754. */
43 exponent_t minExponent;
45 /* Number of bits in the significand. This includes the integer bit. */
47 unsigned int precision;
49 /* True if arithmetic is supported. */
50 unsigned int arithmeticOK;
53 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, true };
54 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
55 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
56 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
57 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
58 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
60 // The PowerPC format consists of two doubles. It does not map cleanly
61 // onto the usual format above. For now only storage of constants of
62 // this type is supported, no arithmetic.
63 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
65 /* A tight upper bound on the number of parts required to hold the value pow(5, power) is
68 power * 815 / (351 * integerPartWidth) + 1
70 However, whilst the result may require only this many parts,
71 because we are multiplying two values to get it, the
72 multiplication may require an extra part with the excess part
73 being zero (consider the trivial case of 1 * 1, tcFullMultiply
74 requires two parts to hold the single-part result). So we add an
75 extra one to guarantee enough space whilst multiplying. */
76 const unsigned int maxExponent = 16383;
77 const unsigned int maxPrecision = 113;
78 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
79 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
80 / (351 * integerPartWidth));
83 /* A bunch of private, handy routines. */
/* Return the number of integerParts needed to hold BITS bits, i.e. BITS
   divided by integerPartWidth, rounded up. */
85 static inline unsigned int
86 partCountForBits(unsigned int bits)
88 return ((bits) + integerPartWidth - 1) / integerPartWidth;
91 /* Returns 0U-9U. Return values >= 10U are not digits. */
92 static inline unsigned int
93 decDigitValue(unsigned int c)
99 hexDigitValue(unsigned int c)
/* Assert that SEMANTICS has its arithmeticOK flag set, i.e. that
   arithmetic on values of this format is supported. */
119 assertArithmeticOK(const llvm::fltSemantics &semantics) {
120 assert(semantics.arithmeticOK &&
121 "Compile-time arithmetic does not support these semantics");
124 /* Return the value of a decimal exponent of the form [+-]ddddddd.
127 If the exponent overflows, returns a large exponent with the appropriate sign. */
130 readExponent(StringRef::iterator begin, StringRef::iterator end)
133 unsigned int absExponent;
/* Magnitude at which the exponent is clamped and treated as overflowed. */
134 const unsigned int overlargeExponent = 24000; /* FIXME. */
135 StringRef::iterator p = begin;
137 assert(p != end && "Exponent has no digits");
/* An optional leading sign; at least one character must follow it. */
139 isNegative = (*p == '-');
140 if (*p == '-' || *p == '+') {
142 assert(p != end && "Exponent has no digits");
/* The first character after any sign must be a decimal digit. */
145 absExponent = decDigitValue(*p++);
146 assert(absExponent < 10U && "Invalid character in exponent");
/* Fold in the remaining digits, most significant first. */
148 for (; p != end; ++p) {
151 value = decDigitValue(*p);
152 assert(value < 10U && "Invalid character in exponent");
/* VALUE becomes ten times the running magnitude plus the new digit. */
154 value += absExponent * 10;
/* Clamp at overlargeExponent.  P is moved to END so that the later
   p == end assertion still holds even though digits remain unread. */
155 if (absExponent >= overlargeExponent) {
156 absExponent = overlargeExponent;
157 p = end; /* outwit assert below */
163 assert(p == end && "Invalid exponent in exponent");
/* Return the magnitude, negated for a negative exponent. */
166 return -(int) absExponent;
168 return (int) absExponent;
171 /* This is ugly and needs cleaning up, but I don't immediately see
172 how whilst remaining safe. */
174 totalExponent(StringRef::iterator p, StringRef::iterator end,
175 int exponentAdjustment)
177 int unsignedExponent;
178 bool negative, overflow;
181 assert(p != end && "Exponent has no digits");
183 negative = *p == '-';
184 if (*p == '-' || *p == '+') {
186 assert(p != end && "Exponent has no digits");
189 unsignedExponent = 0;
191 for (; p != end; ++p) {
194 value = decDigitValue(*p);
195 assert(value < 10U && "Invalid character in exponent");
197 unsignedExponent = unsignedExponent * 10 + value;
198 if (unsignedExponent > 32767)
202 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
206 exponent = unsignedExponent;
208 exponent = -exponent;
209 exponent += exponentAdjustment;
210 if (exponent > 32767 || exponent < -32768)
215 exponent = negative ? -32768: 32767;
/* Skip runs of '0' characters starting at BEGIN, never reading at or past
   END.  P is compared against END before it is dereferenced: END is not a
   dereferenceable position, so testing *p first would read out of bounds
   whenever the range is empty or consists only of zeroes.
   NOTE(review): presumably *DOT receives the position of a decimal point
   found between the two zero runs -- confirm against the full function. */
220 static StringRef::iterator
221 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
222 StringRef::iterator *dot)
224 StringRef::iterator p = begin;
/* Leading zeroes. */
226 while (p != end && *p == '0')
232 assert(end - begin != 1 && "Significand has no digits");
/* Second run of zeroes. */
234 while (p != end && *p == '0')
241 /* Given a normal decimal floating point number of the form dddd.dddd[eE][+-]ddd,
245 where the decimal point and exponent are optional, fill out the
246 structure D. Exponent is appropriate if the significand is
247 treated as an integer, and normalizedExponent if the significand
248 is taken to have the decimal point after a single leading non-zero digit.
251 If the value is zero, D->firstSigDigit points to a non-digit, and
252 the returned exponent is zero. */
255 const char *firstSigDigit;
256 const char *lastSigDigit;
258 int normalizedExponent;
262 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
265 StringRef::iterator dot = end;
266 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
268 D->firstSigDigit = p;
270 D->normalizedExponent = 0;
272 for (; p != end; ++p) {
274 assert(dot == end && "String contains multiple dots");
279 if (decDigitValue(*p) >= 10U)
284 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
285 assert(p != begin && "Significand has no digits");
286 assert((dot == end || p - begin != 1) && "Significand has no digits");
288 /* p points to the first non-digit in the string */
289 D->exponent = readExponent(p + 1, end);
291 /* Implied decimal point? */
296 /* If number is all zeroes accept any exponent. */
297 if (p != D->firstSigDigit) {
298 /* Drop insignificant trailing zeroes. */
303 while (p != begin && *p == '0');
304 while (p != begin && *p == '.');
307 /* Adjust the exponents for any decimal point. */
308 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
309 D->normalizedExponent = (D->exponent +
310 static_cast<exponent_t>((p - D->firstSigDigit)
311 - (dot > D->firstSigDigit && dot < p)));
317 /* Return the trailing fraction of a hexadecimal number.
318 DIGITVALUE is the first hex digit of the fraction, P points to the next digit. */
321 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
322 unsigned int digitValue)
324 unsigned int hexDigit;
326 /* If the first trailing digit isn't 0 or 8 we can work out the
327 fraction immediately. */
329 return lfMoreThanHalf;
330 else if (digitValue < 8 && digitValue > 0)
331 return lfLessThanHalf;
333 /* Otherwise we need to find the first non-zero digit. */
337 assert(p != end && "Invalid trailing hexadecimal fraction!");
339 hexDigit = hexDigitValue(*p);
341 /* If we ran off the end it is exactly zero or one-half, otherwise a little more. */
344 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
346 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
349 /* Return the fraction lost were a bignum truncated losing the least
350 significant BITS bits. */
352 lostFractionThroughTruncation(const integerPart *parts,
353 unsigned int partCount,
358 lsb = APInt::tcLSB(parts, partCount);
360 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
362 return lfExactlyZero;
364 return lfExactlyHalf;
365 if (bits <= partCount * integerPartWidth &&
366 APInt::tcExtractBit(parts, bits - 1))
367 return lfMoreThanHalf;
369 return lfLessThanHalf;
372 /* Shift the PARTS-part bignum DST right by BITS bits in place and return the fraction that was lost. */
374 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
376 lostFraction lost_fraction;
/* Classify the low BITS bits before the shift discards them. */
378 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
380 APInt::tcShiftRight(dst, parts, bits);
382 return lost_fraction;
385 /* Combine the effect of two lost fractions and return the overall result, starting from MORESIGNIFICANT and adjusting it for LESSSIGNIFICANT. */
387 combineLostFractions(lostFraction moreSignificant,
388 lostFraction lessSignificant)
/* Any loss at all in the less significant part pushes an exact zero or an
   exact half just above that boundary; less-than-half and more-than-half
   are left as they are. */
390 if (lessSignificant != lfExactlyZero) {
391 if (moreSignificant == lfExactlyZero)
392 moreSignificant = lfLessThanHalf;
393 else if (moreSignificant == lfExactlyHalf)
394 moreSignificant = lfMoreThanHalf;
397 return moreSignificant;
400 /* The error from the true value, in half-ulps, on multiplying two
401 floating point numbers, which differ from the value they
402 approximate by at most HUERR1 and HUERR2 half-ulps, is strictly less
403 than the returned value.
405 See "How to Read Floating Point Numbers Accurately" by William D. Clinger. */
408 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
410 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
412 if (HUerr1 + HUerr2 == 0)
413 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
415 return inexactMultiply + 2 * (HUerr1 + HUerr2);
418 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
419 when the least significant BITS are truncated. BITS cannot be zero. */
422 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
424 unsigned int count, partBits;
425 integerPart part, boundary;
430 count = bits / integerPartWidth;
431 partBits = bits % integerPartWidth + 1;
433 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
436 boundary = (integerPart) 1 << (partBits - 1);
441 if (part - boundary <= boundary - part)
442 return part - boundary;
444 return boundary - part;
447 if (part == boundary) {
450 return ~(integerPart) 0; /* A lot. */
453 } else if (part == boundary - 1) {
456 return ~(integerPart) 0; /* A lot. */
461 return ~(integerPart) 0; /* A lot. */
464 /* Place pow(5, power) in DST, and return the number of parts used.
465 DST must be at least one part larger than size of the answer. */
467 powerOf5(integerPart *dst, unsigned int power)
469 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
471 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
472 pow5s[0] = 78125 * 5;
474 unsigned int partsCount[16] = { 1 };
475 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
477 assert(power <= maxExponent);
482 *p1 = firstEightPowers[power & 7];
488 for (unsigned int n = 0; power; power >>= 1, n++) {
493 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
495 pc = partsCount[n - 1];
496 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
498 if (pow5[pc - 1] == 0)
506 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
508 if (p2[result - 1] == 0)
511 /* Now result is in p1 with partsCount parts and p2 is scratch space. */
513 tmp = p1, p1 = p2, p2 = tmp;
520 APInt::tcAssign(dst, p1, result);
525 /* Zero at the end to avoid modular arithmetic when adding one; used
526 when rounding up during hexadecimal output. */
527 static const char hexDigitsLower[] = "0123456789abcdef0";
528 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
529 static const char infinityL[] = "infinity";
530 static const char infinityU[] = "INFINITY";
531 static const char NaNL[] = "nan";
532 static const char NaNU[] = "NAN";
534 /* Write out an integerPart in hexadecimal, starting with the most
535 significant nibble. Write out exactly COUNT hexdigits, return COUNT. */
538 partAsHex (char *dst, integerPart part, unsigned int count,
539 const char *hexDigitChars)
541 unsigned int result = count;
543 assert(count != 0 && count <= integerPartWidth / 4);
545 part >>= (integerPartWidth - 4 * count);
547 dst[count] = hexDigitChars[part & 0xf];
554 /* Write out an unsigned decimal integer. */
556 writeUnsignedDecimal (char *dst, unsigned int n)
572 /* Write out a signed decimal integer. */
574 writeSignedDecimal (char *dst, int value)
578 dst = writeUnsignedDecimal(dst, -(unsigned) value);
580 dst = writeUnsignedDecimal(dst, value);
587 APFloat::initialize(const fltSemantics *ourSemantics)
591 semantics = ourSemantics;
594 significand.parts = new integerPart[count];
598 APFloat::freeSignificand()
601 delete [] significand.parts;
605 APFloat::assign(const APFloat &rhs)
607 assert(semantics == rhs.semantics);
610 category = rhs.category;
611 exponent = rhs.exponent;
613 exponent2 = rhs.exponent2;
614 if (category == fcNormal || category == fcNaN)
615 copySignificand(rhs);
619 APFloat::copySignificand(const APFloat &rhs)
621 assert(category == fcNormal || category == fcNaN);
622 assert(rhs.partCount() >= partCount());
624 APInt::tcAssign(significandParts(), rhs.significandParts(),
628 /* Make this number a NaN. The significand is taken from FILL when one is
629 supplied (zero otherwise) and masked down to the payload bits. The quiet bit is
630 then cleared to form a signalling NaN (setting the next bit down if the payload would otherwise be zero) or set to form a quiet NaN. */
631 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
636 integerPart *significand = significandParts();
637 unsigned numParts = partCount();
639 // Set the significand bits to the fill.
640 if (!fill || fill->getNumWords() < numParts)
641 APInt::tcSet(significand, 0, numParts);
643 APInt::tcAssign(significand, fill->getRawData(),
644 std::min(fill->getNumWords(), numParts));
646 // Zero out the excess bits of the significand.
647 unsigned bitsToPreserve = semantics->precision - 1;
648 unsigned part = bitsToPreserve / 64;
649 bitsToPreserve %= 64;
650 significand[part] &= ((1ULL << bitsToPreserve) - 1);
651 for (part++; part != numParts; ++part)
652 significand[part] = 0;
655 unsigned QNaNBit = semantics->precision - 2;
658 // We always have to clear the QNaN bit to make it an SNaN.
659 APInt::tcClearBit(significand, QNaNBit);
661 // If there are no bits set in the payload, we have to set
662 // *something* to make it a NaN instead of an infinity;
663 // conventionally, this is the next bit down from the QNaN bit.
664 if (APInt::tcIsZero(significand, numParts))
665 APInt::tcSetBit(significand, QNaNBit - 1);
667 // We always have to set the QNaN bit to make it a QNaN.
668 APInt::tcSetBit(significand, QNaNBit);
671 // For x87 extended precision, we want to make a NaN, not a
672 // pseudo-NaN. Maybe we should expose the ability to make pseudo-NaNs?
674 if (semantics == &APFloat::x87DoubleExtended)
675 APInt::tcSetBit(significand, QNaNBit + 1);
678 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
680 APFloat value(Sem, uninitialized);
681 value.makeNaN(SNaN, Negative, fill);
686 APFloat::operator=(const APFloat &rhs)
689 if (semantics != rhs.semantics) {
691 initialize(rhs.semantics);
700 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
703 if (semantics != rhs.semantics ||
704 category != rhs.category ||
707 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
710 if (category==fcZero || category==fcInfinity)
712 else if (category==fcNormal && exponent!=rhs.exponent)
714 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
715 exponent2!=rhs.exponent2)
719 const integerPart* p=significandParts();
720 const integerPart* q=rhs.significandParts();
721 for (; i>0; i--, p++, q++) {
729 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
730 : exponent2(0), sign2(0) {
731 assertArithmeticOK(ourSemantics);
732 initialize(&ourSemantics);
735 exponent = ourSemantics.precision - 1;
736 significandParts()[0] = value;
737 normalize(rmNearestTiesToEven, lfExactlyZero);
740 APFloat::APFloat(const fltSemantics &ourSemantics) : exponent2(0), sign2(0) {
741 assertArithmeticOK(ourSemantics);
742 initialize(&ourSemantics);
747 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
748 : exponent2(0), sign2(0) {
749 assertArithmeticOK(ourSemantics);
750 // Allocates storage if necessary but does not initialize it.
751 initialize(&ourSemantics);
754 APFloat::APFloat(const fltSemantics &ourSemantics,
755 fltCategory ourCategory, bool negative)
756 : exponent2(0), sign2(0) {
757 assertArithmeticOK(ourSemantics);
758 initialize(&ourSemantics);
759 category = ourCategory;
761 if (category == fcNormal)
763 else if (ourCategory == fcNaN)
767 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text)
768 : exponent2(0), sign2(0) {
769 assertArithmeticOK(ourSemantics);
770 initialize(&ourSemantics);
771 convertFromString(text, rmNearestTiesToEven);
774 APFloat::APFloat(const APFloat &rhs) : exponent2(0), sign2(0) {
775 initialize(rhs.semantics);
784 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
785 void APFloat::Profile(FoldingSetNodeID& ID) const {
786 ID.Add(bitcastToAPInt());
/* Return the number of integerParts in the significand: enough to hold
   one bit more than the precision of our semantics. */
790 APFloat::partCount() const
792 return partCountForBits(semantics->precision + 1);
796 APFloat::semanticsPrecision(const fltSemantics &semantics)
798 return semantics.precision;
802 APFloat::significandParts() const
804 return const_cast<APFloat *>(this)->significandParts();
808 APFloat::significandParts()
810 assert(category == fcNormal || category == fcNaN);
813 return significand.parts;
815 return &significand.part;
819 APFloat::zeroSignificand()
822 APInt::tcSet(significandParts(), 0, partCount());
825 /* Increment an fcNormal floating point number's significand. */
827 APFloat::incrementSignificand()
831 carry = APInt::tcIncrement(significandParts(), partCount());
833 /* Our callers should never cause us to overflow. */
838 /* Add the significand of the RHS. Returns the carry flag. */
840 APFloat::addSignificand(const APFloat &rhs)
844 parts = significandParts();
846 assert(semantics == rhs.semantics);
847 assert(exponent == rhs.exponent);
849 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
852 /* Subtract the significand of the RHS with a borrow flag. Returns the borrow flag. */
855 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
859 parts = significandParts();
861 assert(semantics == rhs.semantics);
862 assert(exponent == rhs.exponent);
864 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
868 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
869 on to the full-precision result of the multiplication. Returns the lost fraction. */
872 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
874 unsigned int omsb; // One, not zero, based MSB.
875 unsigned int partsCount, newPartsCount, precision;
876 integerPart *lhsSignificand;
877 integerPart scratch[4];
878 integerPart *fullSignificand;
879 lostFraction lost_fraction;
882 assert(semantics == rhs.semantics);
884 precision = semantics->precision;
885 newPartsCount = partCountForBits(precision * 2);
887 if (newPartsCount > 4)
888 fullSignificand = new integerPart[newPartsCount];
890 fullSignificand = scratch;
892 lhsSignificand = significandParts();
893 partsCount = partCount();
895 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
896 rhs.significandParts(), partsCount, partsCount);
898 lost_fraction = lfExactlyZero;
899 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
900 exponent += rhs.exponent;
903 Significand savedSignificand = significand;
904 const fltSemantics *savedSemantics = semantics;
905 fltSemantics extendedSemantics;
907 unsigned int extendedPrecision;
909 /* Normalize our MSB. */
910 extendedPrecision = precision + precision - 1;
911 if (omsb != extendedPrecision) {
912 APInt::tcShiftLeft(fullSignificand, newPartsCount,
913 extendedPrecision - omsb);
914 exponent -= extendedPrecision - omsb;
917 /* Create new semantics. */
918 extendedSemantics = *semantics;
919 extendedSemantics.precision = extendedPrecision;
921 if (newPartsCount == 1)
922 significand.part = fullSignificand[0];
924 significand.parts = fullSignificand;
925 semantics = &extendedSemantics;
927 APFloat extendedAddend(*addend);
928 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
929 assert(status == opOK);
931 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
933 /* Restore our state. */
934 if (newPartsCount == 1)
935 fullSignificand[0] = significand.part;
936 significand = savedSignificand;
937 semantics = savedSemantics;
939 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
942 exponent -= (precision - 1);
944 if (omsb > precision) {
945 unsigned int bits, significantParts;
948 bits = omsb - precision;
949 significantParts = partCountForBits(omsb);
950 lf = shiftRight(fullSignificand, significantParts, bits);
951 lost_fraction = combineLostFractions(lf, lost_fraction);
955 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
957 if (newPartsCount > 4)
958 delete [] fullSignificand;
960 return lost_fraction;
963 /* Divide our significand by the significand of the RHS, leaving the quotient in our significand. Returns the lost fraction. */
965 APFloat::divideSignificand(const APFloat &rhs)
967 unsigned int bit, i, partsCount;
968 const integerPart *rhsSignificand;
969 integerPart *lhsSignificand, *dividend, *divisor;
970 integerPart scratch[4];
971 lostFraction lost_fraction;
973 assert(semantics == rhs.semantics);
975 lhsSignificand = significandParts();
976 rhsSignificand = rhs.significandParts();
977 partsCount = partCount();
980 dividend = new integerPart[partsCount * 2];
984 divisor = dividend + partsCount;
986 /* Copy the dividend and divisor as they will be modified in-place. */
987 for (i = 0; i < partsCount; i++) {
988 dividend[i] = lhsSignificand[i];
989 divisor[i] = rhsSignificand[i];
990 lhsSignificand[i] = 0;
993 exponent -= rhs.exponent;
995 unsigned int precision = semantics->precision;
997 /* Normalize the divisor. */
998 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1001 APInt::tcShiftLeft(divisor, partsCount, bit);
1004 /* Normalize the dividend. */
1005 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1008 APInt::tcShiftLeft(dividend, partsCount, bit);
1011 /* Ensure the dividend >= divisor initially for the loop below.
1012 Incidentally, this means that the division loop below is
1013 guaranteed to set the integer bit to one. */
1014 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1016 APInt::tcShiftLeft(dividend, partsCount, 1);
1017 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1020 /* Long division. */
1021 for (bit = precision; bit; bit -= 1) {
1022 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1023 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1024 APInt::tcSetBit(lhsSignificand, bit - 1);
1027 APInt::tcShiftLeft(dividend, partsCount, 1);
1030 /* Figure out the lost fraction. */
1031 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1034 lost_fraction = lfMoreThanHalf;
1036 lost_fraction = lfExactlyHalf;
1037 else if (APInt::tcIsZero(dividend, partsCount))
1038 lost_fraction = lfExactlyZero;
1040 lost_fraction = lfLessThanHalf;
1045 return lost_fraction;
1049 APFloat::significandMSB() const
1051 return APInt::tcMSB(significandParts(), partCount());
1055 APFloat::significandLSB() const
1057 return APInt::tcLSB(significandParts(), partCount());
1060 /* Note that a zero result is NOT normalized to fcZero. */
1062 APFloat::shiftSignificandRight(unsigned int bits)
1064 /* Our exponent should not overflow. */
1065 assert((exponent_t) (exponent + bits) >= exponent);
1069 return shiftRight(significandParts(), partCount(), bits);
1072 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1074 APFloat::shiftSignificandLeft(unsigned int bits)
1076 assert(bits < semantics->precision);
1079 unsigned int partsCount = partCount();
1081 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1084 assert(!APInt::tcIsZero(significandParts(), partsCount));
1089 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1093 assert(semantics == rhs.semantics);
1094 assert(category == fcNormal);
1095 assert(rhs.category == fcNormal);
1097 compare = exponent - rhs.exponent;
1099 /* If exponents are equal, do an unsigned bignum comparison of the significands. */
1102 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1106 return cmpGreaterThan;
1107 else if (compare < 0)
1113 /* Handle overflow. Sign is preserved. We either become infinity or
1114 the largest finite number, depending on ROUNDING_MODE. */
1116 APFloat::handleOverflow(roundingMode rounding_mode)
/* The round-to-nearest modes, and a directed mode that points away from
   zero for our sign, overflow to infinity. */
1119 if (rounding_mode == rmNearestTiesToEven ||
1120 rounding_mode == rmNearestTiesToAway ||
1121 (rounding_mode == rmTowardPositive && !sign) ||
1122 (rounding_mode == rmTowardNegative && sign)) {
1123 category = fcInfinity;
1124 return (opStatus) (opOverflow | opInexact);
1127 /* Otherwise we become the largest finite number. */
1128 category = fcNormal;
1129 exponent = semantics->maxExponent;
/* Set the low PRECISION bits of the significand. */
1130 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1131 semantics->precision);
1136 /* Returns TRUE if, when truncating the current number, with BIT the
1137 new LSB, with the given lost fraction and rounding mode, the result
1138 would need to be rounded away from zero (i.e., by increasing the
1139 significand). This routine must work for fcZero of both signs, and
1140 fcNormal numbers. */
1142 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1143 lostFraction lost_fraction,
1144 unsigned int bit) const
1146 /* NaNs and infinities should not have lost fractions. */
1147 assert(category == fcNormal || category == fcZero);
1149 /* Current callers never pass this so we don't handle it. */
1150 assert(lost_fraction != lfExactlyZero);
1152 switch (rounding_mode) {
1153 case rmNearestTiesToAway:
/* Anything at or above the halfway point rounds away from zero. */
1154 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1156 case rmNearestTiesToEven:
1157 if (lost_fraction == lfMoreThanHalf)
1160 /* Our zeroes don't have a significand to test. */
/* On an exact tie, round away only when bit BIT of the significand is
   set, so that the rounded result ends up even. */
1161 if (lost_fraction == lfExactlyHalf && category != fcZero)
1162 return APInt::tcExtractBit(significandParts(), bit);
1169 case rmTowardPositive:
/* Toward +infinity is away from zero only for non-negative values. */
1170 return sign == false;
1172 case rmTowardNegative:
/* Toward -infinity is away from zero only for negative values. */
1173 return sign == true;
1175 llvm_unreachable("Invalid rounding mode found");
1179 APFloat::normalize(roundingMode rounding_mode,
1180 lostFraction lost_fraction)
1182 unsigned int omsb; /* One, not zero, based MSB. */
1185 if (category != fcNormal)
1188 /* Before rounding normalize the exponent of fcNormal numbers. */
1189 omsb = significandMSB() + 1;
1192 /* OMSB is numbered from 1. We want to place it in the integer
1193 bit numbered PRECISION if possible, with a compensating change in the exponent. */
1195 exponentChange = omsb - semantics->precision;
1197 /* If the resulting exponent is too high, overflow according to
1198 the rounding mode. */
1199 if (exponent + exponentChange > semantics->maxExponent)
1200 return handleOverflow(rounding_mode);
1202 /* Subnormal numbers have exponent minExponent, and their MSB
1203 is forced based on that. */
1204 if (exponent + exponentChange < semantics->minExponent)
1205 exponentChange = semantics->minExponent - exponent;
1207 /* Shifting left is easy as we don't lose precision. */
1208 if (exponentChange < 0) {
1209 assert(lost_fraction == lfExactlyZero);
1211 shiftSignificandLeft(-exponentChange);
1216 if (exponentChange > 0) {
1219 /* Shift right and capture any new lost fraction. */
1220 lf = shiftSignificandRight(exponentChange);
1222 lost_fraction = combineLostFractions(lf, lost_fraction);
1224 /* Keep OMSB up-to-date. */
1225 if (omsb > (unsigned) exponentChange)
1226 omsb -= exponentChange;
1232 /* Now round the number according to rounding_mode given the lost fraction. */
1235 /* As specified in IEEE 754, since we do not trap we do not report
1236 underflow for exact results. */
1237 if (lost_fraction == lfExactlyZero) {
1238 /* Canonicalize zeroes. */
1245 /* Increment the significand if we're rounding away from zero. */
1246 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1248 exponent = semantics->minExponent;
1250 incrementSignificand();
1251 omsb = significandMSB() + 1;
1253 /* Did the significand increment overflow? */
1254 if (omsb == (unsigned) semantics->precision + 1) {
1255 /* Renormalize by incrementing the exponent and shifting our
1256 significand right one. However if we already have the
1257 maximum exponent we overflow to infinity. */
1258 if (exponent == semantics->maxExponent) {
1259 category = fcInfinity;
1261 return (opStatus) (opOverflow | opInexact);
1264 shiftSignificandRight(1);
1270 /* The normal case - we were and are not denormal, and any
1271 significand increment above didn't overflow. */
1272 if (omsb == semantics->precision)
1275 /* We have a non-zero denormal. */
1276 assert(omsb < semantics->precision);
1278 /* Canonicalize zeroes. */
1282 /* The fcZero case is a denormal that underflowed to zero. */
1283 return (opStatus) (opUnderflow | opInexact);
1287 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1289 switch (convolve(category, rhs.category)) {
1291 llvm_unreachable(0);
1293 case convolve(fcNaN, fcZero):
1294 case convolve(fcNaN, fcNormal):
1295 case convolve(fcNaN, fcInfinity):
1296 case convolve(fcNaN, fcNaN):
1297 case convolve(fcNormal, fcZero):
1298 case convolve(fcInfinity, fcNormal):
1299 case convolve(fcInfinity, fcZero):
1302 case convolve(fcZero, fcNaN):
1303 case convolve(fcNormal, fcNaN):
1304 case convolve(fcInfinity, fcNaN):
1306 copySignificand(rhs);
1309 case convolve(fcNormal, fcInfinity):
1310 case convolve(fcZero, fcInfinity):
1311 category = fcInfinity;
1312 sign = rhs.sign ^ subtract;
1315 case convolve(fcZero, fcNormal):
1317 sign = rhs.sign ^ subtract;
1320 case convolve(fcZero, fcZero):
1321 /* Sign depends on rounding mode; handled by caller. */
1324 case convolve(fcInfinity, fcInfinity):
1325 /* Differently signed infinities can only be validly subtracted. */
1327 if (((sign ^ rhs.sign)!=0) != subtract) {
1334 case convolve(fcNormal, fcNormal):
1339 /* Add or subtract two normal numbers. */
1341 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1344 lostFraction lost_fraction;
1347 /* Determine if the operation on the absolute values is effectively
1348 an addition or subtraction. */
1349 subtract ^= (sign ^ rhs.sign) ? true : false;
1351 /* Are we bigger exponent-wise than the RHS? */
1352 bits = exponent - rhs.exponent;
1354 /* Subtraction is more subtle than one might naively expect. */
1356 APFloat temp_rhs(rhs);
1360 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1361 lost_fraction = lfExactlyZero;
1362 } else if (bits > 0) {
1363 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1364 shiftSignificandLeft(1);
1367 lost_fraction = shiftSignificandRight(-bits - 1);
1368 temp_rhs.shiftSignificandLeft(1);
1373 carry = temp_rhs.subtractSignificand
1374 (*this, lost_fraction != lfExactlyZero);
1375 copySignificand(temp_rhs);
1378 carry = subtractSignificand
1379 (temp_rhs, lost_fraction != lfExactlyZero);
1382 /* Invert the lost fraction - it was on the RHS and subtracted. */
1384 if (lost_fraction == lfLessThanHalf)
1385 lost_fraction = lfMoreThanHalf;
1386 else if (lost_fraction == lfMoreThanHalf)
1387 lost_fraction = lfLessThanHalf;
1389 /* The code above is intended to ensure that no borrow is necessary. */
1395 APFloat temp_rhs(rhs);
1397 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1398 carry = addSignificand(temp_rhs);
1400 lost_fraction = shiftSignificandRight(-bits);
1401 carry = addSignificand(rhs);
1404 /* We have a guard bit; generating a carry cannot happen. */
1409 return lost_fraction;
// Resolves products according to the categories of the two operands.
// The switch key is the (category, rhs.category) pair, which convolve()
// packs into one integer as lhs * 4 + rhs.
1413 APFloat::multiplySpecials(const APFloat &rhs)
1415 switch (convolve(category, rhs.category)) {
1417 llvm_unreachable(0);
// This value is already a NaN.
1419 case convolve(fcNaN, fcZero):
1420 case convolve(fcNaN, fcNormal):
1421 case convolve(fcNaN, fcInfinity):
1422 case convolve(fcNaN, fcNaN):
// Only rhs is a NaN: its significand is copied into this object.
1425 case convolve(fcZero, fcNaN):
1426 case convolve(fcNormal, fcNaN):
1427 case convolve(fcInfinity, fcNaN):
1429 copySignificand(rhs);
// A normal or infinite value times an infinity is an infinity.
1432 case convolve(fcNormal, fcInfinity):
1433 case convolve(fcInfinity, fcNormal):
1434 case convolve(fcInfinity, fcInfinity):
1435 category = fcInfinity;
1438 case convolve(fcZero, fcNormal):
1439 case convolve(fcNormal, fcZero):
1440 case convolve(fcZero, fcZero):
1444 case convolve(fcZero, fcInfinity):
1445 case convolve(fcInfinity, fcZero):
1449 case convolve(fcNormal, fcNormal):
// Resolves quotients according to the categories of the two operands.
// The switch key is the (category, rhs.category) pair, which convolve()
// packs into one integer as lhs * 4 + rhs.
1455 APFloat::divideSpecials(const APFloat &rhs)
1457 switch (convolve(category, rhs.category)) {
1459 llvm_unreachable(0);
// These pairs share one case group: a NaN dividend, an infinity divided
// by zero or a normal value, and a zero divided by an infinity or a
// normal value.
1461 case convolve(fcNaN, fcZero):
1462 case convolve(fcNaN, fcNormal):
1463 case convolve(fcNaN, fcInfinity):
1464 case convolve(fcNaN, fcNaN):
1465 case convolve(fcInfinity, fcZero):
1466 case convolve(fcInfinity, fcNormal):
1467 case convolve(fcZero, fcInfinity):
1468 case convolve(fcZero, fcNormal):
// Only rhs is a NaN: its significand is copied into this object.
1471 case convolve(fcZero, fcNaN):
1472 case convolve(fcNormal, fcNaN):
1473 case convolve(fcInfinity, fcNaN):
1475 copySignificand(rhs);
1478 case convolve(fcNormal, fcInfinity):
// A normal value divided by zero becomes an infinity.
1482 case convolve(fcNormal, fcZero):
1483 category = fcInfinity;
1486 case convolve(fcInfinity, fcInfinity):
1487 case convolve(fcZero, fcZero):
1491 case convolve(fcNormal, fcNormal):
// Resolves the remainder operation according to the categories of the
// two operands.  The switch key is the (category, rhs.category) pair,
// which convolve() packs into one integer as lhs * 4 + rhs.
1497 APFloat::modSpecials(const APFloat &rhs)
1499 switch (convolve(category, rhs.category)) {
1501 llvm_unreachable(0);
// These pairs share one case group: a NaN dividend, a zero dividend with
// an infinite or normal divisor, and a normal dividend with an infinite
// divisor.
1503 case convolve(fcNaN, fcZero):
1504 case convolve(fcNaN, fcNormal):
1505 case convolve(fcNaN, fcInfinity):
1506 case convolve(fcNaN, fcNaN):
1507 case convolve(fcZero, fcInfinity):
1508 case convolve(fcZero, fcNormal):
1509 case convolve(fcNormal, fcInfinity):
// Only rhs is a NaN: its significand is copied into this object.
1512 case convolve(fcZero, fcNaN):
1513 case convolve(fcNormal, fcNaN):
1514 case convolve(fcInfinity, fcNaN):
1516 copySignificand(rhs);
// A zero divisor or an infinite dividend, grouped in one case.
1519 case convolve(fcNormal, fcZero):
1520 case convolve(fcInfinity, fcZero):
1521 case convolve(fcInfinity, fcNormal):
1522 case convolve(fcInfinity, fcInfinity):
1523 case convolve(fcZero, fcZero):
1527 case convolve(fcNormal, fcNormal):
// NOTE(review): presumably toggles the sign member - confirm against the body.
1534 APFloat::changeSign()
1536 /* Look mummy, this one's easy. */
// NOTE(review): presumably forces the sign member to zero - confirm
// against the body.
1541 APFloat::clearSign()
1543 /* So is this one. */
// NOTE(review): presumably sets the sign member from rhs.sign - confirm
// against the body.
1548 APFloat::copySign(const APFloat &rhs)
1554 /* Normalized addition or subtraction. */
// Adds rhs to this value, or subtracts it when subtract is set, rounding
// according to rounding_mode.  addOrSubtractSpecials reports operand pairs
// it cannot settle through the opDivByZero code, which is used here as an
// internal marker rather than as a real division error.
1556 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1561 assertArithmeticOK(*semantics);
1563 fs = addOrSubtractSpecials(rhs, subtract);
1565 /* This return code means it was not a simple case. */
1566 if (fs == opDivByZero) {
1567 lostFraction lost_fraction;
1569 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1570 fs = normalize(rounding_mode, lost_fraction);
1572 /* Can only be zero if we lost no fraction. */
1573 assert(category != fcZero || lost_fraction == lfExactlyZero);
1576 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1577 positive zero unless rounding to minus infinity, except that
1578 adding two like-signed zeroes gives that zero. */
1579 if (category == fcZero) {
// The sign is left alone only when rhs is a zero and the comparison
// (sign == rhs.sign) differs from subtract; otherwise it becomes
// negative exactly when rounding toward negative infinity.
1580 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1581 sign = (rounding_mode == rmTowardNegative);
1587 /* Normalized addition. */
// Forwards to addOrSubtract with the subtract flag cleared.
1589 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1591 return addOrSubtract(rhs, rounding_mode, false);
1594 /* Normalized subtraction. */
// Forwards to addOrSubtract with the subtract flag set.
1596 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1598 return addOrSubtract(rhs, rounding_mode, true);
1601 /* Normalized multiply. */
// Multiplies this value by rhs in place.  multiplySpecials settles the
// category first; the significands are multiplied only when the result
// category is still fcNormal.
1603 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1607 assertArithmeticOK(*semantics);
1609 fs = multiplySpecials(rhs);
1611 if (category == fcNormal) {
// The second argument is the optional addend (see fusedMultiplyAdd);
// none is supplied for a plain multiply.
1612 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1613 fs = normalize(rounding_mode, lost_fraction);
// Any bits discarded by the significand multiply make the result inexact.
1614 if (lost_fraction != lfExactlyZero)
1615 fs = (opStatus) (fs | opInexact);
1621 /* Normalized divide. */
// Divides this value by rhs in place.  divideSpecials settles the
// category first; the significands are divided only when the result
// category is still fcNormal.
1623 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1627 assertArithmeticOK(*semantics);
1629 fs = divideSpecials(rhs);
1631 if (category == fcNormal) {
1632 lostFraction lost_fraction = divideSignificand(rhs);
1633 fs = normalize(rounding_mode, lost_fraction);
// Any bits discarded by the significand divide make the result inexact.
1634 if (lost_fraction != lfExactlyZero)
1635 fs = (opStatus) (fs | opInexact);
1641 /* Normalized remainder. This is not currently correct in all cases. */
// Strategy: divide by rhs, round the quotient to an integer with
// rmNearestTiesToEven, multiply that integer by rhs and subtract the
// product from this value.
// NOTE(review): V is presumably a working copy of *this - confirm.
1643 APFloat::remainder(const APFloat &rhs)
1647 unsigned int origSign = sign;
1649 assertArithmeticOK(*semantics);
1650 fs = V.divide(rhs, rmNearestTiesToEven);
1651 if (fs == opDivByZero)
1654 int parts = partCount();
// NOTE(review): x comes from new[]; check that every exit after this
// point, including the opInvalidOp test below, releases it with delete[].
1655 integerPart *x = new integerPart[parts];
// Round the quotient to an integer held in x, then load it back into V.
1657 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1658 rmNearestTiesToEven, &ignored);
1659 if (fs==opInvalidOp)
1662 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1663 rmNearestTiesToEven);
1664 assert(fs==opOK); // should always work
1666 fs = V.multiply(rhs, rmNearestTiesToEven);
1667 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1669 fs = subtract(V, rmNearestTiesToEven);
1670 assert(fs==opOK || fs==opInexact); // likewise
1673 sign = origSign; // IEEE754 requires this
1678 /* Normalized llvm frem (C fmod).
1679 This is not currently correct in all cases. */
// Strategy: after modSpecials, and only when both operands are normal,
// divide by rhs, truncate the quotient to an integer with rmTowardZero,
// multiply that integer by rhs and subtract the product from this value.
// The final multiply and subtract use the caller-supplied rounding_mode.
// NOTE(review): V is presumably a working copy of *this - confirm.
1681 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1684 assertArithmeticOK(*semantics);
1685 fs = modSpecials(rhs);
1687 if (category == fcNormal && rhs.category == fcNormal) {
1689 unsigned int origSign = sign;
1691 fs = V.divide(rhs, rmNearestTiesToEven);
1692 if (fs == opDivByZero)
1695 int parts = partCount();
// NOTE(review): x comes from new[]; check that every exit after this
// point, including the opInvalidOp test below, releases it with delete[].
1696 integerPart *x = new integerPart[parts];
1698 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1699 rmTowardZero, &ignored);
1700 if (fs==opInvalidOp)
1703 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1704 rmNearestTiesToEven);
1705 assert(fs==opOK); // should always work
1707 fs = V.multiply(rhs, rounding_mode);
1708 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1710 fs = subtract(V, rounding_mode);
1711 assert(fs==opOK || fs==opInexact); // likewise
1714 sign = origSign; // IEEE754 requires this
1720 /* Normalized fused-multiply-add. */
// Replaces this value with this * multiplicand + addend.  When all three
// operands are normal the addend is handed to multiplySignificand and the
// result is normalized once; otherwise the work is done by
// multiplySpecials and addOrSubtract.
1722 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1723 const APFloat &addend,
1724 roundingMode rounding_mode)
1728 assertArithmeticOK(*semantics);
1730 /* Post-multiplication sign, before addition. */
1731 sign ^= multiplicand.sign;
1733 /* If and only if all arguments are normal do we need to do an
1734 extended-precision calculation. */
1735 if (category == fcNormal &&
1736 multiplicand.category == fcNormal &&
1737 addend.category == fcNormal) {
1738 lostFraction lost_fraction;
1740 lost_fraction = multiplySignificand(multiplicand, &addend);
1741 fs = normalize(rounding_mode, lost_fraction);
1742 if (lost_fraction != lfExactlyZero)
1743 fs = (opStatus) (fs | opInexact);
1745 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1746 positive zero unless rounding to minus infinity, except that
1747 adding two like-signed zeroes gives that zero. */
1748 if (category == fcZero && sign != addend.sign)
1749 sign = (rounding_mode == rmTowardNegative);
// Some operand is not normal: settle the product by category first.
1751 fs = multiplySpecials(multiplicand);
1753 /* FS can only be opOK or opInvalidOp. There is no more work
1754 to do in the latter case. The IEEE-754R standard says it is
1755 implementation-defined in this case whether, if ADDEND is a
1756 quiet NaN, we raise invalid op; this implementation does so.
1758 If we need to do the addition we can do so with normal
1761 fs = addOrSubtract(addend, rounding_mode, false);
1767 /* Comparison requires normalized numbers. */
// Reports how this value orders against rhs.  Both operands must use the
// same semantics object (asserted).  The switch key is the
// (category, rhs.category) pair packed by convolve().
1769 APFloat::compare(const APFloat &rhs) const
1773 assertArithmeticOK(*semantics);
1774 assert(semantics == rhs.semantics);
1776 switch (convolve(category, rhs.category)) {
1778 llvm_unreachable(0);
// Any NaN operand makes the comparison unordered.
1780 case convolve(fcNaN, fcZero):
1781 case convolve(fcNaN, fcNormal):
1782 case convolve(fcNaN, fcInfinity):
1783 case convolve(fcNaN, fcNaN):
1784 case convolve(fcZero, fcNaN):
1785 case convolve(fcNormal, fcNaN):
1786 case convolve(fcInfinity, fcNaN):
1787 return cmpUnordered;
// This value is in the larger-magnitude category.
1789 case convolve(fcInfinity, fcNormal):
1790 case convolve(fcInfinity, fcZero):
1791 case convolve(fcNormal, fcZero):
1795 return cmpGreaterThan;
// rhs is in the larger-magnitude category.
1797 case convolve(fcNormal, fcInfinity):
1798 case convolve(fcZero, fcInfinity):
1799 case convolve(fcZero, fcNormal):
1801 return cmpGreaterThan;
1805 case convolve(fcInfinity, fcInfinity):
1806 if (sign == rhs.sign)
1811 return cmpGreaterThan;
1813 case convolve(fcZero, fcZero):
1816 case convolve(fcNormal, fcNormal):
1820 /* Two normal numbers. Do they have the same sign? */
1821 if (sign != rhs.sign) {
1823 result = cmpLessThan;
1825 result = cmpGreaterThan;
1827 /* Compare absolute values; invert result if negative. */
1828 result = compareAbsoluteValue(rhs);
// Inversion swaps less-than and greater-than; other results are kept.
1831 if (result == cmpLessThan)
1832 result = cmpGreaterThan;
1833 else if (result == cmpGreaterThan)
1834 result = cmpLessThan;
1841 /// APFloat::convert - convert a value of one floating point type to another.
1842 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1843 /// records whether the transformation lost information, i.e. whether
1844 /// converting the result back to the original type will produce the
1845 /// original value (this is almost the same as return value==opOK, but there
1846 /// are edge cases where this is not so).
///
/// toSemantics is the target format, rounding_mode is applied when a
/// normal value is renormalized, and losesInfo must point to a writable
/// bool.
1849 APFloat::convert(const fltSemantics &toSemantics,
1850 roundingMode rounding_mode, bool *losesInfo)
1852 lostFraction lostFraction;
1853 unsigned int newPartCount, oldPartCount;
1856 const fltSemantics &fromSemantics = *semantics;
1858 assertArithmeticOK(fromSemantics);
1859 assertArithmeticOK(toSemantics);
1860 lostFraction = lfExactlyZero;
1861 newPartCount = partCountForBits(toSemantics.precision + 1);
1862 oldPartCount = partCount();
// Positive when the target has more significand bits (extension),
// negative when it has fewer (truncation).
1863 shift = toSemantics.precision - fromSemantics.precision;
1865 bool X86SpecialNan = false;
// Flags an x87 NaN leaving the x87 format when either of the two masked
// high bits (0x8000... and 0x4000...) of its first significand part is
// clear.
1866 if (&fromSemantics == &APFloat::x87DoubleExtended &&
1867 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1868 (!(*significandParts() & 0x8000000000000000ULL) ||
1869 !(*significandParts() & 0x4000000000000000ULL))) {
1870 // x86 has some unusual NaNs which cannot be represented in any other
1871 // format; note them here.
1872 X86SpecialNan = true;
1875 // If this is a truncation, perform the shift before we narrow the storage.
1876 if (shift < 0 && (category==fcNormal || category==fcNaN))
1877 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1879 // Fix the storage so it can hold the new value.
1880 if (newPartCount > oldPartCount) {
1881 // The new type requires more storage; make it available.
1882 integerPart *newParts;
1883 newParts = new integerPart[newPartCount];
// Zero-fill first so the parts beyond the old significand start clear.
1884 APInt::tcSet(newParts, 0, newPartCount);
1885 if (category==fcNormal || category==fcNaN)
1886 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1888 significand.parts = newParts;
1889 } else if (newPartCount == 1 && oldPartCount != 1) {
1890 // Switch to built-in storage for a single part.
1891 integerPart newPart = 0;
1892 if (category==fcNormal || category==fcNaN)
1893 newPart = significandParts()[0];
1895 significand.part = newPart;
1898 // Now that we have the right storage, switch the semantics.
1899 semantics = &toSemantics;
1901 // If this is an extension, perform the shift now that the storage is
1903 if (shift > 0 && (category==fcNormal || category==fcNaN))
1904 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1906 if (category == fcNormal) {
1907 fs = normalize(rounding_mode, lostFraction);
1908 *losesInfo = (fs != opOK);
1909 } else if (category == fcNaN) {
// A NaN loses information if payload bits were shifted out or if it is
// one of the x87-only NaNs flagged above.
1910 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
1911 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1912 // does not give you back the same bits. This is dubious, and we
1913 // don't currently do it. You're really supposed to get
1914 // an invalid operation signal at runtime, but nobody does that.
1924 /* Convert a floating point number to an integer according to the
1925 rounding mode. If the rounded integer value is out of range this
1926 returns an invalid operation exception and the contents of the
1927 destination parts are unspecified. If the rounded value is in
1928 range but the floating point number is not the exact integer, the C
1929 standard doesn't require an inexact exception to be raised. IEEE
1930 854 does require it so we do that.
1932 Note that for conversions to integer type the C standard requires
1933 round-to-zero to always be used. */
// parts receives the integer, using partCountForBits(width) parts; width
// is the destination size in bits; isSigned selects a signed destination;
// isExact is an output flag.
1935 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1937 roundingMode rounding_mode,
1938 bool *isExact) const
1940 lostFraction lost_fraction;
1941 const integerPart *src;
1942 unsigned int dstPartsCount, truncatedBits;
1944 assertArithmeticOK(*semantics);
1948 /* Handle the three special cases first. */
1949 if (category == fcInfinity || category == fcNaN)
1952 dstPartsCount = partCountForBits(width);
1954 if (category == fcZero) {
1955 APInt::tcSet(parts, 0, dstPartsCount);
1956 // Negative zero can't be represented as an int.
1961 src = significandParts();
1963 /* Step 1: place our absolute value, with any fraction truncated, in
1966 /* Our absolute value is less than one; truncate everything. */
1967 APInt::tcSet(parts, 0, dstPartsCount);
1968 /* For exponent -1 the integer bit represents .5, look at that.
1969 For smaller exponents leftmost truncated bit is 0. */
1970 truncatedBits = semantics->precision -1U - exponent;
1972 /* We want the most significant (exponent + 1) bits; the rest are
1974 unsigned int bits = exponent + 1U;
1976 /* Hopelessly large in magnitude? */
1980 if (bits < semantics->precision) {
1981 /* We truncate (semantics->precision - bits) bits. */
1982 truncatedBits = semantics->precision - bits;
1983 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1985 /* We want at least as many bits as are available. */
1986 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1987 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1992 /* Step 2: work out any lost fraction, and increment the absolute
1993 value if we would round away from zero. */
1994 if (truncatedBits) {
1995 lost_fraction = lostFractionThroughTruncation(src, partCount(),
1997 if (lost_fraction != lfExactlyZero &&
1998 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
// A carry out of the destination parts means the rounded magnitude
// cannot be held in them.
1999 if (APInt::tcIncrement(parts, dstPartsCount))
2000 return opInvalidOp; /* Overflow. */
2003 lost_fraction = lfExactlyZero;
2006 /* Step 3: check if we fit in the destination. */
// omsb is the number of bits needed for the magnitude (one-based index
// of its most significant set bit).
2007 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2011 /* Negative numbers cannot be represented as unsigned. */
2015 /* It takes omsb bits to represent the unsigned integer value.
2016 We lose a bit for the sign, but care is needed as the
2017 maximally negative integer is a special case. */
2018 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2021 /* This case can happen because of rounding. */
2026 APInt::tcNegate (parts, dstPartsCount);
2028 if (omsb >= width + !isSigned)
2032 if (lost_fraction == lfExactlyZero) {
2039 /* Same as convertToSignExtendedInteger, except we provide
2040 deterministic values in case of an invalid operation exception,
2041 namely zero for NaNs and the minimal or maximal value respectively
2042 for underflow or overflow.
2043 The *isExact output tells whether the result is exact, in the sense
2044 that converting it back to the original floating point type produces
2045 the original value. This is almost equivalent to result==opOK,
2046 except for negative zeroes.
// parts must hold partCountForBits(width) parts.
2049 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2051 roundingMode rounding_mode, bool *isExact) const
2055 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2058 if (fs == opInvalidOp) {
2059 unsigned int bits, dstPartsCount;
2061 dstPartsCount = partCountForBits(width);
2063 if (category == fcNaN)
2068 bits = width - isSigned;
// Overwrite the unspecified contents: set the low bits, then for a
// negative value with a signed destination shift them up by width - 1.
2070 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2071 if (sign && isSigned)
2072 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2078 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2079 an APSInt, whose initial bit-width and signed-ness are used to determine the
2080 precision of the conversion.
// result is both input (bit width and signedness) and output (value).
2083 APFloat::convertToInteger(APSInt &result,
2084 roundingMode rounding_mode, bool *isExact) const
2086 unsigned bitWidth = result.getBitWidth();
// Scratch buffer with one 64-bit word per word of the destination.
2087 SmallVector<uint64_t, 4> parts(result.getNumWords());
2088 opStatus status = convertToInteger(
2089 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2090 // Keeps the original signed-ness.
2091 result = APInt(bitWidth, parts);
2095 /* Convert an unsigned integer SRC to a floating point number,
2096 rounding according to ROUNDING_MODE. The sign of the floating
2097 point number is not modified. */
// src holds srcCount parts.  The category is set to fcNormal before the
// final normalize call.
2099 APFloat::convertFromUnsignedParts(const integerPart *src,
2100 unsigned int srcCount,
2101 roundingMode rounding_mode)
2103 unsigned int omsb, precision, dstCount;
2105 lostFraction lost_fraction;
2107 assertArithmeticOK(*semantics);
2108 category = fcNormal;
// omsb is the one-based index of the most significant set bit of SRC.
2109 omsb = APInt::tcMSB(src, srcCount) + 1;
2110 dst = significandParts();
2111 dstCount = partCount();
2112 precision = semantics->precision;
2114 /* We want the most significant PRECISION bits of SRC. There may not
2115 be that many; extract what we can. */
2116 if (precision <= omsb) {
// SRC has at least PRECISION significant bits: keep the top PRECISION
// bits and account for the rest as a lost fraction.
2117 exponent = omsb - 1;
2118 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2120 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
// SRC fits entirely: copy all omsb bits, nothing is lost.
2122 exponent = precision - 1;
2123 lost_fraction = lfExactlyZero;
2124 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2127 return normalize(rounding_mode, lost_fraction);
// Converts the integer Val to floating point with the given rounding
// mode, delegating the magnitude to convertFromUnsignedParts.
// NOTE(review): api is presumably a local copy of Val that is negated
// when the value is signed and negative - confirm.
2131 APFloat::convertFromAPInt(const APInt &Val,
2133 roundingMode rounding_mode)
2135 unsigned int partCount = Val.getNumWords();
2139 if (isSigned && api.isNegative()) {
2144 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2147 /* Convert a two's complement integer SRC to a floating point number,
2148 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2149 integer is signed, in which case it must be sign-extended. */
// src holds srcCount parts.  The sign test reads the top bit of the
// last part.
2151 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2152 unsigned int srcCount,
2154 roundingMode rounding_mode)
2158 assertArithmeticOK(*semantics);
2160 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2163 /* If we're signed and negative negate a copy. */
// The negation is done on a heap copy so that src itself is untouched.
2165 copy = new integerPart[srcCount];
2166 APInt::tcAssign(copy, src, srcCount);
2167 APInt::tcNegate(copy, srcCount);
2168 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2172 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2178 /* FIXME: should this just take a const APInt reference? */
// Converts the width-bit integer stored in parts to floating point.  The
// bits are wrapped in an APInt and the magnitude is handed to
// convertFromUnsignedParts; the sign test reads bit width - 1.
2180 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2181 unsigned int width, bool isSigned,
2182 roundingMode rounding_mode)
2184 unsigned int partCount = partCountForBits(width);
2185 APInt api = APInt(width, makeArrayRef(parts, partCount));
2188 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2193 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
// Parses the significand digits and the p/P exponent of a hexadecimal
// floating literal into this value and normalizes it with rounding_mode.
// convertFromString passes the text that follows the 0x prefix.
// Malformed input is checked with assert only.
2197 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2199 lostFraction lost_fraction = lfExactlyZero;
2200 integerPart *significand;
2201 unsigned int bitPos, partsCount;
2202 StringRef::iterator dot, firstSignificantDigit;
2206 category = fcNormal;
2208 significand = significandParts();
2209 partsCount = partCount();
// bitPos starts at the total bit width of the significand storage.
2210 bitPos = partsCount * integerPartWidth;
2212 /* Skip leading zeroes and any (hexa)decimal point. */
2213 StringRef::iterator begin = s.begin();
2214 StringRef::iterator end = s.end();
2215 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2216 firstSignificantDigit = p;
2219 integerPart hex_value;
2222 assert(dot == end && "String contains multiple dots");
2229 hex_value = hexDigitValue(*p);
// hexDigitValue yields -1U for a character that is not a hex digit.
2230 if (hex_value == -1U) {
2239 /* Store the number whilst 4-bit nibbles remain. */
2242 hex_value <<= bitPos % integerPartWidth;
2243 significand[bitPos / integerPartWidth] |= hex_value;
// No room left: the remaining digits only feed the lost fraction and
// are then skipped.
2245 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2246 while (p != end && hexDigitValue(*p) != -1U)
2253 /* Hex floats require an exponent but not a hexadecimal point. */
2254 assert(p != end && "Hex strings require an exponent");
2255 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2256 assert(p != begin && "Significand has no digits");
2257 assert((dot == end || p - begin != 1) && "Significand has no digits");
2259 /* Ignore the exponent if we are zero. */
2260 if (p != firstSignificantDigit) {
2263 /* Implicit hexadecimal point? */
2267 /* Calculate the exponent adjustment implicit in the number of
2268 significant digits. */
2269 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2270 if (expAdjustment < 0)
// Each hex digit before the point is worth four binary exponent steps.
2272 expAdjustment = expAdjustment * 4 - 1;
2274 /* Adjust for writing the significand starting at the most
2275 significant nibble. */
2276 expAdjustment += semantics->precision;
2277 expAdjustment -= partsCount * integerPartWidth;
2279 /* Adjust for the given exponent. */
2280 exponent = totalExponent(p + 1, end, expAdjustment);
2283 return normalize(rounding_mode, lost_fraction);
// Sets this value from the integer decSigParts (sigPartCount parts)
// scaled by ten to the power exp.  The scaling is done in a temporary
// wider format as a multiply or divide by a power of five plus an
// exponent adjustment; the working width doubles on each pass of the loop
// until the error bound shows that truncation rounds correctly.
2287 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2288 unsigned sigPartCount, int exp,
2289 roundingMode rounding_mode)
2291 unsigned int parts, pow5PartCount;
2292 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2293 integerPart pow5Parts[maxPowerOfFiveParts];
2296 isNearest = (rounding_mode == rmNearestTiesToEven ||
2297 rounding_mode == rmNearestTiesToAway);
2299 parts = partCountForBits(semantics->precision + 11);
2301 /* Calculate pow(5, abs(exp)). */
2302 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2304 for (;; parts *= 2) {
2305 opStatus sigStatus, powStatus;
2306 unsigned int excessPrecision, truncatedBits;
// Working precision for this pass, and how many bits it has beyond the
// target format.
2308 calcSemantics.precision = parts * integerPartWidth - 1;
2309 excessPrecision = calcSemantics.precision - semantics->precision;
2310 truncatedBits = excessPrecision;
2312 APFloat decSig(calcSemantics, fcZero, sign);
2313 APFloat pow5(calcSemantics, fcZero, false);
2315 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2316 rmNearestTiesToEven);
2317 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2318 rmNearestTiesToEven);
2319 /* Add exp, as 10^n = 5^n * 2^n. */
2320 decSig.exponent += exp;
2322 lostFraction calcLostFraction;
2323 integerPart HUerr, HUdistance;
2324 unsigned int powHUerr;
2327 /* multiplySignificand leaves the precision-th bit set to 1. */
2328 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2329 powHUerr = powStatus != opOK;
2331 calcLostFraction = decSig.divideSignificand(pow5);
2332 /* Denormal numbers have less precision. */
2333 if (decSig.exponent < semantics->minExponent) {
2334 excessPrecision += (semantics->minExponent - decSig.exponent);
2335 truncatedBits = excessPrecision;
2336 if (excessPrecision > calcSemantics.precision)
2337 excessPrecision = calcSemantics.precision;
2339 /* Extra half-ulp lost in reciprocal of exponent. */
2340 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2343 /* Both multiplySignificand and divideSignificand return the
2344 result with the integer bit set. */
2345 assert(APInt::tcExtractBit
2346 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
// Going by the names, HUerr is the accumulated error in half-ulps and
// HUdistance the distance to the nearest rounding boundary in the same
// unit.
2348 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2350 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2351 excessPrecision, isNearest);
2353 /* Are we guaranteed to round correctly if we truncate? */
2354 if (HUdistance >= HUerr) {
2355 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2356 calcSemantics.precision - excessPrecision,
2358 /* Take the exponent of decSig. If we tcExtract-ed fewer bits
2359 above we must adjust our exponent to compensate for the
2360 implicit right shift. */
2361 exponent = (decSig.exponent + semantics->precision
2362 - (calcSemantics.precision - excessPrecision));
2363 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2366 return normalize(rounding_mode, calcLostFraction);
// Parses a decimal floating literal from str into this value.  Quick
// range checks on the normalized decimal exponent handle certain overflow
// and underflow; otherwise the digits are accumulated into a bignum and
// passed to roundSignificandWithExponent.
2372 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2377 /* Scan the text. */
2378 StringRef::iterator p = str.begin();
2379 interpretDecimal(p, str.end(), &D);
2381 /* Handle the quick cases. First the case of no significant digits,
2382 i.e. zero, and then exponents that are obviously too large or too
2383 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2384 definitely overflows if
2386 (exp - 1) * L >= maxExponent
2388 and definitely underflows to zero where
2390 (exp + 1) * L <= minExponent - precision
2392 With integer arithmetic the tightest bounds for L are
2394 93/28 < L < 196/59 [ numerator <= 256 ]
2395 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2398 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2402 /* Check whether the normalized exponent is high enough to overflow
2403 max during the log-rebasing in the max-exponent check below. */
2404 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2405 fs = handleOverflow(rounding_mode);
2407 /* If it wasn't, then it also wasn't high enough to overflow max
2408 during the log-rebasing in the min-exponent check. Check that it
2409 won't overflow min in either check, then perform the min-exponent
2411 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2412 (D.normalizedExponent + 1) * 28738 <=
2413 8651 * (semantics->minExponent - (int) semantics->precision)) {
2414 /* Underflow to zero and round. */
2416 fs = normalize(rounding_mode, lfLessThanHalf);
2418 /* We can finally safely perform the max-exponent check. */
2419 } else if ((D.normalizedExponent - 1) * 42039
2420 >= 12655 * semantics->maxExponent) {
2421 /* Overflow and round. */
2422 fs = handleOverflow(rounding_mode);
2424 integerPart *decSignificand;
2425 unsigned int partCount;
2427 /* A tight upper bound on number of bits required to hold an
2428 N-digit decimal integer is N * 196 / 59. Allocate enough space
2429 to hold the full significand, and an extra part required by
// partCount first holds the digit count, then the part count for it.
2431 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2432 partCount = partCountForBits(1 + 196 * partCount / 59);
2433 decSignificand = new integerPart[partCount + 1];
2436 /* Convert to binary efficiently - we do almost all multiplication
2437 in an integerPart. When this would overflow we do a single
2438 bignum multiplication, and then revert again to multiplication
2439 in an integerPart. */
2441 integerPart decValue, val, multiplier;
2449 if (p == str.end()) {
2453 decValue = decDigitValue(*p++);
2454 assert(decValue < 10U && "Invalid character in significand");
2456 val = val * 10 + decValue;
2457 /* The maximum number that can be multiplied by ten with any
2458 digit added without overflowing an integerPart. */
2459 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2461 /* Multiply out the current part. */
2462 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2463 partCount, partCount + 1, false);
2465 /* If we used another part (likely but not guaranteed), increase
2467 if (decSignificand[partCount])
2469 } while (p <= D.lastSigDigit);
2471 category = fcNormal;
2472 fs = roundSignificandWithExponent(decSignificand, partCount,
2473 D.exponent, rounding_mode);
// The bignum buffer is released once the rounded value has been stored.
2475 delete [] decSignificand;
// Parses str into this value.  The sign member is set from a leading
// minus; a 0x or 0X prefix selects the hexadecimal parser and anything
// else goes to the decimal parser.  Empty or digit-free input is checked
// with assert only.
2482 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2484 assertArithmeticOK(*semantics);
2485 assert(!str.empty() && "Invalid string length");
2487 /* Handle a leading minus sign. */
2488 StringRef::iterator p = str.begin();
2489 size_t slen = str.size();
2490 sign = *p == '-' ? 1 : 0;
2491 if (*p == '-' || *p == '+') {
2494 assert(slen && "String has no digits");
2497 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2498 assert(slen - 2 && "Invalid string");
// The two prefix characters are stripped before the hexadecimal parser
// sees the text.
2499 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2503 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2506 /* Write out a hexadecimal representation of the floating point value
2507 to DST, which must be of sufficient size, in the C99 form
2508 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2509 excluding the terminating NUL.
2511 If UPPERCASE, the output is in upper case, otherwise in lower case.
2513 HEXDIGITS digits appear altogether, rounding the value if
2514 necessary. If HEXDIGITS is 0, the minimal precision to display the
2515 number precisely is used instead. If nothing would appear after
2516 the decimal point it is suppressed.
2518 The decimal exponent is always printed and has at least one digit.
2519 Zero values display an exponent of zero. Infinities and NaNs
2520 appear as "infinity" or "nan" respectively.
2522 The above rules are as specified by C99. There is ambiguity about
2523 what the leading hexadecimal digit should be. This implementation
2524 uses whatever is necessary so that the exponent is displayed as
2525 stored. This implies the exponent will fall within the IEEE format
2526 range, and the leading hexadecimal digit will be 0 (for denormals),
2527 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2528 any other digits zero).
2531 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2532 bool upperCase, roundingMode rounding_mode) const
2536 assertArithmeticOK(*semantics);
// NOTE(review): the copy length uses sizeof infinityU while the pointer
// advance uses sizeof infinityL; this relies on both arrays having the
// same size - confirm.
2544 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2545 dst += sizeof infinityL - 1;
2549 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2550 dst += sizeof NaNU - 1;
2555 *dst++ = upperCase ? 'X': 'x';
// When more than one digit is requested, hexDigits - 1 zero digits are
// written here.
2557 if (hexDigits > 1) {
2559 memset (dst, '0', hexDigits - 1);
2560 dst += hexDigits - 1;
2562 *dst++ = upperCase ? 'P': 'p';
2567 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
// The result is the distance from the saved start pointer p.
2573 return static_cast<unsigned int>(dst - p);
2576 /* Does the hard work of outputting the correctly rounded hexadecimal
2577 form of a normal floating point number with the specified number of
2578 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2579 digits necessary to print the value precisely is output. */
// Writes at dst and finishes by handing the exponent to
// writeSignedDecimal, whose result is returned.
2581 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2583 roundingMode rounding_mode) const
2585 unsigned int count, valueBits, shift, partsCount, outputDigits;
2586 const char *hexDigitChars;
2587 const integerPart *significand;
2592 *dst++ = upperCase ? 'X': 'x';
2595 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2597 significand = significandParts();
2598 partsCount = partCount();
2600 /* +3 because the first digit only uses the single integer bit, so
2601 we have 3 virtual zero most-significant-bits. */
2602 valueBits = semantics->precision + 3;
2603 shift = integerPartWidth - valueBits % integerPartWidth;
2605 /* The natural number of digits required ignoring trailing
2606 insignificant zeroes. */
2607 outputDigits = (valueBits - significandLSB () + 3) / 4;
2609 /* hexDigits of zero means use the required number for the
2610 precision. Otherwise, see if we are truncating. If we are,
2611 find out if we need to round away from zero. */
2613 if (hexDigits < outputDigits) {
2614 /* We are dropping non-zero bits, so need to check how to round.
2615 "bits" is the number of dropped bits. */
2617 lostFraction fraction;
2619 bits = valueBits - hexDigits * 4;
2620 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2621 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2623 outputDigits = hexDigits;
2626 /* Write the digits consecutively, and start writing in the location
2627 of the hexadecimal point. We move the most significant digit
2628 left and add the hexadecimal point later. */
// count is the number of parts needed to cover valueBits, rounded up.
2631 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2633 while (outputDigits && count) {
2636 /* Put the most significant integerPartWidth bits in "part". */
2637 if (--count == partsCount)
2638 part = 0; /* An imaginary higher zero part. */
2640 part = significand[count] << shift;
// Fill the low end of part from the next lower significand part.
2643 part |= significand[count - 1] >> (integerPartWidth - shift);
2645 /* Convert as much of "part" to hexdigits as we can. */
2646 unsigned int curDigits = integerPartWidth / 4;
2648 if (curDigits > outputDigits)
2649 curDigits = outputDigits;
2650 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2651 outputDigits -= curDigits;
2657 /* Note that hexDigitChars has a trailing '0'. */
// Each digit is replaced by its successor in the table; the loop repeats
// while the digit just written is a zero character, which is how a carry
// moves on to the next digit.
2660 *q = hexDigitChars[hexDigitValue (*q) + 1];
2661 } while (*q == '0');
2664 /* Add trailing zeroes. */
2665 memset (dst, '0', outputDigits);
2666 dst += outputDigits;
2669 /* Move the most significant digit to before the point, and if there
2670 is something after the decimal point add it. This must come
2671 after rounding above. */
2678 /* Finally output the exponent. */
2679 *dst++ = upperCase ? 'P': 'p';
2681 return writeSignedDecimal (dst, exponent);
2684 // For good performance it is desirable for different APFloats
2685 // to produce different integers.
// Computes a 32-bit hash of this value.
// Zero, infinity and NaN return fixed encodings: the precision OR-ed with a
// marker at bit 8 (zero, the sign), bit 9 (infinity, the sign) or bit 10 (NaN,
// always set). The NaN case uses neither the sign nor the significand.
// Other values start from sign (bit 11), precision and exponent (shifted by
// 12) and XOR in both 32-bit halves of every significand part.
2687 APFloat::getHashValue() const
2689 if (category==fcZero) return sign<<8 | semantics->precision ;
2690 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2691 else if (category==fcNaN) return 1<<10 | semantics->precision;
2693 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2694 const integerPart* p = significandParts();
2695 for (int i=partCount(); i>0; i--, p++)
// NOTE(review): the >>32 presumes integerPart is wider than 32 bits - confirm.
2696 hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
2701 // Conversion from APFloat to/from host float/double. It may eventually be
2702 // possible to eliminate these and have everybody deal with APFloats, but that
2703 // will take a while. This approach will not easily extend to long double.
2704 // Current implementation requires integerPartWidth==64, which is correct at
2705 // the moment but could be made more general.
2707 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2708 // the actual IEEE representations. We compensate for that here.
// Packs an x87DoubleExtended value into an 80-bit APInt.
// Layout produced: words[0] is the 64-bit significand taken from
// significandParts()[0]; words[1] carries the sign in bit 15 and the biased
// exponent (bias 16383) in bits 0-14.
2711 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2713 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2714 assert(partCount()==2);
2716 uint64_t myexponent, mysignificand;
2718 if (category==fcNormal) {
2719 myexponent = exponent+16383; //bias
2720 mysignificand = significandParts()[0];
// A biased exponent of 1 with the top significand bit clear is a denormal
// and is encoded with an exponent field of 0.
2721 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2722 myexponent = 0; // denormal
2723 } else if (category==fcZero) {
2726 } else if (category==fcInfinity) {
// Infinity: all-ones exponent with only the top significand bit set.
2727 myexponent = 0x7fff;
2728 mysignificand = 0x8000000000000000ULL;
2730 assert(category == fcNaN && "Unknown category");
// NaN: all-ones exponent; the stored significand is passed through.
2731 myexponent = 0x7fff;
2732 mysignificand = significandParts()[0];
2736 words[0] = mysignificand;
2737 words[1] = ((uint64_t)(sign & 1) << 15) |
2738 (myexponent & 0x7fffLL);
2739 return APInt(80, words);
// Packs a PPCDoubleDouble value into a 128-bit APInt made of two 64-bit words.
// Each word has the same layout: sign in bit 63, 11-bit biased exponent
// (bias 1023) in bits 52-62, and the low 52 significand bits.
// words[0] is built from sign/exponent/significandParts()[0];
// words[1] is built from sign2/exponent2/significandParts()[1].
2743 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2745 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2746 assert(partCount()==2);
2748 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2750 if (category==fcNormal) {
2751 myexponent = exponent + 1023; //bias
2752 myexponent2 = exponent2 + 1023;
2753 mysignificand = significandParts()[0];
2754 mysignificand2 = significandParts()[1];
// For either half, a biased exponent of 1 with bit 52 (the integer bit)
// clear is a denormal and is encoded with an exponent field of 0.
2755 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2756 myexponent = 0; // denormal
2757 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2758 myexponent2 = 0; // denormal
2759 } else if (category==fcZero) {
2764 } else if (category==fcInfinity) {
2770 assert(category == fcNaN && "Unknown category");
// NaN: the second half's exponent2 is used as stored, without adding the bias.
2772 mysignificand = significandParts()[0];
2773 myexponent2 = exponent2;
2774 mysignificand2 = significandParts()[1];
2778 words[0] = ((uint64_t)(sign & 1) << 63) |
2779 ((myexponent & 0x7ff) << 52) |
2780 (mysignificand & 0xfffffffffffffLL);
2781 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2782 ((myexponent2 & 0x7ff) << 52) |
2783 (mysignificand2 & 0xfffffffffffffLL);
2784 return APInt(128, words);
// Packs an IEEEquad value into a 128-bit APInt.
// Layout produced: words[0] is significandParts()[0] (low 64 significand
// bits); words[1] holds the sign in bit 63, the 15-bit biased exponent
// (bias 16383) in bits 48-62, and the low 48 bits of significandParts()[1].
2788 APFloat::convertQuadrupleAPFloatToAPInt() const
2790 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2791 assert(partCount()==2);
2793 uint64_t myexponent, mysignificand, mysignificand2;
2795 if (category==fcNormal) {
2796 myexponent = exponent+16383; //bias
2797 mysignificand = significandParts()[0];
2798 mysignificand2 = significandParts()[1];
// Bit 48 of the upper part is the integer bit; when it is clear at biased
// exponent 1 the value is a denormal and is encoded with exponent field 0.
2799 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2800 myexponent = 0; // denormal
2801 } else if (category==fcZero) {
2803 mysignificand = mysignificand2 = 0;
2804 } else if (category==fcInfinity) {
2805 myexponent = 0x7fff;
2806 mysignificand = mysignificand2 = 0;
2808 assert(category == fcNaN && "Unknown category!");
2809 myexponent = 0x7fff;
2810 mysignificand = significandParts()[0];
2811 mysignificand2 = significandParts()[1];
2815 words[0] = mysignificand;
2816 words[1] = ((uint64_t)(sign & 1) << 63) |
2817 ((myexponent & 0x7fff) << 48) |
2818 (mysignificand2 & 0xffffffffffffLL);
2820 return APInt(128, words);
// Packs an IEEEdouble value into a 64-bit APInt: sign in bit 63, 11-bit
// biased exponent (bias 1023) in bits 52-62, low 52 significand bits below.
2824 APFloat::convertDoubleAPFloatToAPInt() const
2826 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2827 assert(partCount()==1);
2829 uint64_t myexponent, mysignificand;
2831 if (category==fcNormal) {
2832 myexponent = exponent+1023; //bias
2833 mysignificand = *significandParts();
// Bit 52 is the integer bit; clear at biased exponent 1 means denormal,
// which is encoded with an exponent field of 0.
2834 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2835 myexponent = 0; // denormal
2836 } else if (category==fcZero) {
2839 } else if (category==fcInfinity) {
2843 assert(category == fcNaN && "Unknown category!");
2845 mysignificand = *significandParts();
2848 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2849 ((myexponent & 0x7ff) << 52) |
2850 (mysignificand & 0xfffffffffffffLL))));
// Packs an IEEEsingle value into a 32-bit APInt: sign in bit 31, 8-bit
// biased exponent (bias 127) in bits 23-30, low 23 significand bits below.
2854 APFloat::convertFloatAPFloatToAPInt() const
2856 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2857 assert(partCount()==1);
2859 uint32_t myexponent, mysignificand;
2861 if (category==fcNormal) {
2862 myexponent = exponent+127; //bias
2863 mysignificand = (uint32_t)*significandParts();
// Bit 23 is the integer bit; clear at biased exponent 1 means denormal,
// which is encoded with an exponent field of 0.
2864 if (myexponent == 1 && !(mysignificand & 0x800000))
2865 myexponent = 0; // denormal
2866 } else if (category==fcZero) {
2869 } else if (category==fcInfinity) {
2873 assert(category == fcNaN && "Unknown category!");
2875 mysignificand = (uint32_t)*significandParts();
2878 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2879 (mysignificand & 0x7fffff)));
// Packs an IEEEhalf value into a 16-bit APInt: sign in bit 15, 5-bit biased
// exponent (bias 15) in bits 10-14, low 10 significand bits below.
2883 APFloat::convertHalfAPFloatToAPInt() const
2885 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2886 assert(partCount()==1);
2888 uint32_t myexponent, mysignificand;
2890 if (category==fcNormal) {
2891 myexponent = exponent+15; //bias
2892 mysignificand = (uint32_t)*significandParts();
// Bit 10 is the integer bit; clear at biased exponent 1 means denormal,
// which is encoded with an exponent field of 0.
2893 if (myexponent == 1 && !(mysignificand & 0x400))
2894 myexponent = 0; // denormal
2895 } else if (category==fcZero) {
2898 } else if (category==fcInfinity) {
2902 assert(category == fcNaN && "Unknown category!");
2904 mysignificand = (uint32_t)*significandParts();
2907 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2908 (mysignificand & 0x3ff)));
2911 // This function creates an APInt that is just a bit map of the floating
2912 // point constant as it would appear in memory. It is not a conversion,
2913 // and treating the result as a normal integer is unlikely to be useful.
// Dispatches on the semantics pointer to the matching convert*APFloatToAPInt
// helper. Half, single, double, quad and PPCDoubleDouble are tested in turn;
// anything else is asserted to be x87DoubleExtended.
2916 APFloat::bitcastToAPInt() const
2918 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2919 return convertHalfAPFloatToAPInt();
2921 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2922 return convertFloatAPFloatToAPInt();
2924 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2925 return convertDoubleAPFloatToAPInt();
2927 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2928 return convertQuadrupleAPFloatToAPInt();
2930 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2931 return convertPPCDoubleDoubleAPFloatToAPInt();
2933 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2935 return convertF80LongDoubleAPFloatToAPInt();
// Returns the value as a host float by reinterpreting the bit pattern from
// bitcastToAPInt(). Asserts that the semantics are IEEEsingle.
2939 APFloat::convertToFloat() const
2941 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2942 "Float semantics are not IEEEsingle");
2943 APInt api = bitcastToAPInt();
2944 return api.bitsToFloat();
// Returns the value as a host double by reinterpreting the bit pattern from
// bitcastToAPInt(). Asserts that the semantics are IEEEdouble.
2948 APFloat::convertToDouble() const
2950 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2951 "Float semantics are not IEEEdouble");
2952 APInt api = bitcastToAPInt();
2953 return api.bitsToDouble();
2956 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2957 /// does not support these bit patterns:
2958 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2959 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2960 /// exponent = 0, integer bit 1 ("pseudodenormal")
2961 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2962 /// At the moment, the first two are treated as NaNs, the second two as Normal.
// Initializes this object as x87DoubleExtended from an 80-bit pattern.
// Raw word 0 is the 64-bit significand; raw word 1 supplies the 15-bit
// exponent field (bits 0-14) and the sign (taken from i2 >> 15).
// Classification by the fields read:
//   exponent 0, significand 0                        -> first branch (zero)
//   exponent 0x7fff, significand 0x8000000000000000  -> fcInfinity
//   exponent 0x7fff, any other significand           -> significand stored
//   otherwise                                        -> fcNormal, exponent
//                                                       unbiased by 16383
2964 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2966 assert(api.getBitWidth()==80);
2967 uint64_t i1 = api.getRawData()[0];
2968 uint64_t i2 = api.getRawData()[1];
2969 uint64_t myexponent = (i2 & 0x7fff);
2970 uint64_t mysignificand = i1;
2972 initialize(&APFloat::x87DoubleExtended);
2973 assert(partCount()==2);
2975 sign = static_cast<unsigned int>(i2>>15);
2976 if (myexponent==0 && mysignificand==0) {
2977 // exponent, significand meaningless
2979 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2980 // exponent, significand meaningless
2981 category = fcInfinity;
2982 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2983 // exponent meaningless
2985 significandParts()[0] = mysignificand;
2986 significandParts()[1] = 0;
2988 category = fcNormal;
2989 exponent = myexponent - 16383;
// Only the low part carries data for this format; the high part is cleared.
2990 significandParts()[0] = mysignificand;
2991 significandParts()[1] = 0;
2992 if (myexponent==0) // denormal
// Initializes this object as PPCDoubleDouble from a 128-bit pattern.
// Each raw 64-bit word is split the same way: sign from bit 63, 11-bit
// exponent field from bits 52-62, 52-bit significand field from bits 0-51.
// Word 0 feeds sign/exponent/significandParts()[0]; word 1 feeds
// sign2/exponent2/significandParts()[1]. Classification is driven by the
// first word only.
2998 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3000 assert(api.getBitWidth()==128);
3001 uint64_t i1 = api.getRawData()[0];
3002 uint64_t i2 = api.getRawData()[1];
3003 uint64_t myexponent = (i1 >> 52) & 0x7ff;
3004 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
3005 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
3006 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
3008 initialize(&APFloat::PPCDoubleDouble);
3009 assert(partCount()==2);
3011 sign = static_cast<unsigned int>(i1>>63);
3012 sign2 = static_cast<unsigned int>(i2>>63);
3013 if (myexponent==0 && mysignificand==0) {
3014 // exponent, significand meaningless
3015 // exponent2 and significand2 are required to be 0; we don't check
3017 } else if (myexponent==0x7ff && mysignificand==0) {
3018 // exponent, significand meaningless
3019 // exponent2 and significand2 are required to be 0; we don't check
3020 category = fcInfinity;
3021 } else if (myexponent==0x7ff && mysignificand!=0) {
3022 // exponent meaningless. So is the whole second word, but keep it
// The second word's exponent field is stored as read, without removing the
// bias, which mirrors how convertPPCDoubleDoubleAPFloatToAPInt emits it.
3025 exponent2 = myexponent2;
3026 significandParts()[0] = mysignificand;
3027 significandParts()[1] = mysignificand2;
3029 category = fcNormal;
3030 // Note there is no category2; the second word is treated as if it is
3031 // fcNormal, although it might be something else considered by itself.
3032 exponent = myexponent - 1023;
3033 exponent2 = myexponent2 - 1023;
3034 significandParts()[0] = mysignificand;
3035 significandParts()[1] = mysignificand2;
3036 if (myexponent==0) // denormal
3039 significandParts()[0] |= 0x10000000000000LL; // integer bit
3043 significandParts()[1] |= 0x10000000000000LL; // integer bit
// Initializes this object as IEEEquad from a 128-bit pattern.
// Raw word 0 is the low 64 significand bits; raw word 1 supplies the sign
// (bit 63), the 15-bit exponent field (bits 48-62) and the upper 48
// significand bits. A zero or infinity requires both significand words to
// be zero; an all-ones exponent with any significand bit set takes the
// third branch, and everything else becomes fcNormal with the exponent
// unbiased by 16383.
3048 APFloat::initFromQuadrupleAPInt(const APInt &api)
3050 assert(api.getBitWidth()==128);
3051 uint64_t i1 = api.getRawData()[0];
3052 uint64_t i2 = api.getRawData()[1];
3053 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3054 uint64_t mysignificand = i1;
3055 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3057 initialize(&APFloat::IEEEquad);
3058 assert(partCount()==2);
3060 sign = static_cast<unsigned int>(i2>>63);
3061 if (myexponent==0 &&
3062 (mysignificand==0 && mysignificand2==0)) {
3063 // exponent, significand meaningless
3065 } else if (myexponent==0x7fff &&
3066 (mysignificand==0 && mysignificand2==0)) {
3067 // exponent, significand meaningless
3068 category = fcInfinity;
3069 } else if (myexponent==0x7fff &&
3070 (mysignificand!=0 || mysignificand2 !=0)) {
3071 // exponent meaningless
3073 significandParts()[0] = mysignificand;
3074 significandParts()[1] = mysignificand2;
3076 category = fcNormal;
3077 exponent = myexponent - 16383;
3078 significandParts()[0] = mysignificand;
3079 significandParts()[1] = mysignificand2;
3080 if (myexponent==0) // denormal
3083 significandParts()[1] |= 0x1000000000000LL; // integer bit
// Initializes this object as IEEEdouble from a 64-bit pattern: sign from
// bit 63, 11-bit exponent field from bits 52-62, 52-bit significand field
// from bits 0-51. An all-zero exponent and significand take the first
// branch; exponent 0x7ff selects fcInfinity (significand 0) or the
// significand-preserving third branch; everything else becomes fcNormal
// with the exponent unbiased by 1023.
3088 APFloat::initFromDoubleAPInt(const APInt &api)
3090 assert(api.getBitWidth()==64);
3091 uint64_t i = *api.getRawData();
3092 uint64_t myexponent = (i >> 52) & 0x7ff;
3093 uint64_t mysignificand = i & 0xfffffffffffffLL;
3095 initialize(&APFloat::IEEEdouble);
3096 assert(partCount()==1);
3098 sign = static_cast<unsigned int>(i>>63);
3099 if (myexponent==0 && mysignificand==0) {
3100 // exponent, significand meaningless
3102 } else if (myexponent==0x7ff && mysignificand==0) {
3103 // exponent, significand meaningless
3104 category = fcInfinity;
3105 } else if (myexponent==0x7ff && mysignificand!=0) {
3106 // exponent meaningless
3108 *significandParts() = mysignificand;
3110 category = fcNormal;
3111 exponent = myexponent - 1023;
3112 *significandParts() = mysignificand;
3113 if (myexponent==0) // denormal
3116 *significandParts() |= 0x10000000000000LL; // integer bit
// Initializes this object as IEEEsingle from a 32-bit pattern: 8-bit
// exponent field from bits 23-30 and 23-bit significand field from bits
// 0-22. An all-zero exponent and significand take the first branch;
// exponent 0xff selects fcInfinity (significand 0) or the
// significand-preserving third branch; everything else becomes fcNormal
// with the exponent unbiased by 127.
3121 APFloat::initFromFloatAPInt(const APInt & api)
3123 assert(api.getBitWidth()==32);
3124 uint32_t i = (uint32_t)*api.getRawData();
3125 uint32_t myexponent = (i >> 23) & 0xff;
3126 uint32_t mysignificand = i & 0x7fffff;
3128 initialize(&APFloat::IEEEsingle);
3129 assert(partCount()==1);
3132 if (myexponent==0 && mysignificand==0) {
3133 // exponent, significand meaningless
3135 } else if (myexponent==0xff && mysignificand==0) {
3136 // exponent, significand meaningless
3137 category = fcInfinity;
3138 } else if (myexponent==0xff && mysignificand!=0) {
3139 // exponent meaningless; the significand bits are stored below
3141 *significandParts() = mysignificand;
3143 category = fcNormal;
3144 exponent = myexponent - 127; //bias
3145 *significandParts() = mysignificand;
3146 if (myexponent==0) // denormal
3149 *significandParts() |= 0x800000; // integer bit
// Initializes this object as IEEEhalf from a 16-bit pattern: 5-bit exponent
// field from bits 10-14 and 10-bit significand field from bits 0-9. An
// all-zero exponent and significand take the first branch; exponent 0x1f
// selects fcInfinity (significand 0) or the significand-preserving third
// branch; everything else becomes fcNormal with the exponent unbiased by 15.
3154 APFloat::initFromHalfAPInt(const APInt & api)
3156 assert(api.getBitWidth()==16);
3157 uint32_t i = (uint32_t)*api.getRawData();
3158 uint32_t myexponent = (i >> 10) & 0x1f;
3159 uint32_t mysignificand = i & 0x3ff;
3161 initialize(&APFloat::IEEEhalf);
3162 assert(partCount()==1);
3165 if (myexponent==0 && mysignificand==0) {
3166 // exponent, significand meaningless
3168 } else if (myexponent==0x1f && mysignificand==0) {
3169 // exponent, significand meaningless
3170 category = fcInfinity;
3171 } else if (myexponent==0x1f && mysignificand!=0) {
3172 // exponent meaningless; the significand bits are stored below
3174 *significandParts() = mysignificand;
3176 category = fcNormal;
3177 exponent = myexponent - 15; //bias
3178 *significandParts() = mysignificand;
3179 if (myexponent==0) // denormal
3182 *significandParts() |= 0x400; // integer bit
3186 /// Treat api as containing the bits of a floating point number. Currently
3187 /// we infer the floating point type from the size of the APInt. The
3188 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3189 /// when the size is anything else).
// Selects the initFrom*APInt helper from api's bit width: 16 -> half,
// 32 -> float, 64 -> double, 80 -> x87 extended. For 128 bits, isIEEE picks
// IEEE quad (true) or PPC double-double (false). Any other width reaches
// llvm_unreachable.
3191 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
3193 if (api.getBitWidth() == 16)
3194 return initFromHalfAPInt(api);
3195 else if (api.getBitWidth() == 32)
3196 return initFromFloatAPInt(api);
3197 else if (api.getBitWidth()==64)
3198 return initFromDoubleAPInt(api);
3199 else if (api.getBitWidth()==80)
3200 return initFromF80LongDoubleAPInt(api);
3201 else if (api.getBitWidth()==128)
3203 initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api));
3205 llvm_unreachable(0);
// Builds an APFloat from a BitWidth-wide all-ones bit pattern; BitWidth and
// isIEEE are interpreted by the APFloat(const APInt&, bool) constructor.
3209 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3211 return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE);
// Constructs an fcNormal value in semantics Sem with exponent maxExponent and
// every significand bit set; Negative is forwarded to the constructor as the
// sign. The parts are first filled with all-ones; when the precision is not a
// multiple of integerPartWidth, a mask of the low
// (precision % integerPartWidth) bits is formed for the top part.
3214 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3215 APFloat Val(Sem, fcNormal, Negative);
3217 // We want (in interchange format):
3218 // sign = {Negative}
3220 // significand = 1..1
3222 Val.exponent = Sem.maxExponent; // unbiased
3224 // 1-initialize all bits....
3225 Val.zeroSignificand();
3226 integerPart *significand = Val.significandParts();
3227 unsigned N = partCountForBits(Sem.precision);
3228 for (unsigned i = 0; i != N; ++i)
3229 significand[i] = ~((integerPart) 0);
3231 // ...and then clear the top bits for internal consistency.
3232 if (Sem.precision % integerPartWidth != 0)
3234 (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1;
// Constructs an fcNormal value in semantics Sem with exponent minExponent and
// a significand whose only set bit is the least significant one; Negative is
// forwarded to the constructor as the sign.
3239 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3240 APFloat Val(Sem, fcNormal, Negative);
3242 // We want (in interchange format):
3243 // sign = {Negative}
3245 // significand = 0..01
3247 Val.exponent = Sem.minExponent; // unbiased
3248 Val.zeroSignificand();
3249 Val.significandParts()[0] = 1;
// Constructs an fcNormal value in semantics Sem with exponent minExponent and
// a significand whose only set bit is bit (precision - 1), located in the
// highest part; Negative is forwarded to the constructor as the sign.
3253 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3254 APFloat Val(Sem, fcNormal, Negative);
3256 // We want (in interchange format):
3257 // sign = {Negative}
3259 // significand = 10..0
3261 Val.exponent = Sem.minExponent;
3262 Val.zeroSignificand();
3263 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3264 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
// Constructs from a raw bit pattern. exponent2 and sign2 start at 0; the
// remaining state is filled in by initFromAPInt, which picks the format from
// api's bit width and isIEEE.
3269 APFloat::APFloat(const APInt& api, bool isIEEE) : exponent2(0), sign2(0) {
3270 initFromAPInt(api, isIEEE);
// Constructs from a host float by converting it to its bit pattern with
// APInt::floatToBits and handing that to initFromAPInt.
3273 APFloat::APFloat(float f) : exponent2(0), sign2(0) {
3274 initFromAPInt(APInt::floatToBits(f));
// Constructs from a host double by converting it to its bit pattern with
// APInt::doubleToBits and handing that to initFromAPInt.
3277 APFloat::APFloat(double d) : exponent2(0), sign2(0) {
3278 initFromAPInt(APInt::doubleToBits(d));
3282 static void append(SmallVectorImpl<char> &Buffer,
3283 unsigned N, const char *Str) {
3284 unsigned Start = Buffer.size();
3285 Buffer.set_size(Start + N);
3286 memcpy(&Buffer[Start], Str, N);
3289 template <unsigned N>
3290 void append(SmallVectorImpl<char> &Buffer, const char (&Str)[N]) {
3291 append(Buffer, N, Str);
3294 /// Removes data from the given significand until it is no more
3295 /// precise than is required for the desired precision.
//   significand     - integer to shrink; replaced in place by the divided and
//                     truncated value.
//   exp             - power-of-ten exponent; raised by the number of decimal
//                     digits divided out.
//   FormatPrecision - number of decimal digits that must remain representable.
// Returns without changes when the significand already fits in the bits
// needed for FormatPrecision digits, or when no whole power of ten can be
// removed.
3296 void AdjustToPrecision(APInt &significand,
3297 int &exp, unsigned FormatPrecision) {
3298 unsigned bits = significand.getActiveBits();
3300 // 196/59 is a very slight overestimate of lg_2(10).
3301 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3303 if (bits <= bitsRequired) return;
// Convert the surplus bits back to a count of decimal digits with the inverse
// ratio 59/196, rounding down.
3305 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3306 if (!tensRemovable) return;
3308 exp += tensRemovable;
// divisor and powten share the significand's bit width so that the udiv below
// operates on equal-width operands.
3310 APInt divisor(significand.getBitWidth(), 1);
3311 APInt powten(significand.getBitWidth(), 10);
3313 if (tensRemovable & 1)
3315 tensRemovable >>= 1;
3316 if (!tensRemovable) break;
3320 significand = significand.udiv(divisor);
3322 // Truncate the significand down to its active bit count, but
3323 // don't try to drop below 32.
3324 unsigned newPrecision = std::max(32U, significand.getActiveBits());
3325 significand = significand.trunc(newPrecision);
// Decimal-digit counterpart of the APInt overload: trims buffer to at most
// FormatPrecision digits and adds the number of erased positions to exp.
// Digits are stored least significant first, so the dropped digits are the
// leading elements of buffer. The digit just below the cut decides the
// rounding direction: below '5' truncates, otherwise a carry is propagated
// upward.
3329 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3330 int &exp, unsigned FormatPrecision) {
3331 unsigned N = buffer.size();
3332 if (N <= FormatPrecision) return;
3334 // The most significant figures are the last ones in the buffer.
3335 unsigned FirstSignificant = N - FormatPrecision;
3338 // FIXME: this probably shouldn't use 'round half up'.
3340 // Rounding down is just a truncation, except we also want to drop
3341 // trailing zeros from the new result.
3342 if (buffer[FirstSignificant - 1] < '5') {
3343 while (buffer[FirstSignificant] == '0')
3346 exp += FirstSignificant;
3347 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3351 // Rounding up requires a decimal add-with-carry. If we continue
3352 // the carry, the newly-introduced zeros will just be truncated.
3353 for (unsigned I = FirstSignificant; I != N; ++I) {
3354 if (buffer[I] == '9') {
3362 // If we carried through, we have exactly one digit of precision.
3363 if (FirstSignificant == N) {
3364 exp += FirstSignificant;
3366 buffer.push_back('1');
3370 exp += FirstSignificant;
3371 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
// Appends a decimal rendering of this value to Str.
//   Str              - output buffer; characters are appended, never removed.
//   FormatPrecision  - upper bound on significant decimal digits; when zero
//                      it is derived from semantics->precision below.
//   FormatMaxPadding - when zero, scientific notation is always selected;
//                      otherwise it is compared against the padding that the
//                      plain forms would need.
// Infinities are written as "-Inf" / "+Inf" and NaN as "NaN".
3375 void APFloat::toString(SmallVectorImpl<char> &Str,
3376 unsigned FormatPrecision,
3377 unsigned FormatMaxPadding) const {
3381 return append(Str, "-Inf");
3383 return append(Str, "+Inf");
3385 case fcNaN: return append(Str, "NaN");
3391 if (!FormatMaxPadding)
3392 append(Str, "0.0E+0");
3404 // Decompose the number into an APInt and an exponent.
3405 int exp = exponent - ((int) semantics->precision - 1);
3406 APInt significand(semantics->precision,
3407 makeArrayRef(significandParts(),
3408 partCountForBits(semantics->precision)));
3410 // Set FormatPrecision if zero. We want to do this before we
3411 // truncate trailing zeros, as those are part of the precision.
3412 if (!FormatPrecision) {
3413 // It's an interesting question whether to use the nominal
3414 // precision or the active precision here for denormals.
3416 // FormatPrecision = ceil(significandBits / lg_2(10))
3417 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3420 // Ignore trailing binary zeros.
3421 int trailingZeros = significand.countTrailingZeros();
3422 exp += trailingZeros;
3423 significand = significand.lshr(trailingZeros);
3425 // Change the exponent from 2^e to 10^e.
3428 } else if (exp > 0) {
// A positive binary exponent is folded into the integer: widen by exp bits so
// the shift cannot overflow, then shift left.
3430 significand = significand.zext(semantics->precision + exp);
3431 significand <<= exp;
3433 } else { /* exp < 0 */
3436 // We transform this using the identity:
3437 // (N)(2^-e) == (N)(5^e)(10^-e)
3438 // This means we have to multiply N (the significand) by 5^e.
3439 // To avoid overflow, we have to operate on numbers large
3440 // enough to store N * 5^e:
3441 // log2(N * 5^e) == log2(N) + e * log2(5)
3442 // <= semantics->precision + e * 137 / 59
3443 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3445 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3447 // Multiply significand by 5^e.
3448 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3449 significand = significand.zext(precision);
3450 APInt five_to_the_i(precision, 5);
3452 if (texp & 1) significand *= five_to_the_i;
3456 five_to_the_i *= five_to_the_i;
3460 AdjustToPrecision(significand, exp, FormatPrecision);
3462 llvm::SmallVector<char, 256> buffer;
3465 unsigned precision = significand.getBitWidth();
3466 APInt ten(precision, 10);
3467 APInt digit(precision, 0);
// Peel off decimal digits by repeated division by ten; the remainder of each
// step is the next digit, so buffer receives the least significant digit
// first.
3469 bool inTrail = true;
3470 while (significand != 0) {
3471 // digit <- significand % 10
3472 // significand <- significand / 10
3473 APInt::udivrem(significand, ten, significand, digit);
3475 unsigned d = digit.getZExtValue();
3477 // Drop trailing zeros.
3478 if (inTrail && !d) exp++;
3480 buffer.push_back((char) ('0' + d));
3485 assert(!buffer.empty() && "no characters in buffer!");
3487 // Drop down to FormatPrecision.
3488 // TODO: don't do more precise calculations above than are required.
3489 AdjustToPrecision(buffer, exp, FormatPrecision);
3491 unsigned NDigits = buffer.size();
3493 // Check whether we should use scientific notation.
3494 bool FormatScientific;
3495 if (!FormatMaxPadding)
3496 FormatScientific = true;
3501 // But we shouldn't make the number look more precise than it is.
3502 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3503 NDigits + (unsigned) exp > FormatPrecision);
3505 // Power of the most significant digit.
3506 int MSD = exp + (int) (NDigits - 1);
3509 FormatScientific = false;
3511 // 765e-5 == 0.00765
3513 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3518 // Scientific formatting is pretty straightforward.
3519 if (FormatScientific) {
// Rebase exp so that it is the power of ten of the leading digit.
3520 exp += (NDigits - 1);
// buffer is least-significant-first, so the leading digit is its last element
// and the remaining digits are emitted by walking backwards.
3522 Str.push_back(buffer[NDigits-1]);
3527 for (unsigned I = 1; I != NDigits; ++I)
3528 Str.push_back(buffer[NDigits-1-I]);
3531 Str.push_back(exp >= 0 ? '+' : '-');
3532 if (exp < 0) exp = -exp;
// The exponent's digits are generated lowest first into expbuf and then
// copied out in reverse.
3533 SmallVector<char, 6> expbuf;
3535 expbuf.push_back((char) ('0' + (exp % 10)));
3538 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3539 Str.push_back(expbuf[E-1-I]);
3543 // Non-scientific, positive exponents.
3545 for (unsigned I = 0; I != NDigits; ++I)
3546 Str.push_back(buffer[NDigits-1-I]);
3547 for (unsigned I = 0; I != (unsigned) exp; ++I)
3552 // Non-scientific, negative exponents.
3554 // The number of digits to the left of the decimal point.
3555 int NWholeDigits = exp + (int) NDigits;
3558 if (NWholeDigits > 0) {
3559 for (; I != (unsigned) NWholeDigits; ++I)
3560 Str.push_back(buffer[NDigits-I-1]);
3563 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3567 for (unsigned Z = 1; Z != NZeros; ++Z)
3571 for (; I != NDigits; ++I)
3572 Str.push_back(buffer[NDigits-I-1]);
3575 bool APFloat::getExactInverse(APFloat *inv) const {
3576 // We can only guarantee the existence of an exact inverse for IEEE floats.
3577 if (semantics != &IEEEhalf && semantics != &IEEEsingle &&
3578 semantics != &IEEEdouble && semantics != &IEEEquad)
3581 // Special floats and denormals have no exact inverse.
3582 if (category != fcNormal)
3585 // Check that the number is a power of two by making sure that only the
3586 // integer bit is set in the significand.
3587 if (significandLSB() != semantics->precision - 1)
3591 APFloat reciprocal(*semantics, 1ULL);
3592 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3595 // Avoid multiplication with a denormal, it is not safe on all platforms and
3596 // may be slower than a normal division.
3597 if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
3600 assert(reciprocal.category == fcNormal &&
3601 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);