1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/MathExtras.h"
27 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
29 /* Assumed in hexadecimal significand parsing, and conversion to
30 hexadecimal strings. */
31 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
32 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
36 /* Represents floating point arithmetic semantics. */
38 /* The largest E such that 2^E is representable; this matches the
39 definition of IEEE 754. */
40 exponent_t maxExponent;
42 /* The smallest E such that 2^E is a normalized number; this
43 matches the definition of IEEE 754. */
44 exponent_t minExponent;
46 /* Number of bits in the significand. This includes the integer
48 unsigned int precision;
50 /* True if arithmetic is supported. */
51 unsigned int arithmeticOK;
54 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, true };
55 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
56 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
57 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
58 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
59 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
61 // The PowerPC format consists of two doubles. It does not map cleanly
62 // onto the usual format above. For now only storage of constants of
63 // this type is supported, no arithmetic.
64 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
66 /* A tight upper bound on number of parts required to hold the value
69 power * 815 / (351 * integerPartWidth) + 1
71 However, whilst the result may require only this many parts,
72 because we are multiplying two values to get it, the
73 multiplication may require an extra part with the excess part
74 being zero (consider the trivial case of 1 * 1, tcFullMultiply
75 requires two parts to hold the single-part result). So we add an
76 extra one to guarantee enough space whilst multiplying. */
77 const unsigned int maxExponent = 16383;
78 const unsigned int maxPrecision = 113;
79 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
80 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
81 / (351 * integerPartWidth));
84 /* A bunch of private, handy routines. */
86 static inline unsigned int
87 partCountForBits(unsigned int bits)
89 return ((bits) + integerPartWidth - 1) / integerPartWidth;
92 /* Returns 0U-9U. Return values >= 10U are not digits. */
93 static inline unsigned int
94 decDigitValue(unsigned int c)
100 hexDigitValue(unsigned int c)
120 assertArithmeticOK(const llvm::fltSemantics &semantics) {
121 assert(semantics.arithmeticOK &&
122 "Compile-time arithmetic does not support these semantics");
125 /* Return the value of a decimal exponent of the form
128 If the exponent overflows, returns a large exponent with the
131 readExponent(StringRef::iterator begin, StringRef::iterator end)
134 unsigned int absExponent;
135 const unsigned int overlargeExponent = 24000; /* FIXME. */
136 StringRef::iterator p = begin;
138 assert(p != end && "Exponent has no digits");
140 isNegative = (*p == '-');
141 if (*p == '-' || *p == '+') {
143 assert(p != end && "Exponent has no digits");
146 absExponent = decDigitValue(*p++);
147 assert(absExponent < 10U && "Invalid character in exponent");
149 for (; p != end; ++p) {
152 value = decDigitValue(*p);
153 assert(value < 10U && "Invalid character in exponent");
155 value += absExponent * 10;
156 if (absExponent >= overlargeExponent) {
157 absExponent = overlargeExponent;
158 p = end; /* outwit assert below */
164 assert(p == end && "Invalid exponent in exponent");
167 return -(int) absExponent;
169 return (int) absExponent;
172 /* This is ugly and needs cleaning up, but I don't immediately see
173 how whilst remaining safe. */
175 totalExponent(StringRef::iterator p, StringRef::iterator end,
176 int exponentAdjustment)
178 int unsignedExponent;
179 bool negative, overflow;
182 assert(p != end && "Exponent has no digits");
184 negative = *p == '-';
185 if (*p == '-' || *p == '+') {
187 assert(p != end && "Exponent has no digits");
190 unsignedExponent = 0;
192 for (; p != end; ++p) {
195 value = decDigitValue(*p);
196 assert(value < 10U && "Invalid character in exponent");
198 unsignedExponent = unsignedExponent * 10 + value;
199 if (unsignedExponent > 32767) {
205 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
209 exponent = unsignedExponent;
211 exponent = -exponent;
212 exponent += exponentAdjustment;
213 if (exponent > 32767 || exponent < -32768)
218 exponent = negative ? -32768: 32767;
223 static StringRef::iterator
224 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
225 StringRef::iterator *dot)
227 StringRef::iterator p = begin;
229 while (*p == '0' && p != end)
235 assert(end - begin != 1 && "Significand has no digits");
237 while (*p == '0' && p != end)
244 /* Given a normal decimal floating point number of the form
248 where the decimal point and exponent are optional, fill out the
249 structure D. Exponent is appropriate if the significand is
250 treated as an integer, and normalizedExponent if the significand
251 is taken to have the decimal point after a single leading
254 If the value is zero, V->firstSigDigit points to a non-digit, and
255 the return exponent is zero.
258 const char *firstSigDigit;
259 const char *lastSigDigit;
261 int normalizedExponent;
265 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
268 StringRef::iterator dot = end;
269 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
271 D->firstSigDigit = p;
273 D->normalizedExponent = 0;
275 for (; p != end; ++p) {
277 assert(dot == end && "String contains multiple dots");
282 if (decDigitValue(*p) >= 10U)
287 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
288 assert(p != begin && "Significand has no digits");
289 assert((dot == end || p - begin != 1) && "Significand has no digits");
291 /* p points to the first non-digit in the string */
292 D->exponent = readExponent(p + 1, end);
294 /* Implied decimal point? */
299 /* If number is all zeroes accept any exponent. */
300 if (p != D->firstSigDigit) {
301 /* Drop insignificant trailing zeroes. */
306 while (p != begin && *p == '0');
307 while (p != begin && *p == '.');
310 /* Adjust the exponents for any decimal point. */
311 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
312 D->normalizedExponent = (D->exponent +
313 static_cast<exponent_t>((p - D->firstSigDigit)
314 - (dot > D->firstSigDigit && dot < p)));
320 /* Return the trailing fraction of a hexadecimal number.
321 DIGITVALUE is the first hex digit of the fraction, P points to
324 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
325 unsigned int digitValue)
327 unsigned int hexDigit;
329 /* If the first trailing digit isn't 0 or 8 we can work out the
330 fraction immediately. */
332 return lfMoreThanHalf;
333 else if (digitValue < 8 && digitValue > 0)
334 return lfLessThanHalf;
336 /* Otherwise we need to find the first non-zero digit. */
340 assert(p != end && "Invalid trailing hexadecimal fraction!");
342 hexDigit = hexDigitValue(*p);
344 /* If we ran off the end it is exactly zero or one-half, otherwise
347 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
349 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
352 /* Return the fraction lost were a bignum truncated losing the least
353 significant BITS bits. */
355 lostFractionThroughTruncation(const integerPart *parts,
356 unsigned int partCount,
361 lsb = APInt::tcLSB(parts, partCount);
363 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
365 return lfExactlyZero;
367 return lfExactlyHalf;
368 if (bits <= partCount * integerPartWidth &&
369 APInt::tcExtractBit(parts, bits - 1))
370 return lfMoreThanHalf;
372 return lfLessThanHalf;
375 /* Shift DST right BITS bits noting lost fraction. */
377 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
379 lostFraction lost_fraction;
381 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
383 APInt::tcShiftRight(dst, parts, bits);
385 return lost_fraction;
388 /* Combine the effect of two lost fractions. */
390 combineLostFractions(lostFraction moreSignificant,
391 lostFraction lessSignificant)
393 if (lessSignificant != lfExactlyZero) {
394 if (moreSignificant == lfExactlyZero)
395 moreSignificant = lfLessThanHalf;
396 else if (moreSignificant == lfExactlyHalf)
397 moreSignificant = lfMoreThanHalf;
400 return moreSignificant;
/* The error from the true value, in half-ulps, on multiplying two
   floating point numbers, which differ from the value they
   approximate by at most HUERR1 and HUERR2 half-ulps, is strictly less
   than the returned value.  INEXACTMULTIPLY is true if the
   multiplication itself was inexact.

   See "How to Read Floating Point Numbers Accurately" by William D.
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int inputError = HUerr1 + HUerr2;

  if (inputError == 0)
    return inexactMultiply * 2;  /* <= inexactMultiply half-ulps.  */

  return inexactMultiply + 2 * inputError;
}
421 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
422 when the least significant BITS are truncated. BITS cannot be
425 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
427 unsigned int count, partBits;
428 integerPart part, boundary;
433 count = bits / integerPartWidth;
434 partBits = bits % integerPartWidth + 1;
436 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
439 boundary = (integerPart) 1 << (partBits - 1);
444 if (part - boundary <= boundary - part)
445 return part - boundary;
447 return boundary - part;
450 if (part == boundary) {
453 return ~(integerPart) 0; /* A lot. */
456 } else if (part == boundary - 1) {
459 return ~(integerPart) 0; /* A lot. */
464 return ~(integerPart) 0; /* A lot. */
467 /* Place pow(5, power) in DST, and return the number of parts used.
468 DST must be at least one part larger than size of the answer. */
470 powerOf5(integerPart *dst, unsigned int power)
472 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
474 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
475 pow5s[0] = 78125 * 5;
477 unsigned int partsCount[16] = { 1 };
478 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
480 assert(power <= maxExponent);
485 *p1 = firstEightPowers[power & 7];
491 for (unsigned int n = 0; power; power >>= 1, n++) {
496 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
498 pc = partsCount[n - 1];
499 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
501 if (pow5[pc - 1] == 0)
509 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
511 if (p2[result - 1] == 0)
514 /* Now result is in p1 with partsCount parts and p2 is scratch
516 tmp = p1, p1 = p2, p2 = tmp;
523 APInt::tcAssign(dst, p1, result);
528 /* Zero at the end to avoid modular arithmetic when adding one; used
529 when rounding up during hexadecimal output. */
530 static const char hexDigitsLower[] = "0123456789abcdef0";
531 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
532 static const char infinityL[] = "infinity";
533 static const char infinityU[] = "INFINITY";
534 static const char NaNL[] = "nan";
535 static const char NaNU[] = "NAN";
537 /* Write out an integerPart in hexadecimal, starting with the most
538 significant nibble. Write out exactly COUNT hexdigits, return
541 partAsHex (char *dst, integerPart part, unsigned int count,
542 const char *hexDigitChars)
544 unsigned int result = count;
546 assert(count != 0 && count <= integerPartWidth / 4);
548 part >>= (integerPartWidth - 4 * count);
550 dst[count] = hexDigitChars[part & 0xf];
557 /* Write out an unsigned decimal integer. */
559 writeUnsignedDecimal (char *dst, unsigned int n)
575 /* Write out a signed decimal integer. */
577 writeSignedDecimal (char *dst, int value)
581 dst = writeUnsignedDecimal(dst, -(unsigned) value);
583 dst = writeUnsignedDecimal(dst, value);
590 APFloat::initialize(const fltSemantics *ourSemantics)
594 semantics = ourSemantics;
597 significand.parts = new integerPart[count];
601 APFloat::freeSignificand()
604 delete [] significand.parts;
608 APFloat::assign(const APFloat &rhs)
610 assert(semantics == rhs.semantics);
613 category = rhs.category;
614 exponent = rhs.exponent;
616 exponent2 = rhs.exponent2;
617 if (category == fcNormal || category == fcNaN)
618 copySignificand(rhs);
622 APFloat::copySignificand(const APFloat &rhs)
624 assert(category == fcNormal || category == fcNaN);
625 assert(rhs.partCount() >= partCount());
627 APInt::tcAssign(significandParts(), rhs.significandParts(),
631 /* Make this number a NaN. The significand is taken from FILL when one
632 is supplied and is zero otherwise; the quiet bit is then cleared for a
633 signalling NaN (SNAN) or set for a quiet NaN. */
634 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
639 integerPart *significand = significandParts();
640 unsigned numParts = partCount();
642 // Set the significand bits to the fill.
643 if (!fill || fill->getNumWords() < numParts)
644 APInt::tcSet(significand, 0, numParts);
646 APInt::tcAssign(significand, fill->getRawData(),
647 std::min(fill->getNumWords(), numParts));
649 // Zero out the excess bits of the significand.
650 unsigned bitsToPreserve = semantics->precision - 1;
651 unsigned part = bitsToPreserve / 64;
652 bitsToPreserve %= 64;
653 significand[part] &= ((1ULL << bitsToPreserve) - 1);
654 for (part++; part != numParts; ++part)
655 significand[part] = 0;
658 unsigned QNaNBit = semantics->precision - 2;
661 // We always have to clear the QNaN bit to make it an SNaN.
662 APInt::tcClearBit(significand, QNaNBit);
664 // If there are no bits set in the payload, we have to set
665 // *something* to make it a NaN instead of an infinity;
666 // conventionally, this is the next bit down from the QNaN bit.
667 if (APInt::tcIsZero(significand, numParts))
668 APInt::tcSetBit(significand, QNaNBit - 1);
670 // We always have to set the QNaN bit to make it a QNaN.
671 APInt::tcSetBit(significand, QNaNBit);
674 // For x87 extended precision, we want to make a NaN, not a
675 // pseudo-NaN. Maybe we should expose the ability to make
677 if (semantics == &APFloat::x87DoubleExtended)
678 APInt::tcSetBit(significand, QNaNBit + 1);
681 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
683 APFloat value(Sem, uninitialized);
684 value.makeNaN(SNaN, Negative, fill);
689 APFloat::operator=(const APFloat &rhs)
692 if (semantics != rhs.semantics) {
694 initialize(rhs.semantics);
703 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
706 if (semantics != rhs.semantics ||
707 category != rhs.category ||
710 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
713 if (category==fcZero || category==fcInfinity)
715 else if (category==fcNormal && exponent!=rhs.exponent)
717 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
718 exponent2!=rhs.exponent2)
722 const integerPart* p=significandParts();
723 const integerPart* q=rhs.significandParts();
724 for (; i>0; i--, p++, q++) {
732 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
733 : exponent2(0), sign2(0) {
734 assertArithmeticOK(ourSemantics);
735 initialize(&ourSemantics);
738 exponent = ourSemantics.precision - 1;
739 significandParts()[0] = value;
740 normalize(rmNearestTiesToEven, lfExactlyZero);
743 APFloat::APFloat(const fltSemantics &ourSemantics) : exponent2(0), sign2(0) {
744 assertArithmeticOK(ourSemantics);
745 initialize(&ourSemantics);
750 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
751 : exponent2(0), sign2(0) {
752 assertArithmeticOK(ourSemantics);
753 // Allocates storage if necessary but does not initialize it.
754 initialize(&ourSemantics);
757 APFloat::APFloat(const fltSemantics &ourSemantics,
758 fltCategory ourCategory, bool negative)
759 : exponent2(0), sign2(0) {
760 assertArithmeticOK(ourSemantics);
761 initialize(&ourSemantics);
762 category = ourCategory;
764 if (category == fcNormal)
766 else if (ourCategory == fcNaN)
770 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text)
771 : exponent2(0), sign2(0) {
772 assertArithmeticOK(ourSemantics);
773 initialize(&ourSemantics);
774 convertFromString(text, rmNearestTiesToEven);
777 APFloat::APFloat(const APFloat &rhs) : exponent2(0), sign2(0) {
778 initialize(rhs.semantics);
787 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
788 void APFloat::Profile(FoldingSetNodeID& ID) const {
789 ID.Add(bitcastToAPInt());
793 APFloat::partCount() const
795 return partCountForBits(semantics->precision + 1);
799 APFloat::semanticsPrecision(const fltSemantics &semantics)
801 return semantics.precision;
805 APFloat::significandParts() const
807 return const_cast<APFloat *>(this)->significandParts();
811 APFloat::significandParts()
813 assert(category == fcNormal || category == fcNaN);
816 return significand.parts;
818 return &significand.part;
822 APFloat::zeroSignificand()
825 APInt::tcSet(significandParts(), 0, partCount());
828 /* Increment an fcNormal floating point number's significand. */
830 APFloat::incrementSignificand()
834 carry = APInt::tcIncrement(significandParts(), partCount());
836 /* Our callers should never cause us to overflow. */
841 /* Add the significand of the RHS. Returns the carry flag. */
843 APFloat::addSignificand(const APFloat &rhs)
847 parts = significandParts();
849 assert(semantics == rhs.semantics);
850 assert(exponent == rhs.exponent);
852 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
855 /* Subtract the significand of the RHS with a borrow flag. Returns
858 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
862 parts = significandParts();
864 assert(semantics == rhs.semantics);
865 assert(exponent == rhs.exponent);
867 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
871 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
872 on to the full-precision result of the multiplication. Returns the
875 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
877 unsigned int omsb; // One, not zero, based MSB.
878 unsigned int partsCount, newPartsCount, precision;
879 integerPart *lhsSignificand;
880 integerPart scratch[4];
881 integerPart *fullSignificand;
882 lostFraction lost_fraction;
885 assert(semantics == rhs.semantics);
887 precision = semantics->precision;
888 newPartsCount = partCountForBits(precision * 2);
890 if (newPartsCount > 4)
891 fullSignificand = new integerPart[newPartsCount];
893 fullSignificand = scratch;
895 lhsSignificand = significandParts();
896 partsCount = partCount();
898 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
899 rhs.significandParts(), partsCount, partsCount);
901 lost_fraction = lfExactlyZero;
902 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
903 exponent += rhs.exponent;
906 Significand savedSignificand = significand;
907 const fltSemantics *savedSemantics = semantics;
908 fltSemantics extendedSemantics;
910 unsigned int extendedPrecision;
912 /* Normalize our MSB. */
913 extendedPrecision = precision + precision - 1;
914 if (omsb != extendedPrecision) {
915 APInt::tcShiftLeft(fullSignificand, newPartsCount,
916 extendedPrecision - omsb);
917 exponent -= extendedPrecision - omsb;
920 /* Create new semantics. */
921 extendedSemantics = *semantics;
922 extendedSemantics.precision = extendedPrecision;
924 if (newPartsCount == 1)
925 significand.part = fullSignificand[0];
927 significand.parts = fullSignificand;
928 semantics = &extendedSemantics;
930 APFloat extendedAddend(*addend);
931 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
932 assert(status == opOK);
934 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
936 /* Restore our state. */
937 if (newPartsCount == 1)
938 fullSignificand[0] = significand.part;
939 significand = savedSignificand;
940 semantics = savedSemantics;
942 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
945 exponent -= (precision - 1);
947 if (omsb > precision) {
948 unsigned int bits, significantParts;
951 bits = omsb - precision;
952 significantParts = partCountForBits(omsb);
953 lf = shiftRight(fullSignificand, significantParts, bits);
954 lost_fraction = combineLostFractions(lf, lost_fraction);
958 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
960 if (newPartsCount > 4)
961 delete [] fullSignificand;
963 return lost_fraction;
966 /* Divide our significand by that of the RHS, leaving the quotient in our significand. Returns the lost fraction. */
968 APFloat::divideSignificand(const APFloat &rhs)
970 unsigned int bit, i, partsCount;
971 const integerPart *rhsSignificand;
972 integerPart *lhsSignificand, *dividend, *divisor;
973 integerPart scratch[4];
974 lostFraction lost_fraction;
976 assert(semantics == rhs.semantics);
978 lhsSignificand = significandParts();
979 rhsSignificand = rhs.significandParts();
980 partsCount = partCount();
983 dividend = new integerPart[partsCount * 2];
987 divisor = dividend + partsCount;
989 /* Copy the dividend and divisor as they will be modified in-place. */
990 for (i = 0; i < partsCount; i++) {
991 dividend[i] = lhsSignificand[i];
992 divisor[i] = rhsSignificand[i];
993 lhsSignificand[i] = 0;
996 exponent -= rhs.exponent;
998 unsigned int precision = semantics->precision;
1000 /* Normalize the divisor. */
1001 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1004 APInt::tcShiftLeft(divisor, partsCount, bit);
1007 /* Normalize the dividend. */
1008 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1011 APInt::tcShiftLeft(dividend, partsCount, bit);
1014 /* Ensure the dividend >= divisor initially for the loop below.
1015 Incidentally, this means that the division loop below is
1016 guaranteed to set the integer bit to one. */
1017 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1019 APInt::tcShiftLeft(dividend, partsCount, 1);
1020 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1023 /* Long division. */
1024 for (bit = precision; bit; bit -= 1) {
1025 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1026 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1027 APInt::tcSetBit(lhsSignificand, bit - 1);
1030 APInt::tcShiftLeft(dividend, partsCount, 1);
1033 /* Figure out the lost fraction. */
1034 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1037 lost_fraction = lfMoreThanHalf;
1039 lost_fraction = lfExactlyHalf;
1040 else if (APInt::tcIsZero(dividend, partsCount))
1041 lost_fraction = lfExactlyZero;
1043 lost_fraction = lfLessThanHalf;
1048 return lost_fraction;
1052 APFloat::significandMSB() const
1054 return APInt::tcMSB(significandParts(), partCount());
1058 APFloat::significandLSB() const
1060 return APInt::tcLSB(significandParts(), partCount());
1063 /* Note that a zero result is NOT normalized to fcZero. */
1065 APFloat::shiftSignificandRight(unsigned int bits)
1067 /* Our exponent should not overflow. */
1068 assert((exponent_t) (exponent + bits) >= exponent);
1072 return shiftRight(significandParts(), partCount(), bits);
1075 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1077 APFloat::shiftSignificandLeft(unsigned int bits)
1079 assert(bits < semantics->precision);
1082 unsigned int partsCount = partCount();
1084 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1087 assert(!APInt::tcIsZero(significandParts(), partsCount));
1092 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1096 assert(semantics == rhs.semantics);
1097 assert(category == fcNormal);
1098 assert(rhs.category == fcNormal);
1100 compare = exponent - rhs.exponent;
1102 /* If exponents are equal, do an unsigned bignum comparison of the
1105 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1109 return cmpGreaterThan;
1110 else if (compare < 0)
1116 /* Handle overflow. Sign is preserved. We either become infinity or
1117 the largest finite number. */
1119 APFloat::handleOverflow(roundingMode rounding_mode)
1122 if (rounding_mode == rmNearestTiesToEven ||
1123 rounding_mode == rmNearestTiesToAway ||
1124 (rounding_mode == rmTowardPositive && !sign) ||
1125 (rounding_mode == rmTowardNegative && sign)) {
1126 category = fcInfinity;
1127 return (opStatus) (opOverflow | opInexact);
1130 /* Otherwise we become the largest finite number. */
1131 category = fcNormal;
1132 exponent = semantics->maxExponent;
1133 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1134 semantics->precision);
1139 /* Returns TRUE if, when truncating the current number, with BIT the
1140 new LSB, with the given lost fraction and rounding mode, the result
1141 would need to be rounded away from zero (i.e., by increasing the
1142 significand). This routine must work for fcZero of both signs, and
1143 fcNormal numbers. */
1145 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1146 lostFraction lost_fraction,
1147 unsigned int bit) const
1149 /* NaNs and infinities should not have lost fractions. */
1150 assert(category == fcNormal || category == fcZero);
1152 /* Current callers never pass this so we don't handle it. */
1153 assert(lost_fraction != lfExactlyZero);
1155 switch (rounding_mode) {
1156 case rmNearestTiesToAway:
1157 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1159 case rmNearestTiesToEven:
1160 if (lost_fraction == lfMoreThanHalf)
1163 /* Our zeroes don't have a significand to test. */
1164 if (lost_fraction == lfExactlyHalf && category != fcZero)
1165 return APInt::tcExtractBit(significandParts(), bit);
1172 case rmTowardPositive:
1173 return sign == false;
1175 case rmTowardNegative:
1176 return sign == true;
1178 llvm_unreachable("Invalid rounding mode found");
1182 APFloat::normalize(roundingMode rounding_mode,
1183 lostFraction lost_fraction)
1185 unsigned int omsb; /* One, not zero, based MSB. */
1188 if (category != fcNormal)
1191 /* Before rounding normalize the exponent of fcNormal numbers. */
1192 omsb = significandMSB() + 1;
1195 /* OMSB is numbered from 1. We want to place it in the integer
1196 bit numbered PRECISION if possible, with a compensating change in
1198 exponentChange = omsb - semantics->precision;
1200 /* If the resulting exponent is too high, overflow according to
1201 the rounding mode. */
1202 if (exponent + exponentChange > semantics->maxExponent)
1203 return handleOverflow(rounding_mode);
1205 /* Subnormal numbers have exponent minExponent, and their MSB
1206 is forced based on that. */
1207 if (exponent + exponentChange < semantics->minExponent)
1208 exponentChange = semantics->minExponent - exponent;
1210 /* Shifting left is easy as we don't lose precision. */
1211 if (exponentChange < 0) {
1212 assert(lost_fraction == lfExactlyZero);
1214 shiftSignificandLeft(-exponentChange);
1219 if (exponentChange > 0) {
1222 /* Shift right and capture any new lost fraction. */
1223 lf = shiftSignificandRight(exponentChange);
1225 lost_fraction = combineLostFractions(lf, lost_fraction);
1227 /* Keep OMSB up-to-date. */
1228 if (omsb > (unsigned) exponentChange)
1229 omsb -= exponentChange;
1235 /* Now round the number according to rounding_mode given the lost
1238 /* As specified in IEEE 754, since we do not trap we do not report
1239 underflow for exact results. */
1240 if (lost_fraction == lfExactlyZero) {
1241 /* Canonicalize zeroes. */
1248 /* Increment the significand if we're rounding away from zero. */
1249 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1251 exponent = semantics->minExponent;
1253 incrementSignificand();
1254 omsb = significandMSB() + 1;
1256 /* Did the significand increment overflow? */
1257 if (omsb == (unsigned) semantics->precision + 1) {
1258 /* Renormalize by incrementing the exponent and shifting our
1259 significand right one. However if we already have the
1260 maximum exponent we overflow to infinity. */
1261 if (exponent == semantics->maxExponent) {
1262 category = fcInfinity;
1264 return (opStatus) (opOverflow | opInexact);
1267 shiftSignificandRight(1);
1273 /* The normal case - we were and are not denormal, and any
1274 significand increment above didn't overflow. */
1275 if (omsb == semantics->precision)
1278 /* We have a non-zero denormal. */
1279 assert(omsb < semantics->precision);
1281 /* Canonicalize zeroes. */
1285 /* The fcZero case is a denormal that underflowed to zero. */
1286 return (opStatus) (opUnderflow | opInexact);
1290 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1292 switch (convolve(category, rhs.category)) {
1294 llvm_unreachable(0);
1296 case convolve(fcNaN, fcZero):
1297 case convolve(fcNaN, fcNormal):
1298 case convolve(fcNaN, fcInfinity):
1299 case convolve(fcNaN, fcNaN):
1300 case convolve(fcNormal, fcZero):
1301 case convolve(fcInfinity, fcNormal):
1302 case convolve(fcInfinity, fcZero):
1305 case convolve(fcZero, fcNaN):
1306 case convolve(fcNormal, fcNaN):
1307 case convolve(fcInfinity, fcNaN):
1309 copySignificand(rhs);
1312 case convolve(fcNormal, fcInfinity):
1313 case convolve(fcZero, fcInfinity):
1314 category = fcInfinity;
1315 sign = rhs.sign ^ subtract;
1318 case convolve(fcZero, fcNormal):
1320 sign = rhs.sign ^ subtract;
1323 case convolve(fcZero, fcZero):
1324 /* Sign depends on rounding mode; handled by caller. */
1327 case convolve(fcInfinity, fcInfinity):
1328 /* Differently signed infinities can only be validly
1330 if (((sign ^ rhs.sign)!=0) != subtract) {
1337 case convolve(fcNormal, fcNormal):
1342 /* Add or subtract two normal numbers. */
1344 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1347 lostFraction lost_fraction;
1350 /* Determine if the operation on the absolute values is effectively
1351 an addition or subtraction. */
1352 subtract ^= (sign ^ rhs.sign) ? true : false;
1354 /* Are we bigger exponent-wise than the RHS? */
1355 bits = exponent - rhs.exponent;
1357 /* Subtraction is more subtle than one might naively expect. */
1359 APFloat temp_rhs(rhs);
1363 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1364 lost_fraction = lfExactlyZero;
1365 } else if (bits > 0) {
1366 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1367 shiftSignificandLeft(1);
1370 lost_fraction = shiftSignificandRight(-bits - 1);
1371 temp_rhs.shiftSignificandLeft(1);
1376 carry = temp_rhs.subtractSignificand
1377 (*this, lost_fraction != lfExactlyZero);
1378 copySignificand(temp_rhs);
1381 carry = subtractSignificand
1382 (temp_rhs, lost_fraction != lfExactlyZero);
1385 /* Invert the lost fraction - it was on the RHS and
1387 if (lost_fraction == lfLessThanHalf)
1388 lost_fraction = lfMoreThanHalf;
1389 else if (lost_fraction == lfMoreThanHalf)
1390 lost_fraction = lfLessThanHalf;
1392 /* The code above is intended to ensure that no borrow is
1398 APFloat temp_rhs(rhs);
1400 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1401 carry = addSignificand(temp_rhs);
1403 lost_fraction = shiftSignificandRight(-bits);
1404 carry = addSignificand(rhs);
1407 /* We have a guard bit; generating a carry cannot happen. */
1412 return lost_fraction;
// Resolves a multiplication by operand category.  The switch key packs this
// value's category and RHS's category together with convolve().
1416 APFloat::multiplySpecials(const APFloat &rhs)
1418 switch (convolve(category, rhs.category)) {
1420 llvm_unreachable(0);
1422 case convolve(fcNaN, fcZero):
1423 case convolve(fcNaN, fcNormal):
1424 case convolve(fcNaN, fcInfinity):
1425 case convolve(fcNaN, fcNaN):
1428 case convolve(fcZero, fcNaN):
1429 case convolve(fcNormal, fcNaN):
1430 case convolve(fcInfinity, fcNaN):
// Only RHS is a NaN here: take over its significand.
1432 copySignificand(rhs);
1435 case convolve(fcNormal, fcInfinity):
1436 case convolve(fcInfinity, fcNormal):
1437 case convolve(fcInfinity, fcInfinity):
1438 category = fcInfinity;
1441 case convolve(fcZero, fcNormal):
1442 case convolve(fcNormal, fcZero):
1443 case convolve(fcZero, fcZero):
1447 case convolve(fcZero, fcInfinity):
1448 case convolve(fcInfinity, fcZero):
1452 case convolve(fcNormal, fcNormal):
// Resolves a division (this / RHS) by operand category.  The switch key
// packs this value's category and RHS's category together with convolve().
1458 APFloat::divideSpecials(const APFloat &rhs)
1460 switch (convolve(category, rhs.category)) {
1462 llvm_unreachable(0);
1464 case convolve(fcNaN, fcZero):
1465 case convolve(fcNaN, fcNormal):
1466 case convolve(fcNaN, fcInfinity):
1467 case convolve(fcNaN, fcNaN):
1468 case convolve(fcInfinity, fcZero):
1469 case convolve(fcInfinity, fcNormal):
1470 case convolve(fcZero, fcInfinity):
1471 case convolve(fcZero, fcNormal):
1474 case convolve(fcZero, fcNaN):
1475 case convolve(fcNormal, fcNaN):
1476 case convolve(fcInfinity, fcNaN):
// Only RHS is a NaN here: take over its significand.
1478 copySignificand(rhs);
1481 case convolve(fcNormal, fcInfinity):
// Normal divided by zero: the result category becomes infinity.
1485 case convolve(fcNormal, fcZero):
1486 category = fcInfinity;
1489 case convolve(fcInfinity, fcInfinity):
1490 case convolve(fcZero, fcZero):
1494 case convolve(fcNormal, fcNormal):
// Resolves a mod operation (this mod RHS) by operand category.  The switch
// key packs this value's category and RHS's category together with
// convolve().  mod() only goes on to do arithmetic when both operands are
// still fcNormal after this call.
1500 APFloat::modSpecials(const APFloat &rhs)
1502 switch (convolve(category, rhs.category)) {
1504 llvm_unreachable(0);
1506 case convolve(fcNaN, fcZero):
1507 case convolve(fcNaN, fcNormal):
1508 case convolve(fcNaN, fcInfinity):
1509 case convolve(fcNaN, fcNaN):
1510 case convolve(fcZero, fcInfinity):
1511 case convolve(fcZero, fcNormal):
1512 case convolve(fcNormal, fcInfinity):
1515 case convolve(fcZero, fcNaN):
1516 case convolve(fcNormal, fcNaN):
1517 case convolve(fcInfinity, fcNaN):
// Only RHS is a NaN here: take over its significand.
1519 copySignificand(rhs);
1522 case convolve(fcNormal, fcZero):
1523 case convolve(fcInfinity, fcZero):
1524 case convolve(fcInfinity, fcNormal):
1525 case convolve(fcInfinity, fcInfinity):
1526 case convolve(fcZero, fcZero):
1530 case convolve(fcNormal, fcNormal):
// Takes no arguments.  Presumably inverts the sign flag of this value, going
// by the name -- TODO confirm against the body.
1537 APFloat::changeSign()
1539 /* Look mummy, this one's easy. */
// Takes no arguments.  Presumably forces the sign flag of this value to
// non-negative, going by the name -- TODO confirm against the body.
1544 APFloat::clearSign()
1546 /* So is this one. */
// Presumably gives this value the sign of RHS, going by the name -- TODO
// confirm against the body.  roundToIntegral uses it to give its magic
// constant the sign of the value being rounded.
1551 APFloat::copySign(const APFloat &rhs)
1557 /* Normalized addition or subtraction. */
// Shared implementation behind add() and subtract(); they pass false and
// true respectively as the final argument.  The category-only cases are
// tried first through addOrSubtractSpecials, which uses opDivByZero as an
// in-band marker meaning that significand arithmetic is required.
1559 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1564 assertArithmeticOK(*semantics);
1566 fs = addOrSubtractSpecials(rhs, subtract);
1568 /* This return code means it was not a simple case. */
1569 if (fs == opDivByZero) {
1570 lostFraction lost_fraction;
1572 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1573 fs = normalize(rounding_mode, lost_fraction);
1575 /* Can only be zero if we lost no fraction. */
1576 assert(category != fcZero || lost_fraction == lfExactlyZero);
1579 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1580 positive zero unless rounding to minus infinity, except that
1581 adding two like-signed zeroes gives that zero. */
1582 if (category == fcZero) {
// The sign is recomputed unless RHS is a zero whose effective sign (after
// accounting for SUBTRACT) matches the sign of this value.
1583 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1584 sign = (rounding_mode == rmTowardNegative);
1590 /* Normalized addition. */
// Adds RHS to this value in place; forwards to addOrSubtract with the
// subtract flag cleared and returns its status.
1592 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1594 return addOrSubtract(rhs, rounding_mode, false);
1597 /* Normalized subtraction. */
// Subtracts RHS from this value in place; forwards to addOrSubtract with the
// subtract flag set and returns its status.
1599 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1601 return addOrSubtract(rhs, rounding_mode, true);
1604 /* Normalized multiply. */
// Multiplies this value by RHS in place.  Category-only cases are resolved
// by multiplySpecials; significand arithmetic runs only when the category is
// still fcNormal afterwards.
1606 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1610 assertArithmeticOK(*semantics);
1612 fs = multiplySpecials(rhs);
1614 if (category == fcNormal) {
// A zero second argument means there is no addend (plain multiply rather
// than the fused multiply-add path).
1615 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1616 fs = normalize(rounding_mode, lost_fraction);
// Any discarded fraction makes the result inexact, whatever normalize said.
1617 if (lost_fraction != lfExactlyZero)
1618 fs = (opStatus) (fs | opInexact);
1624 /* Normalized divide. */
// Divides this value by RHS in place.  Category-only cases are resolved by
// divideSpecials; significand arithmetic runs only when the category is
// still fcNormal afterwards.
1626 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1630 assertArithmeticOK(*semantics);
1632 fs = divideSpecials(rhs);
1634 if (category == fcNormal) {
1635 lostFraction lost_fraction = divideSignificand(rhs);
1636 fs = normalize(rounding_mode, lost_fraction);
// Any discarded fraction makes the result inexact, whatever normalize said.
1637 if (lost_fraction != lfExactlyZero)
1638 fs = (opStatus) (fs | opInexact);
1644 /* Normalized remainder. This is not currently correct in all cases. */
1646 APFloat::remainder(const APFloat &rhs)
1650 unsigned int origSign = sign;
1652 assertArithmeticOK(*semantics);
1653 fs = V.divide(rhs, rmNearestTiesToEven);
1654 if (fs == opDivByZero)
1657 int parts = partCount();
1658 integerPart *x = new integerPart[parts];
1660 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1661 rmNearestTiesToEven, &ignored);
1662 if (fs==opInvalidOp)
1665 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1666 rmNearestTiesToEven);
1667 assert(fs==opOK); // should always work
1669 fs = V.multiply(rhs, rmNearestTiesToEven);
1670 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1672 fs = subtract(V, rmNearestTiesToEven);
1673 assert(fs==opOK || fs==opInexact); // likewise
1676 sign = origSign; // IEEE754 requires this
1681 /* Normalized llvm frem (C fmod).
1682 This is not currently correct in all cases. */
1684 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1687 assertArithmeticOK(*semantics);
1688 fs = modSpecials(rhs);
1690 if (category == fcNormal && rhs.category == fcNormal) {
1692 unsigned int origSign = sign;
1694 fs = V.divide(rhs, rmNearestTiesToEven);
1695 if (fs == opDivByZero)
1698 int parts = partCount();
1699 integerPart *x = new integerPart[parts];
1701 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1702 rmTowardZero, &ignored);
1703 if (fs==opInvalidOp)
1706 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1707 rmNearestTiesToEven);
1708 assert(fs==opOK); // should always work
1710 fs = V.multiply(rhs, rounding_mode);
1711 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1713 fs = subtract(V, rounding_mode);
1714 assert(fs==opOK || fs==opInexact); // likewise
1717 sign = origSign; // IEEE754 requires this
1723 /* Normalized fused-multiply-add. */
// Computes this * MULTIPLICAND + ADDEND in place.  When all three operands
// are normal the product and sum go through a single multiplySignificand
// call that is given the addend; otherwise the work is split into
// multiplySpecials followed by an ordinary addition.
1725 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1726 const APFloat &addend,
1727 roundingMode rounding_mode)
1731 assertArithmeticOK(*semantics);
1733 /* Post-multiplication sign, before addition. */
1734 sign ^= multiplicand.sign;
1736 /* If and only if all arguments are normal do we need to do an
1737 extended-precision calculation. */
1738 if (category == fcNormal &&
1739 multiplicand.category == fcNormal &&
1740 addend.category == fcNormal) {
1741 lostFraction lost_fraction;
1743 lost_fraction = multiplySignificand(multiplicand, &addend);
1744 fs = normalize(rounding_mode, lost_fraction);
// Any discarded fraction makes the result inexact, whatever normalize said.
1745 if (lost_fraction != lfExactlyZero)
1746 fs = (opStatus) (fs | opInexact);
1748 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1749 positive zero unless rounding to minus infinity, except that
1750 adding two like-signed zeroes gives that zero. */
1751 if (category == fcZero && sign != addend.sign)
1752 sign = (rounding_mode == rmTowardNegative);
1754 fs = multiplySpecials(multiplicand);
1756 /* FS can only be opOK or opInvalidOp. There is no more work
1757 to do in the latter case. The IEEE-754R standard says it is
1758 implementation-defined in this case whether, if ADDEND is a
1759 quiet NaN, we raise invalid op; this implementation does so.
1761 If we need to do the addition we can do so with normal
1764 fs = addOrSubtract(addend, rounding_mode, false);
1770 /* Rounding-mode correct round to integral value. */
// Rounds this value in place to an integral value under ROUNDING_MODE and
// returns the resulting status.
1771 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1773 assertArithmeticOK(*semantics);
1775 // If the exponent is large enough, we know that this value is already
1776 // integral, and the arithmetic below would potentially cause it to saturate
1777 // to +/-Inf. Bail out early instead.
1778 if (category == fcNormal && exponent+1 >= (int)semanticsPrecision(*semantics))
1781 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1782 // precision of our format, and then subtract it back off again. The choice
1783 // of rounding modes for the addition/subtraction determines the rounding mode
1784 // for our integral rounding as well.
1785 // NOTE: When the input value is negative, we do subtraction followed by
1786 // addition instead.
1787 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1788 IntegerConstant <<= semanticsPrecision(*semantics)-1;
// IntegerConstant now holds 2^(p-1); convert it to a float in our own
// semantics and give it the sign of the value being rounded.
1789 APFloat MagicConstant(*semantics);
1790 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1791 rmNearestTiesToEven);
1792 MagicConstant.copySign(*this);
1797 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1798 bool inputSign = isNegative();
1800 fs = add(MagicConstant, rounding_mode);
// Anything other than an exact or inexact addition is treated as a failure.
1801 if (fs != opOK && fs != opInexact)
1804 fs = subtract(MagicConstant, rounding_mode);
1806 // Restore the input sign.
1807 if (inputSign != isNegative())
1814 /* Comparison requires normalized numbers. */
// Returns how this value orders against RHS.  Both operands must use the
// same semantics (asserted).  Every pairing that involves a NaN yields
// cmpUnordered.
1816 APFloat::compare(const APFloat &rhs) const
1820 assertArithmeticOK(*semantics);
1821 assert(semantics == rhs.semantics);
1823 switch (convolve(category, rhs.category)) {
1825 llvm_unreachable(0);
1827 case convolve(fcNaN, fcZero):
1828 case convolve(fcNaN, fcNormal):
1829 case convolve(fcNaN, fcInfinity):
1830 case convolve(fcNaN, fcNaN):
1831 case convolve(fcZero, fcNaN):
1832 case convolve(fcNormal, fcNaN):
1833 case convolve(fcInfinity, fcNaN):
1834 return cmpUnordered;
// This value is in the category of larger magnitude.
1836 case convolve(fcInfinity, fcNormal):
1837 case convolve(fcInfinity, fcZero):
1838 case convolve(fcNormal, fcZero):
1842 return cmpGreaterThan;
// RHS is in the category of larger magnitude.
1844 case convolve(fcNormal, fcInfinity):
1845 case convolve(fcZero, fcInfinity):
1846 case convolve(fcZero, fcNormal):
1848 return cmpGreaterThan;
1852 case convolve(fcInfinity, fcInfinity):
1853 if (sign == rhs.sign)
1858 return cmpGreaterThan;
1860 case convolve(fcZero, fcZero):
1863 case convolve(fcNormal, fcNormal):
1867 /* Two normal numbers. Do they have the same sign? */
1868 if (sign != rhs.sign) {
1870 result = cmpLessThan;
1872 result = cmpGreaterThan;
1874 /* Compare absolute values; invert result if negative. */
1875 result = compareAbsoluteValue(rhs);
1878 if (result == cmpLessThan)
1879 result = cmpGreaterThan;
1880 else if (result == cmpGreaterThan)
1881 result = cmpLessThan;
1888 /// APFloat::convert - convert a value of one floating point type to another.
1889 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1890 /// records whether the transformation lost information, i.e. whether
1891 /// converting the result back to the original type will produce the
1892 /// original value (this is almost the same as return value==opOK, but there
1893 /// are edge cases where this is not so).
///
/// The conversion happens in place: the significand is shifted to the new
/// precision, the storage is resized when the part count changes, the
/// semantics pointer is switched, and normal values are then normalized
/// under \p rounding_mode.
1896 APFloat::convert(const fltSemantics &toSemantics,
1897 roundingMode rounding_mode, bool *losesInfo)
1899 lostFraction lostFraction;
1900 unsigned int newPartCount, oldPartCount;
1903 const fltSemantics &fromSemantics = *semantics;
1905 assertArithmeticOK(fromSemantics);
1906 assertArithmeticOK(toSemantics);
1907 lostFraction = lfExactlyZero;
1908 newPartCount = partCountForBits(toSemantics.precision + 1);
1909 oldPartCount = partCount();
// Positive when widening the significand, negative when narrowing it.
1910 shift = toSemantics.precision - fromSemantics.precision;
1912 bool X86SpecialNan = false;
// Flags an x87 NaN whose top or second-from-top significand bit is clear.
1913 if (&fromSemantics == &APFloat::x87DoubleExtended &&
1914 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1915 (!(*significandParts() & 0x8000000000000000ULL) ||
1916 !(*significandParts() & 0x4000000000000000ULL))) {
1917 // x86 has some unusual NaNs which cannot be represented in any other
1918 // format; note them here.
1919 X86SpecialNan = true;
1922 // If this is a truncation, perform the shift before we narrow the storage.
1923 if (shift < 0 && (category==fcNormal || category==fcNaN))
1924 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1926 // Fix the storage so it can hold to new value.
1927 if (newPartCount > oldPartCount) {
1928 // The new type requires more storage; make it available.
1929 integerPart *newParts;
1930 newParts = new integerPart[newPartCount];
1931 APInt::tcSet(newParts, 0, newPartCount);
// Only normal values and NaNs carry significand bits worth copying.
1932 if (category==fcNormal || category==fcNaN)
1933 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1935 significand.parts = newParts;
1936 } else if (newPartCount == 1 && oldPartCount != 1) {
1937 // Switch to built-in storage for a single part.
1938 integerPart newPart = 0;
1939 if (category==fcNormal || category==fcNaN)
1940 newPart = significandParts()[0];
1942 significand.part = newPart;
1945 // Now that we have the right storage, switch the semantics.
1946 semantics = &toSemantics;
1948 // If this is an extension, perform the shift now that the storage is
1950 if (shift > 0 && (category==fcNormal || category==fcNaN))
1951 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1953 if (category == fcNormal) {
1954 fs = normalize(rounding_mode, lostFraction);
1955 *losesInfo = (fs != opOK);
1956 } else if (category == fcNaN) {
// A NaN loses information if payload bits were shifted out or if it was one
// of the x87-only encodings flagged above.
1957 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
1958 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1959 // does not give you back the same bits. This is dubious, and we
1960 // don't currently do it. You're really supposed to get
1961 // an invalid operation signal at runtime, but nobody does that.
1971 /* Convert a floating point number to an integer according to the
1972 rounding mode. If the rounded integer value is out of range this
1973 returns an invalid operation exception and the contents of the
1974 destination parts are unspecified. If the rounded value is in
1975 range but the floating point number is not the exact integer, the C
1976 standard doesn't require an inexact exception to be raised. IEEE
1977 854 does require it so we do that.
1979 Note that for conversions to integer type the C standard requires
1980 round-to-zero to always be used. */
// PARTS receives the integer and must have room for partCountForBits(WIDTH)
// parts; WIDTH is the destination width in bits.
1982 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1984 roundingMode rounding_mode,
1985 bool *isExact) const
1987 lostFraction lost_fraction;
1988 const integerPart *src;
1989 unsigned int dstPartsCount, truncatedBits;
1991 assertArithmeticOK(*semantics);
1995 /* Handle the three special cases first. */
1996 if (category == fcInfinity || category == fcNaN)
1999 dstPartsCount = partCountForBits(width);
2001 if (category == fcZero) {
2002 APInt::tcSet(parts, 0, dstPartsCount);
2003 // Negative zero can't be represented as an int.
2008 src = significandParts();
2010 /* Step 1: place our absolute value, with any fraction truncated, in
2013 /* Our absolute value is less than one; truncate everything. */
2014 APInt::tcSet(parts, 0, dstPartsCount);
2015 /* For exponent -1 the integer bit represents .5, look at that.
2016 For smaller exponents leftmost truncated bit is 0. */
2017 truncatedBits = semantics->precision -1U - exponent;
2019 /* We want the most significant (exponent + 1) bits; the rest are
2021 unsigned int bits = exponent + 1U;
2023 /* Hopelessly large in magnitude? */
2027 if (bits < semantics->precision) {
2028 /* We truncate (semantics->precision - bits) bits. */
2029 truncatedBits = semantics->precision - bits;
2030 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
2032 /* We want at least as many bits as are available. */
2033 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2034 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2039 /* Step 2: work out any lost fraction, and increment the absolute
2040 value if we would round away from zero. */
2041 if (truncatedBits) {
2042 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2044 if (lost_fraction != lfExactlyZero &&
2045 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2046 if (APInt::tcIncrement(parts, dstPartsCount))
2047 return opInvalidOp; /* Overflow. */
2050 lost_fraction = lfExactlyZero;
2053 /* Step 3: check if we fit in the destination. */
// omsb is the one-based position of the highest set bit of the magnitude.
2054 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2058 /* Negative numbers cannot be represented as unsigned. */
2062 /* It takes omsb bits to represent the unsigned integer value.
2063 We lose a bit for the sign, but care is needed as the
2064 maximally negative integer is a special case. */
2065 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2068 /* This case can happen because of rounding. */
2073 APInt::tcNegate (parts, dstPartsCount);
2075 if (omsb >= width + !isSigned)
2079 if (lost_fraction == lfExactlyZero) {
2086 /* Same as convertToSignExtendedInteger, except we provide
2087 deterministic values in case of an invalid operation exception,
2088 namely zero for NaNs and the minimal or maximal value respectively
2089 for underflow or overflow.
2090 The *isExact output tells whether the result is exact, in the sense
2091 that converting it back to the original floating point type produces
2092 the original value. This is almost equivalent to result==opOK,
2093 except for negative zeroes.
// PARTS must have room for partCountForBits(WIDTH) parts.
2096 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2098 roundingMode rounding_mode, bool *isExact) const
2102 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2105 if (fs == opInvalidOp) {
2106 unsigned int bits, dstPartsCount;
2108 dstPartsCount = partCountForBits(width);
2110 if (category == fcNaN)
// Number of value bits available once any sign bit is set aside.
2115 bits = width - isSigned;
2117 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
// For a negative value in a signed destination, shift so that only the top
// bit of the WIDTH-bit field can remain set.
2118 if (sign && isSigned)
2119 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2125 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2126 an APSInt, whose initial bit-width and signed-ness are used to determine the
2127 precision of the conversion.
// The conversion is done into a temporary word buffer sized from RESULT and
// then assigned back to RESULT as an APInt of the same bit width.
2130 APFloat::convertToInteger(APSInt &result,
2131 roundingMode rounding_mode, bool *isExact) const
2133 unsigned bitWidth = result.getBitWidth();
2134 SmallVector<uint64_t, 4> parts(result.getNumWords());
2135 opStatus status = convertToInteger(
2136 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2137 // Keeps the original signed-ness.
2138 result = APInt(bitWidth, parts);
2142 /* Convert an unsigned integer SRC to a floating point number,
2143 rounding according to ROUNDING_MODE. The sign of the floating
2144 point number is not modified. */
// SRC holds SRCCOUNT parts.  The category is set to fcNormal before the
// result is normalized; the return value is the status from normalize.
2146 APFloat::convertFromUnsignedParts(const integerPart *src,
2147 unsigned int srcCount,
2148 roundingMode rounding_mode)
2150 unsigned int omsb, precision, dstCount;
2152 lostFraction lost_fraction;
2154 assertArithmeticOK(*semantics);
2155 category = fcNormal;
// omsb is the one-based position of the highest set bit of SRC.
2156 omsb = APInt::tcMSB(src, srcCount) + 1;
2157 dst = significandParts();
2158 dstCount = partCount();
2159 precision = semantics->precision;
2161 /* We want the most significant PRECISION bits of SRC. There may not
2162 be that many; extract what we can. */
2163 if (precision <= omsb) {
2164 exponent = omsb - 1;
2165 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2167 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
// SRC has fewer significant bits than the format: take all of them, with
// nothing lost.
2169 exponent = precision - 1;
2170 lost_fraction = lfExactlyZero;
2171 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2174 return normalize(rounding_mode, lost_fraction);
// Sets this value from the integer VAL, rounding under ROUNDING_MODE.  The
// raw words of the integer are handed to convertFromUnsignedParts; a
// negative input in signed mode is dealt with by the branch below first.
2178 APFloat::convertFromAPInt(const APInt &Val,
2180 roundingMode rounding_mode)
2182 unsigned int partCount = Val.getNumWords();
2186 if (isSigned && api.isNegative()) {
2191 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2194 /* Convert a two's complement integer SRC to a floating point number,
2195 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2196 integer is signed, in which case it must be sign-extended. */
// SRC holds SRCCOUNT parts.  The input is treated as negative when the top
// bit of the most significant part is set.
2198 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2199 unsigned int srcCount,
2201 roundingMode rounding_mode)
2205 assertArithmeticOK(*semantics);
2207 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2210 /* If we're signed and negative negate a copy. */
2212 copy = new integerPart[srcCount];
2213 APInt::tcAssign(copy, src, srcCount);
2214 APInt::tcNegate(copy, srcCount);
2215 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
// Otherwise the parts can be converted directly, without a copy.
2219 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2225 /* FIXME: should this just take a const APInt reference? */
// Sets this value from the WIDTH-bit integer stored in PARTS, rounding under
// ROUNDING_MODE.  The parts are wrapped in an APInt of that width, and the
// input is treated as negative when ISSIGNED is set and bit WIDTH-1 is set.
2227 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2228 unsigned int width, bool isSigned,
2229 roundingMode rounding_mode)
2231 unsigned int partCount = partCountForBits(width);
2232 APInt api = APInt(width, makeArrayRef(parts, partCount));
2235 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2240 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
// Parses a hexadecimal significand followed by a mandatory p/P exponent
// from S into this value, then normalizes under ROUNDING_MODE.
// convertFromString strips the sign and the 0x prefix before calling this.
// Malformed input is rejected through assertions only.
2244 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2246 lostFraction lost_fraction = lfExactlyZero;
2247 integerPart *significand;
2248 unsigned int bitPos, partsCount;
2249 StringRef::iterator dot, firstSignificantDigit;
2253 category = fcNormal;
2255 significand = significandParts();
2256 partsCount = partCount();
// Nibbles are written downwards starting from the top of the significand.
2257 bitPos = partsCount * integerPartWidth;
2259 /* Skip leading zeroes and any (hexa)decimal point. */
2260 StringRef::iterator begin = s.begin();
2261 StringRef::iterator end = s.end();
2262 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2263 firstSignificantDigit = p;
2266 integerPart hex_value;
2269 assert(dot == end && "String contains multiple dots");
2276 hex_value = hexDigitValue(*p);
// hexDigitValue yields -1U for a character that is not a hex digit.
2277 if (hex_value == -1U) {
2286 /* Store the number whilst 4-bit nibbles remain. */
2289 hex_value <<= bitPos % integerPartWidth;
2290 significand[bitPos / integerPartWidth] |= hex_value;
// Digits that no longer fit only contribute to the lost fraction; the rest
// of the hex digits are then skipped.
2292 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2293 while (p != end && hexDigitValue(*p) != -1U)
2300 /* Hex floats require an exponent but not a hexadecimal point. */
2301 assert(p != end && "Hex strings require an exponent");
2302 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2303 assert(p != begin && "Significand has no digits");
2304 assert((dot == end || p - begin != 1) && "Significand has no digits");
2306 /* Ignore the exponent if we are zero. */
2307 if (p != firstSignificantDigit) {
2310 /* Implicit hexadecimal point? */
2314 /* Calculate the exponent adjustment implicit in the number of
2315 significant digits. */
2316 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2317 if (expAdjustment < 0)
// Each hex digit accounts for four binary exponent steps.
2319 expAdjustment = expAdjustment * 4 - 1;
2321 /* Adjust for writing the significand starting at the most
2322 significant nibble. */
2323 expAdjustment += semantics->precision;
2324 expAdjustment -= partsCount * integerPartWidth;
2326 /* Adjust for the given exponent. */
2327 exponent = totalExponent(p + 1, end, expAdjustment);
2330 return normalize(rounding_mode, lost_fraction);
// Sets this value to the integer in DECSIGPARTS (SIGPARTCOUNT parts) scaled
// by 10^EXP, rounded under ROUNDING_MODE.  The work is done in a wider
// scratch format whose part count doubles on every retry, until the error
// bound shows that truncating the scratch result rounds correctly.
2334 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2335 unsigned sigPartCount, int exp,
2336 roundingMode rounding_mode)
2338 unsigned int parts, pow5PartCount;
// Scratch semantics; the precision field is filled in on each iteration.
2339 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2340 integerPart pow5Parts[maxPowerOfFiveParts];
2343 isNearest = (rounding_mode == rmNearestTiesToEven ||
2344 rounding_mode == rmNearestTiesToAway);
2346 parts = partCountForBits(semantics->precision + 11);
2348 /* Calculate pow(5, abs(exp)). */
2349 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2351 for (;; parts *= 2) {
2352 opStatus sigStatus, powStatus;
2353 unsigned int excessPrecision, truncatedBits;
2355 calcSemantics.precision = parts * integerPartWidth - 1;
// Bits the scratch format carries beyond the destination precision.
2356 excessPrecision = calcSemantics.precision - semantics->precision;
2357 truncatedBits = excessPrecision;
2359 APFloat decSig(calcSemantics, fcZero, sign);
2360 APFloat pow5(calcSemantics, fcZero, false);
2362 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2363 rmNearestTiesToEven);
2364 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2365 rmNearestTiesToEven);
2366 /* Add exp, as 10^n = 5^n * 2^n. */
2367 decSig.exponent += exp;
2369 lostFraction calcLostFraction;
2370 integerPart HUerr, HUdistance;
2371 unsigned int powHUerr;
2374 /* multiplySignificand leaves the precision-th bit set to 1. */
2375 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2376 powHUerr = powStatus != opOK;
2378 calcLostFraction = decSig.divideSignificand(pow5);
2379 /* Denormal numbers have less precision. */
2380 if (decSig.exponent < semantics->minExponent) {
2381 excessPrecision += (semantics->minExponent - decSig.exponent);
2382 truncatedBits = excessPrecision;
2383 if (excessPrecision > calcSemantics.precision)
2384 excessPrecision = calcSemantics.precision;
2386 /* Extra half-ulp lost in reciprocal of exponent. */
2387 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2390 /* Both multiplySignificand and divideSignificand return the
2391 result with the integer bit set. */
2392 assert(APInt::tcExtractBit
2393 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2395 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2397 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2398 excessPrecision, isNearest);
2400 /* Are we guaranteed to round correctly if we truncate? */
2401 if (HUdistance >= HUerr) {
2402 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2403 calcSemantics.precision - excessPrecision,
2405 /* Take the exponent of decSig. If we tcExtract-ed fewer bits
2406 above we must adjust our exponent to compensate for the
2407 implicit right shift. */
2408 exponent = (decSig.exponent + semantics->precision
2409 - (calcSemantics.precision - excessPrecision));
2410 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2413 return normalize(rounding_mode, calcLostFraction);
// Parses the decimal text in STR into this value, rounding under
// ROUNDING_MODE.  Exponents that are obviously out of range are settled with
// integer bounds checks; everything else is accumulated into a bignum and
// passed to roundSignificandWithExponent.
2419 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2424 /* Scan the text. */
2425 StringRef::iterator p = str.begin();
2426 interpretDecimal(p, str.end(), &D);
2428 /* Handle the quick cases. First the case of no significant digits,
2429 i.e. zero, and then exponents that are obviously too large or too
2430 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2431 definitely overflows if
2433 (exp - 1) * L >= maxExponent
2435 and definitely underflows to zero where
2437 (exp + 1) * L <= minExponent - precision
2439 With integer arithmetic the tightest bounds for L are
2441 93/28 < L < 196/59 [ numerator <= 256 ]
2442 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2445 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2449 /* Check whether the normalized exponent is high enough to overflow
2450 max during the log-rebasing in the max-exponent check below. */
2451 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2452 fs = handleOverflow(rounding_mode);
2454 /* If it wasn't, then it also wasn't high enough to overflow max
2455 during the log-rebasing in the min-exponent check. Check that it
2456 won't overflow min in either check, then perform the min-exponent
2458 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2459 (D.normalizedExponent + 1) * 28738 <=
2460 8651 * (semantics->minExponent - (int) semantics->precision)) {
2461 /* Underflow to zero and round. */
2463 fs = normalize(rounding_mode, lfLessThanHalf);
2465 /* We can finally safely perform the max-exponent check. */
2466 } else if ((D.normalizedExponent - 1) * 42039
2467 >= 12655 * semantics->maxExponent) {
2468 /* Overflow and round. */
2469 fs = handleOverflow(rounding_mode);
2471 integerPart *decSignificand;
2472 unsigned int partCount;
2474 /* A tight upper bound on number of bits required to hold an
2475 N-digit decimal integer is N * 196 / 59. Allocate enough space
2476 to hold the full significand, and an extra part required by
// partCount is first a count of digit positions, then a count of parts.
2478 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2479 partCount = partCountForBits(1 + 196 * partCount / 59);
2480 decSignificand = new integerPart[partCount + 1];
2483 /* Convert to binary efficiently - we do almost all multiplication
2484 in an integerPart. When this would overflow we do a single
2485 bignum multiplication, and then revert again to multiplication
2486 in an integerPart. */
2488 integerPart decValue, val, multiplier;
2496 if (p == str.end()) {
2500 decValue = decDigitValue(*p++);
2501 assert(decValue < 10U && "Invalid character in significand");
2503 val = val * 10 + decValue;
2504 /* The maximum number that can be multiplied by ten with any
2505 digit added without overflowing an integerPart. */
2506 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2508 /* Multiply out the current part. */
2509 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2510 partCount, partCount + 1, false);
2512 /* If we used another part (likely but not guaranteed), increase
2514 if (decSignificand[partCount])
2516 } while (p <= D.lastSigDigit);
2518 category = fcNormal;
2519 fs = roundSignificandWithExponent(decSignificand, partCount,
2520 D.exponent, rounding_mode);
// The bignum scratch buffer is released once the value has been rounded.
2522 delete [] decSignificand;
// Parses STR into this value, rounding under ROUNDING_MODE.  STR must be
// non-empty (asserted).  A leading minus sets the sign; text starting with
// 0x or 0X goes to the hexadecimal parser with that prefix removed, anything
// else goes to the decimal parser.
2529 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2531 assertArithmeticOK(*semantics);
2532 assert(!str.empty() && "Invalid string length");
2534 /* Handle a leading minus sign. */
2535 StringRef::iterator p = str.begin();
2536 size_t slen = str.size();
2537 sign = *p == '-' ? 1 : 0;
2538 if (*p == '-' || *p == '+') {
2541 assert(slen && "String has no digits");
2544 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2545 assert(slen - 2 && "Invalid string");
2546 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2550 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2553 /* Write out a hexadecimal representation of the floating point value
2554 to DST, which must be of sufficient size, in the C99 form
2555 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2556 excluding the terminating NUL.
2558 If UPPERCASE, the output is in upper case, otherwise in lower case.
2560 HEXDIGITS digits appear altogether, rounding the value if
2561 necessary. If HEXDIGITS is 0, the minimal precision to display the
2562 number precisely is used instead. If nothing would appear after
2563 the decimal point it is suppressed.
2565 The decimal exponent is always printed and has at least one digit.
2566 Zero values display an exponent of zero. Infinities and NaNs
2567 appear as "infinity" or "nan" respectively.
2569 The above rules are as specified by C99. There is ambiguity about
2570 what the leading hexadecimal digit should be. This implementation
2571 uses whatever is necessary so that the exponent is displayed as
2572 stored. This implies the exponent will fall within the IEEE format
2573 range, and the leading hexadecimal digit will be 0 (for denormals),
2574 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2575 any other digits zero).
2578 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2579 bool upperCase, roundingMode rounding_mode) const
2583 assertArithmeticOK(*semantics);
// The fixed words are copied without their terminating NUL.
2591 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2592 dst += sizeof infinityL - 1;
2596 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2597 dst += sizeof NaNU - 1;
2602 *dst++ = upperCase ? 'X': 'x';
// When more than one digit is requested, the remaining hexDigits - 1
// positions are filled with zero digits.
2604 if (hexDigits > 1) {
2606 memset (dst, '0', hexDigits - 1);
2607 dst += hexDigits - 1;
2609 *dst++ = upperCase ? 'P': 'p';
2614 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
// The return value is the distance DST has advanced from its starting point.
2620 return static_cast<unsigned int>(dst - p);
2623 /* Does the hard work of outputting the correctly rounded hexadecimal
2624 form of a normal floating point number with the specified number of
2625 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2626 digits necessary to print the value precisely is output. */
// Writes the hexadecimal digits and the exponent for a normal value at DST
// and returns the pointer produced by writeSignedDecimal for the exponent.
// NOTE(review): several short statements (declarations of bits/roundUp/part/q,
// else-branches, the digit/point shuffling near the end) are not visible in
// this listing; comments below only describe the visible statements.
2628 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2630 roundingMode rounding_mode) const
2632 unsigned int count, valueBits, shift, partsCount, outputDigits;
2633 const char *hexDigitChars;
2634 const integerPart *significand;
// Case-matched prefix letter, then select the digit table of the same case.
2639 *dst++ = upperCase ? 'X': 'x';
2642 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2644 significand = significandParts();
2645 partsCount = partCount();
2647 /* +3 because the first digit only uses the single integer bit, so
2648 we have 3 virtual zero most-significant-bits. */
2649 valueBits = semantics->precision + 3;
// Left shift that aligns the top of the (padded) value with the top of a part.
2650 shift = integerPartWidth - valueBits % integerPartWidth;
2652 /* The natural number of digits required ignoring trailing
2653 insignificant zeroes. */
// (bits above the lowest set bit, rounded up to whole 4-bit digits)
2654 outputDigits = (valueBits - significandLSB () + 3) / 4;
2656 /* hexDigits of zero means use the required number for the
2657 precision. Otherwise, see if we are truncating. If we are,
2658 find out if we need to round away from zero. */
2660 if (hexDigits < outputDigits) {
2661 /* We are dropping non-zero bits, so need to check how to round.
2662 "bits" is the number of dropped bits. */
2664 lostFraction fraction;
2666 bits = valueBits - hexDigits * 4;
2667 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2668 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
// From here on outputDigits is the number of digits still to be emitted.
2670 outputDigits = hexDigits;
2673 /* Write the digits consecutively, and start writing in the location
2674 of the hexadecimal point. We move the most significant digit
2675 left and add the hexadecimal point later. */
// Number of integerPart-sized chunks needed to cover valueBits (rounded up).
2678 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2680 while (outputDigits && count) {
2683 /* Put the most significant integerPartWidth bits in "part". */
2684 if (--count == partsCount)
2685 part = 0; /* An imaginary higher zero part. */
2687 part = significand[count] << shift;
// Pull in the bits that the shift exposed from the next lower part.
2690 part |= significand[count - 1] >> (integerPartWidth - shift);
2692 /* Convert as much of "part" to hexdigits as we can. */
2693 unsigned int curDigits = integerPartWidth / 4;
2695 if (curDigits > outputDigits)
2696 curDigits = outputDigits;
2697 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2698 outputDigits -= curDigits;
2704 /* Note that hexDigitChars has a trailing '0'. */
// Increment the digit at q via the table; thanks to the trailing '0' an 'f'
// becomes '0', in which case the loop repeats to propagate the carry.
2707 *q = hexDigitChars[hexDigitValue (*q) + 1];
2708 } while (*q == '0');
2711 /* Add trailing zeroes. */
2712 memset (dst, '0', outputDigits);
2713 dst += outputDigits;
2716 /* Move the most significant digit to before the point, and if there
2717 is something after the decimal point add it. This must come
2718 after rounding above. */
2725 /* Finally output the exponent. */
2726 *dst++ = upperCase ? 'P': 'p';
2728 return writeSignedDecimal (dst, exponent);
2731 hash_code llvm::hash_value(const APFloat &Arg) {
2732 if (Arg.category != APFloat::fcNormal)
2733 return hash_combine((uint8_t)Arg.category,
2734 // NaN has no sign, fix it at zero.
2735 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2736 Arg.semantics->precision);
2738 // Normal floats need their exponent and significand hashed.
2739 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2740 Arg.semantics->precision, Arg.exponent,
2742 Arg.significandParts(),
2743 Arg.significandParts() + Arg.partCount()));
2746 // Conversion from APFloat to/from host float/double. It may eventually be
2747 // possible to eliminate these and have everybody deal with APFloats, but that
2748 // will take a while. This approach will not easily extend to long double.
2749 // Current implementation requires integerPartWidth==64, which is correct at
2750 // the moment but could be made more general.
2752 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2753 // the actual IEEE representations. We compensate for that here.
// Packs an x87 double-extended value into an 80-bit APInt: word 0 holds the
// 64-bit significand (integer bit explicit), word 1 holds the 15-bit biased
// exponent with the sign in bit 15.
// NOTE(review): the zero-category assignments and the declaration of words
// are not visible in this listing.
2756 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2758 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2759 assert(partCount()==2);
2761 uint64_t myexponent, mysignificand;
2763 if (category==fcNormal) {
2764 myexponent = exponent+16383; //bias
2765 mysignificand = significandParts()[0];
// Biased exponent 1 with the integer bit (bit 63) clear means the value is a
// denormal, which is encoded with an exponent field of 0.
2766 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2767 myexponent = 0; // denormal
2768 } else if (category==fcZero) {
2771 } else if (category==fcInfinity) {
// Infinity: all-ones exponent, only the integer bit set in the significand.
2772 myexponent = 0x7fff;
2773 mysignificand = 0x8000000000000000ULL;
2775 assert(category == fcNaN && "Unknown category");
// NaN: all-ones exponent, significand bits taken from the stored value.
2776 myexponent = 0x7fff;
2777 mysignificand = significandParts()[0];
2781 words[0] = mysignificand;
2782 words[1] = ((uint64_t)(sign & 1) << 15) |
2783 (myexponent & 0x7fffLL);
2784 return APInt(80, words);
// Packs a PPC double-double value into a 128-bit APInt. Each 64-bit word is
// laid out as sign (bit 63), 11-bit biased exponent (bits 62..52) and 52-bit
// fraction; word 0 uses sign/exponent/part 0, word 1 uses
// sign2/exponent2/part 1.
// NOTE(review): the zero and infinity assignments, part of the NaN branch and
// the declaration of words are not visible in this listing.
2788 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2790 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2791 assert(partCount()==2);
2793 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2795 if (category==fcNormal) {
2796 myexponent = exponent + 1023; //bias
2797 myexponent2 = exponent2 + 1023;
2798 mysignificand = significandParts()[0];
2799 mysignificand2 = significandParts()[1];
// For each half: biased exponent 1 with the integer bit (bit 52) clear is a
// denormal and is encoded with an exponent field of 0.
2800 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2801 myexponent = 0; // denormal
2802 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2803 myexponent2 = 0; // denormal
2804 } else if (category==fcZero) {
2809 } else if (category==fcInfinity) {
2815 assert(category == fcNaN && "Unknown category");
// NaN: both significand parts are emitted as stored; the second exponent is
// copied without adding the bias.
2817 mysignificand = significandParts()[0];
2818 myexponent2 = exponent2;
2819 mysignificand2 = significandParts()[1];
// The masks drop the integer bit, which is implicit in the encoding.
2823 words[0] = ((uint64_t)(sign & 1) << 63) |
2824 ((myexponent & 0x7ff) << 52) |
2825 (mysignificand & 0xfffffffffffffLL);
2826 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2827 ((myexponent2 & 0x7ff) << 52) |
2828 (mysignificand2 & 0xfffffffffffffLL);
2829 return APInt(128, words);
// Packs an IEEE quad value into a 128-bit APInt: word 0 is the low 64
// significand bits; word 1 holds the sign (bit 63), the 15-bit biased
// exponent (bits 62..48) and the upper 48 fraction bits.
// NOTE(review): one zero-category assignment and the declaration of words
// are not visible in this listing.
2833 APFloat::convertQuadrupleAPFloatToAPInt() const
2835 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2836 assert(partCount()==2);
2838 uint64_t myexponent, mysignificand, mysignificand2;
2840 if (category==fcNormal) {
2841 myexponent = exponent+16383; //bias
2842 mysignificand = significandParts()[0];
2843 mysignificand2 = significandParts()[1];
// Biased exponent 1 with the integer bit (bit 48 of the high part) clear is a
// denormal, encoded with an exponent field of 0.
2844 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2845 myexponent = 0; // denormal
2846 } else if (category==fcZero) {
2848 mysignificand = mysignificand2 = 0;
2849 } else if (category==fcInfinity) {
// Infinity: all-ones exponent with an all-zero fraction.
2850 myexponent = 0x7fff;
2851 mysignificand = mysignificand2 = 0;
2853 assert(category == fcNaN && "Unknown category!");
// NaN: all-ones exponent, fraction bits taken from the stored significand.
2854 myexponent = 0x7fff;
2855 mysignificand = significandParts()[0];
2856 mysignificand2 = significandParts()[1];
2860 words[0] = mysignificand;
// The 48-bit mask drops the integer bit, which is implicit in the encoding.
2861 words[1] = ((uint64_t)(sign & 1) << 63) |
2862 ((myexponent & 0x7fff) << 48) |
2863 (mysignificand2 & 0xffffffffffffLL);
2865 return APInt(128, words);
// Packs an IEEE double value into a 64-bit APInt: sign (bit 63), 11-bit
// biased exponent (bits 62..52), 52-bit fraction.
// NOTE(review): the zero/infinity assignments and the NaN exponent assignment
// are not visible in this listing.
2869 APFloat::convertDoubleAPFloatToAPInt() const
2871 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2872 assert(partCount()==1);
2874 uint64_t myexponent, mysignificand;
2876 if (category==fcNormal) {
2877 myexponent = exponent+1023; //bias
2878 mysignificand = *significandParts();
// Biased exponent 1 with the integer bit (bit 52) clear is a denormal,
// encoded with an exponent field of 0.
2879 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2880 myexponent = 0; // denormal
2881 } else if (category==fcZero) {
2884 } else if (category==fcInfinity) {
2888 assert(category == fcNaN && "Unknown category!");
// NaN: fraction bits are taken from the stored significand.
2890 mysignificand = *significandParts();
// The 52-bit mask drops the integer bit, which is implicit in the encoding.
2893 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2894 ((myexponent & 0x7ff) << 52) |
2895 (mysignificand & 0xfffffffffffffLL))));
2899 APFloat::convertFloatAPFloatToAPInt() const
2901 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2902 assert(partCount()==1);
2904 uint32_t myexponent, mysignificand;
2906 if (category==fcNormal) {
2907 myexponent = exponent+127; //bias
2908 mysignificand = (uint32_t)*significandParts();
2909 if (myexponent == 1 && !(mysignificand & 0x800000))
2910 myexponent = 0; // denormal
2911 } else if (category==fcZero) {
2914 } else if (category==fcInfinity) {
2918 assert(category == fcNaN && "Unknown category!");
2920 mysignificand = (uint32_t)*significandParts();
2923 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2924 (mysignificand & 0x7fffff)));
// Packs an IEEE half value into a 16-bit APInt: sign (bit 15), 5-bit biased
// exponent (bits 14..10), 10-bit fraction.
// NOTE(review): the zero/infinity assignments and the NaN exponent assignment
// are not visible in this listing.
2928 APFloat::convertHalfAPFloatToAPInt() const
2930 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2931 assert(partCount()==1);
2933 uint32_t myexponent, mysignificand;
2935 if (category==fcNormal) {
2936 myexponent = exponent+15; //bias
2937 mysignificand = (uint32_t)*significandParts();
// Biased exponent 1 with the integer bit (bit 10) clear is a denormal,
// encoded with an exponent field of 0.
2938 if (myexponent == 1 && !(mysignificand & 0x400))
2939 myexponent = 0; // denormal
2940 } else if (category==fcZero) {
2943 } else if (category==fcInfinity) {
2947 assert(category == fcNaN && "Unknown category!");
// NaN: fraction bits are taken from the stored significand.
2949 mysignificand = (uint32_t)*significandParts();
// The 10-bit mask drops the integer bit, which is implicit in the encoding.
2952 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2953 (mysignificand & 0x3ff)));
2956 // This function creates an APInt that is just a bit map of the floating
2957 // point constant as it would appear in memory. It is not a conversion,
2958 // and treating the result as a normal integer is unlikely to be useful.
// Dispatches on the semantics pointer to the format-specific packing routine.
// The formats are compared by address, so only the known static fltSemantics
// objects are recognised.
2961 APFloat::bitcastToAPInt() const
2963 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2964 return convertHalfAPFloatToAPInt();
2966 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2967 return convertFloatAPFloatToAPInt();
2969 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2970 return convertDoubleAPFloatToAPInt();
2972 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2973 return convertQuadrupleAPFloatToAPInt();
2975 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2976 return convertPPCDoubleDoubleAPFloatToAPInt();
// Anything that reaches this point is asserted to be x87 double-extended.
2978 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2980 return convertF80LongDoubleAPFloatToAPInt();
2984 APFloat::convertToFloat() const
2986 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2987 "Float semantics are not IEEEsingle");
2988 APInt api = bitcastToAPInt();
2989 return api.bitsToFloat();
2993 APFloat::convertToDouble() const
2995 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2996 "Float semantics are not IEEEdouble");
2997 APInt api = bitcastToAPInt();
2998 return api.bitsToDouble();
3001 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3002 /// does not support these bit patterns:
3003 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3004 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3005 /// exponent = 0, integer bit 1 ("pseudodenormal")
3006 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3007 /// At the moment, the first two are treated as NaNs, the second two as Normal.
// Initializes this object with x87 double-extended semantics from an 80-bit
// pattern: raw word 0 is the 64-bit significand, the low 15 bits of raw
// word 1 are the biased exponent and bit 15 of word 1 is the sign.
// NOTE(review): the category assignments for the zero and NaN branches and
// the statement guarded by the final denormal test are not visible here.
3009 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
3011 assert(api.getBitWidth()==80);
3012 uint64_t i1 = api.getRawData()[0];
3013 uint64_t i2 = api.getRawData()[1];
3014 uint64_t myexponent = (i2 & 0x7fff);
3015 uint64_t mysignificand = i1;
3017 initialize(&APFloat::x87DoubleExtended);
3018 assert(partCount()==2);
3020 sign = static_cast<unsigned int>(i2>>15);
3021 if (myexponent==0 && mysignificand==0) {
3022 // exponent, significand meaningless
3024 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3025 // exponent, significand meaningless
3026 category = fcInfinity;
// All-ones exponent with any other significand, including the
// "pseudoinfinity"/"pseudoNaN" patterns described above.
3027 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3028 // exponent meaningless
3030 significandParts()[0] = mysignificand;
3031 significandParts()[1] = 0;
3033 category = fcNormal;
3034 exponent = myexponent - 16383;
// The integer bit is explicit in this format, so the significand is stored
// unchanged; no implicit bit is OR-ed in.
3035 significandParts()[0] = mysignificand;
3036 significandParts()[1] = 0;
3037 if (myexponent==0) // denormal
// Initializes this object with PPC double-double semantics from a 128-bit
// pattern. Each raw 64-bit word is split like an IEEE double: sign (bit 63),
// 11-bit biased exponent, 52-bit fraction. Word 0 feeds sign/exponent/part 0
// and word 1 feeds sign2/exponent2/part 1.
// NOTE(review): the zero/NaN category assignments and the denormal/else
// statements around the two integer-bit lines are not visible here.
3043 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3045 assert(api.getBitWidth()==128);
3046 uint64_t i1 = api.getRawData()[0];
3047 uint64_t i2 = api.getRawData()[1];
3048 uint64_t myexponent = (i1 >> 52) & 0x7ff;
3049 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
3050 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
3051 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
3053 initialize(&APFloat::PPCDoubleDouble);
3054 assert(partCount()==2);
3056 sign = static_cast<unsigned int>(i1>>63);
3057 sign2 = static_cast<unsigned int>(i2>>63);
// Classification looks at the first word only.
3058 if (myexponent==0 && mysignificand==0) {
3059 // exponent, significand meaningless
3060 // exponent2 and significand2 are required to be 0; we don't check
3062 } else if (myexponent==0x7ff && mysignificand==0) {
3063 // exponent, significand meaningless
3064 // exponent2 and significand2 are required to be 0; we don't check
3065 category = fcInfinity;
3066 } else if (myexponent==0x7ff && mysignificand!=0) {
3067 // exponent meaningless. So is the whole second word, but keep it
// The second exponent is stored as read, without removing the bias.
3070 exponent2 = myexponent2;
3071 significandParts()[0] = mysignificand;
3072 significandParts()[1] = mysignificand2;
3074 category = fcNormal;
3075 // Note there is no category2; the second word is treated as if it is
3076 // fcNormal, although it might be something else considered by itself.
3077 exponent = myexponent - 1023;
3078 exponent2 = myexponent2 - 1023;
3079 significandParts()[0] = mysignificand;
3080 significandParts()[1] = mysignificand2;
3081 if (myexponent==0) // denormal
// Bit 52 is the integer bit that the encoding leaves implicit.
3084 significandParts()[0] |= 0x10000000000000LL; // integer bit
3088 significandParts()[1] |= 0x10000000000000LL; // integer bit
// Initializes this object with IEEE quad semantics from a 128-bit pattern:
// raw word 0 is the low 64 fraction bits; raw word 1 holds the sign (bit 63),
// the 15-bit biased exponent (bits 62..48) and the upper 48 fraction bits.
// NOTE(review): the zero/NaN category assignments and the denormal/else
// statements around the integer-bit line are not visible here.
3093 APFloat::initFromQuadrupleAPInt(const APInt &api)
3095 assert(api.getBitWidth()==128);
3096 uint64_t i1 = api.getRawData()[0];
3097 uint64_t i2 = api.getRawData()[1];
3098 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3099 uint64_t mysignificand = i1;
3100 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3102 initialize(&APFloat::IEEEquad);
3103 assert(partCount()==2);
3105 sign = static_cast<unsigned int>(i2>>63);
3106 if (myexponent==0 &&
3107 (mysignificand==0 && mysignificand2==0)) {
3108 // exponent, significand meaningless
3110 } else if (myexponent==0x7fff &&
3111 (mysignificand==0 && mysignificand2==0)) {
3112 // exponent, significand meaningless
3113 category = fcInfinity;
3114 } else if (myexponent==0x7fff &&
3115 (mysignificand!=0 || mysignificand2 !=0)) {
3116 // exponent meaningless
3118 significandParts()[0] = mysignificand;
3119 significandParts()[1] = mysignificand2;
3121 category = fcNormal;
3122 exponent = myexponent - 16383;
3123 significandParts()[0] = mysignificand;
3124 significandParts()[1] = mysignificand2;
3125 if (myexponent==0) // denormal
// Bit 48 of the high part is the integer bit that the encoding leaves
// implicit.
3128 significandParts()[1] |= 0x1000000000000LL; // integer bit
// Initializes this object with IEEE double semantics from a 64-bit pattern:
// sign (bit 63), 11-bit biased exponent (bits 62..52), 52-bit fraction.
// NOTE(review): the zero/NaN category assignments and the denormal/else
// statements around the integer-bit line are not visible here.
3133 APFloat::initFromDoubleAPInt(const APInt &api)
3135 assert(api.getBitWidth()==64);
3136 uint64_t i = *api.getRawData();
3137 uint64_t myexponent = (i >> 52) & 0x7ff;
3138 uint64_t mysignificand = i & 0xfffffffffffffLL;
3140 initialize(&APFloat::IEEEdouble);
3141 assert(partCount()==1);
3143 sign = static_cast<unsigned int>(i>>63);
3144 if (myexponent==0 && mysignificand==0) {
3145 // exponent, significand meaningless
3147 } else if (myexponent==0x7ff && mysignificand==0) {
3148 // exponent, significand meaningless
3149 category = fcInfinity;
3150 } else if (myexponent==0x7ff && mysignificand!=0) {
3151 // exponent meaningless
3153 *significandParts() = mysignificand;
3155 category = fcNormal;
3156 exponent = myexponent - 1023;
3157 *significandParts() = mysignificand;
3158 if (myexponent==0) // denormal
// Bit 52 is the integer bit that the encoding leaves implicit.
3161 *significandParts() |= 0x10000000000000LL; // integer bit
// Initializes this object with IEEE single semantics from a 32-bit pattern:
// 8-bit biased exponent in bits 30..23 and 23-bit fraction in bits 22..0.
// NOTE(review): the sign assignment, the zero/NaN category assignments and
// the denormal/else statements around the integer-bit line are not visible
// here.
3166 APFloat::initFromFloatAPInt(const APInt & api)
3168 assert(api.getBitWidth()==32);
3169 uint32_t i = (uint32_t)*api.getRawData();
3170 uint32_t myexponent = (i >> 23) & 0xff;
3171 uint32_t mysignificand = i & 0x7fffff;
3173 initialize(&APFloat::IEEEsingle);
3174 assert(partCount()==1);
3177 if (myexponent==0 && mysignificand==0) {
3178 // exponent, significand meaningless
3180 } else if (myexponent==0xff && mysignificand==0) {
3181 // exponent, significand meaningless
3182 category = fcInfinity;
3183 } else if (myexponent==0xff && mysignificand!=0) {
3184 // exponent meaningless; the fraction bits are still stored below
3186 *significandParts() = mysignificand;
3188 category = fcNormal;
3189 exponent = myexponent - 127; //bias
3190 *significandParts() = mysignificand;
3191 if (myexponent==0) // denormal
// Bit 23 is the integer bit that the encoding leaves implicit.
3194 *significandParts() |= 0x800000; // integer bit
// Initializes this object with IEEE half semantics from a 16-bit pattern:
// 5-bit biased exponent in bits 14..10 and 10-bit fraction in bits 9..0.
// NOTE(review): the sign assignment, the zero/NaN category assignments and
// the denormal/else statements around the integer-bit line are not visible
// here.
3199 APFloat::initFromHalfAPInt(const APInt & api)
3201 assert(api.getBitWidth()==16);
3202 uint32_t i = (uint32_t)*api.getRawData();
3203 uint32_t myexponent = (i >> 10) & 0x1f;
3204 uint32_t mysignificand = i & 0x3ff;
3206 initialize(&APFloat::IEEEhalf);
3207 assert(partCount()==1);
3210 if (myexponent==0 && mysignificand==0) {
3211 // exponent, significand meaningless
3213 } else if (myexponent==0x1f && mysignificand==0) {
3214 // exponent, significand meaningless
3215 category = fcInfinity;
3216 } else if (myexponent==0x1f && mysignificand!=0) {
3217 // exponent meaningless; the fraction bits are still stored below
3219 *significandParts() = mysignificand;
3221 category = fcNormal;
3222 exponent = myexponent - 15; //bias
3223 *significandParts() = mysignificand;
3224 if (myexponent==0) // denormal
// Bit 10 is the integer bit that the encoding leaves implicit.
3227 *significandParts() |= 0x400; // integer bit
3231 /// Treat api as containing the bits of a floating point number. Currently
3232 /// we infer the floating point type from the size of the APInt. The
3233 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3234 /// when the size is anything else).
3236 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
3238 if (api.getBitWidth() == 16)
3239 return initFromHalfAPInt(api);
3240 else if (api.getBitWidth() == 32)
3241 return initFromFloatAPInt(api);
3242 else if (api.getBitWidth()==64)
3243 return initFromDoubleAPInt(api);
3244 else if (api.getBitWidth()==80)
3245 return initFromF80LongDoubleAPInt(api);
3246 else if (api.getBitWidth()==128)
3248 initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api));
3250 llvm_unreachable(0);
3254 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3256 return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE);
3259 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3260 APFloat Val(Sem, fcNormal, Negative);
3262 // We want (in interchange format):
3263 // sign = {Negative}
3265 // significand = 1..1
3267 Val.exponent = Sem.maxExponent; // unbiased
3269 // 1-initialize all bits....
3270 Val.zeroSignificand();
3271 integerPart *significand = Val.significandParts();
3272 unsigned N = partCountForBits(Sem.precision);
3273 for (unsigned i = 0; i != N; ++i)
3274 significand[i] = ~((integerPart) 0);
3276 // ...and then clear the top bits for internal consistency.
3277 if (Sem.precision % integerPartWidth != 0)
3279 (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1;
3284 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3285 APFloat Val(Sem, fcNormal, Negative);
3287 // We want (in interchange format):
3288 // sign = {Negative}
3290 // significand = 0..01
3292 Val.exponent = Sem.minExponent; // unbiased
3293 Val.zeroSignificand();
3294 Val.significandParts()[0] = 1;
3298 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3299 APFloat Val(Sem, fcNormal, Negative);
3301 // We want (in interchange format):
3302 // sign = {Negative}
3304 // significand = 10..0
3306 Val.exponent = Sem.minExponent;
3307 Val.zeroSignificand();
3308 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3309 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
// Constructs a value from a raw bit pattern; the format is chosen by
// initFromAPInt from the width of api and isIEEE. The double-double
// members exponent2/sign2 start out as zero.
3314 APFloat::APFloat(const APInt& api, bool isIEEE) : exponent2(0), sign2(0) {
3315 initFromAPInt(api, isIEEE);
// Constructs a value from a host float by taking its bit pattern and
// decoding it with initFromAPInt (isIEEE is left at its default).
3318 APFloat::APFloat(float f) : exponent2(0), sign2(0) {
3319 initFromAPInt(APInt::floatToBits(f));
// Constructs a value from a host double by taking its bit pattern and
// decoding it with initFromAPInt (isIEEE is left at its default).
3322 APFloat::APFloat(double d) : exponent2(0), sign2(0) {
3323 initFromAPInt(APInt::doubleToBits(d));
3327 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3328 Buffer.append(Str.begin(), Str.end());
3331 /// Removes data from the given significand until it is no more
3332 /// precise than is required for the desired precision.
// Divides significand by a power of ten (adding the same power to exp) when
// it carries more bits than FormatPrecision decimal digits need, then
// truncates the APInt to its active width (never below 32 bits).
// NOTE(review): the loop header and the two multiply statements of the
// power-of-ten computation are not visible in this listing.
3333 void AdjustToPrecision(APInt &significand,
3334 int &exp, unsigned FormatPrecision) {
3335 unsigned bits = significand.getActiveBits();
3337 // 196/59 is a very slight overestimate of lg_2(10).
// Rounded-up bit count needed for FormatPrecision decimal digits.
3338 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3340 if (bits <= bitsRequired) return;
// Whole decimal digits covered by the surplus bits (rounded down).
3342 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3343 if (!tensRemovable) return;
3345 exp += tensRemovable;
// divisor is built up from powten, one binary digit of tensRemovable per
// iteration (low bit tested, then shifted out).
3347 APInt divisor(significand.getBitWidth(), 1);
3348 APInt powten(significand.getBitWidth(), 10);
3350 if (tensRemovable & 1)
3352 tensRemovable >>= 1;
3353 if (!tensRemovable) break;
3357 significand = significand.udiv(divisor);
3359 // Truncate the significand down to its active bit count, but
3360 // don't try to drop below 32.
3361 unsigned newPrecision = std::max(32U, significand.getActiveBits());
3362 significand = significand.trunc(newPrecision);
// Rounds a decimal digit buffer (least significant digit first) down to at
// most FormatPrecision digits, adding the number of removed digits to exp.
// NOTE(review): the increments of FirstSignificant, the non-'9' branch of the
// carry loop and the early returns are not visible in this listing.
3366 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3367 int &exp, unsigned FormatPrecision) {
3368 unsigned N = buffer.size();
// Already short enough: nothing to round.
3369 if (N <= FormatPrecision) return;
3371 // The most significant figures are the last ones in the buffer.
3372 unsigned FirstSignificant = N - FormatPrecision;
3375 // FIXME: this probably shouldn't use 'round half up'.
3377 // Rounding down is just a truncation, except we also want to drop
3378 // trailing zeros from the new result.
// The deciding digit is the most significant one being discarded.
3379 if (buffer[FirstSignificant - 1] < '5') {
3380 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3383 exp += FirstSignificant;
3384 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3388 // Rounding up requires a decimal add-with-carry. If we continue
3389 // the carry, the newly-introduced zeros will just be truncated.
3390 for (unsigned I = FirstSignificant; I != N; ++I) {
3391 if (buffer[I] == '9') {
3399 // If we carried through, we have exactly one digit of precision.
3400 if (FirstSignificant == N) {
3401 exp += FirstSignificant;
3403 buffer.push_back('1');
// Otherwise drop every digit below the first one that is kept.
3407 exp += FirstSignificant;
3408 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
// Appends a decimal rendering of this value to Str.
//   FormatPrecision  - maximum number of significant decimal digits; 0 means
//                      derive it from the precision of the semantics.
//   FormatMaxPadding - limit on padding zeros in positional notation; 0
//                      forces scientific notation.
// NOTE(review): the category switch, several else-branches and a number of
// short statements are not visible in this listing; the comments below only
// describe the visible statements.
3412 void APFloat::toString(SmallVectorImpl<char> &Str,
3413 unsigned FormatPrecision,
3414 unsigned FormatMaxPadding) const {
// Special values are emitted as fixed strings.
3418 return append(Str, "-Inf");
3420 return append(Str, "+Inf");
3422 case fcNaN: return append(Str, "NaN");
// Zero when scientific notation is forced.
3428 if (!FormatMaxPadding)
3429 append(Str, "0.0E+0");
3441 // Decompose the number into an APInt and an exponent.
// exp is the power of two applied to the significand read as an integer.
3442 int exp = exponent - ((int) semantics->precision - 1);
3443 APInt significand(semantics->precision,
3444 makeArrayRef(significandParts(),
3445 partCountForBits(semantics->precision)));
3447 // Set FormatPrecision if zero. We want to do this before we
3448 // truncate trailing zeros, as those are part of the precision.
3449 if (!FormatPrecision) {
3450 // It's an interesting question whether to use the nominal
3451 // precision or the active precision here for denormals.
3453 // FormatPrecision = ceil(significandBits / lg_2(10))
3454 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3457 // Ignore trailing binary zeros.
3458 int trailingZeros = significand.countTrailingZeros();
3459 exp += trailingZeros;
3460 significand = significand.lshr(trailingZeros);
3462 // Change the exponent from 2^e to 10^e.
3465 } else if (exp > 0) {
// Positive binary exponent: widen the integer and shift the exponent in.
3467 significand = significand.zext(semantics->precision + exp);
3468 significand <<= exp;
3470 } else { /* exp < 0 */
3473 // We transform this using the identity:
3474 // (N)(2^-e) == (N)(5^e)(10^-e)
3475 // This means we have to multiply N (the significand) by 5^e.
3476 // To avoid overflow, we have to operate on numbers large
3477 // enough to store N * 5^e:
3478 // log2(N * 5^e) == log2(N) + e * log2(5)
3479 // <= semantics->precision + e * 137 / 59
3480 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3482 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3484 // Multiply significand by 5^e.
3485 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3486 significand = significand.zext(precision);
3487 APInt five_to_the_i(precision, 5);
// Square-and-multiply over the bits of texp.
3489 if (texp & 1) significand *= five_to_the_i;
3493 five_to_the_i *= five_to_the_i;
// Cheaply discard excess decimal digits before generating characters.
3497 AdjustToPrecision(significand, exp, FormatPrecision);
3499 llvm::SmallVector<char, 256> buffer;
3502 unsigned precision = significand.getBitWidth();
3503 APInt ten(precision, 10);
3504 APInt digit(precision, 0);
// Peel off decimal digits, least significant first.
3506 bool inTrail = true;
3507 while (significand != 0) {
3508 // digit <- significand % 10
3509 // significand <- significand / 10
3510 APInt::udivrem(significand, ten, significand, digit);
3512 unsigned d = digit.getZExtValue();
3514 // Drop trailing zeros.
3515 if (inTrail && !d) exp++;
3517 buffer.push_back((char) ('0' + d));
3522 assert(!buffer.empty() && "no characters in buffer!");
3524 // Drop down to FormatPrecision.
3525 // TODO: don't do more precise calculations above than are required.
3526 AdjustToPrecision(buffer, exp, FormatPrecision);
3528 unsigned NDigits = buffer.size();
3530 // Check whether we should use scientific notation.
3531 bool FormatScientific;
3532 if (!FormatMaxPadding)
3533 FormatScientific = true;
3538 // But we shouldn't make the number look more precise than it is.
3539 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3540 NDigits + (unsigned) exp > FormatPrecision);
3542 // Power of the most significant digit.
3543 int MSD = exp + (int) (NDigits - 1);
3546 FormatScientific = false;
3548 // 765e-5 == 0.00765
3550 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3555 // Scientific formatting is pretty straightforward.
3556 if (FormatScientific) {
// Re-express exp as the power of ten of the leading digit.
3557 exp += (NDigits - 1);
// Leading digit first (the buffer is least-significant-first), then the rest.
3559 Str.push_back(buffer[NDigits-1]);
3564 for (unsigned I = 1; I != NDigits; ++I)
3565 Str.push_back(buffer[NDigits-1-I]);
// Exponent sign, then its magnitude; digits are collected in reverse and
// emitted back to front.
3568 Str.push_back(exp >= 0 ? '+' : '-');
3569 if (exp < 0) exp = -exp;
3570 SmallVector<char, 6> expbuf;
3572 expbuf.push_back((char) ('0' + (exp % 10)));
3575 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3576 Str.push_back(expbuf[E-1-I]);
3580 // Non-scientific, positive exponents.
// All digits, most significant first, followed by exp padding characters.
3582 for (unsigned I = 0; I != NDigits; ++I)
3583 Str.push_back(buffer[NDigits-1-I]);
3584 for (unsigned I = 0; I != (unsigned) exp; ++I)
3589 // Non-scientific, negative exponents.
3591 // The number of digits to the left of the decimal point.
3592 int NWholeDigits = exp + (int) NDigits;
3595 if (NWholeDigits > 0) {
3596 for (; I != (unsigned) NWholeDigits; ++I)
3597 Str.push_back(buffer[NDigits-I-1]);
// No whole digits: one leading zero plus -NWholeDigits zeros after the point.
3600 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3604 for (unsigned Z = 1; Z != NZeros; ++Z)
// Remaining (fractional) digits.
3608 for (; I != NDigits; ++I)
3609 Str.push_back(buffer[NDigits-I-1]);
3612 bool APFloat::getExactInverse(APFloat *inv) const {
3613 // We can only guarantee the existence of an exact inverse for IEEE floats.
3614 if (semantics != &IEEEhalf && semantics != &IEEEsingle &&
3615 semantics != &IEEEdouble && semantics != &IEEEquad)
3618 // Special floats and denormals have no exact inverse.
3619 if (category != fcNormal)
3622 // Check that the number is a power of two by making sure that only the
3623 // integer bit is set in the significand.
3624 if (significandLSB() != semantics->precision - 1)
3628 APFloat reciprocal(*semantics, 1ULL);
3629 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3632 // Avoid multiplication with a denormal, it is not safe on all platforms and
3633 // may be slower than a normal division.
3634 if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
3637 assert(reciprocal.category == fcNormal &&
3638 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);