1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/MathExtras.h"
27 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
29 /* Assumed in hexadecimal significand parsing, and conversion to
30 hexadecimal strings. */
31 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
32 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
36 /* Represents floating point arithmetic semantics. */
38 /* The largest E such that 2^E is representable; this matches the
39 definition of IEEE 754. */
40 exponent_t maxExponent;
42 /* The smallest E such that 2^E is a normalized number; this
43 matches the definition of IEEE 754. */
44 exponent_t minExponent;
46 /* Number of bits in the significand. This includes the integer
48 unsigned int precision;
50 /* True if arithmetic is supported. */
51 unsigned int arithmeticOK;
54 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, true };
55 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
56 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
57 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
58 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
59 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
61 // The PowerPC format consists of two doubles. It does not map cleanly
62 // onto the usual format above. For now only storage of constants of
63 // this type is supported, no arithmetic.
64 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
66 /* A tight upper bound on number of parts required to hold the value
69 power * 815 / (351 * integerPartWidth) + 1
71 However, whilst the result may require only this many parts,
72 because we are multiplying two values to get it, the
73 multiplication may require an extra part with the excess part
74 being zero (consider the trivial case of 1 * 1, tcFullMultiply
75 requires two parts to hold the single-part result). So we add an
76 extra one to guarantee enough space whilst multiplying. */
77 const unsigned int maxExponent = 16383;
78 const unsigned int maxPrecision = 113;
79 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
80 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
81 / (351 * integerPartWidth));
84 /* A bunch of private, handy routines. */
86 static inline unsigned int
87 partCountForBits(unsigned int bits)
89 return ((bits) + integerPartWidth - 1) / integerPartWidth;
92 /* Returns 0U-9U. Return values >= 10U are not digits. */
93 static inline unsigned int
94 decDigitValue(unsigned int c)
100 hexDigitValue(unsigned int c)
120 assertArithmeticOK(const llvm::fltSemantics &semantics) {
121 assert(semantics.arithmeticOK &&
122 "Compile-time arithmetic does not support these semantics");
125 /* Return the value of a decimal exponent of the form
128 If the exponent overflows, returns a large exponent with the
131 readExponent(StringRef::iterator begin, StringRef::iterator end)
134 unsigned int absExponent;
135 const unsigned int overlargeExponent = 24000; /* FIXME. */
136 StringRef::iterator p = begin;
138 assert(p != end && "Exponent has no digits");
140 isNegative = (*p == '-');
141 if (*p == '-' || *p == '+') {
143 assert(p != end && "Exponent has no digits");
146 absExponent = decDigitValue(*p++);
147 assert(absExponent < 10U && "Invalid character in exponent");
149 for (; p != end; ++p) {
152 value = decDigitValue(*p);
153 assert(value < 10U && "Invalid character in exponent");
155 value += absExponent * 10;
156 if (absExponent >= overlargeExponent) {
157 absExponent = overlargeExponent;
158 p = end; /* outwit assert below */
164 assert(p == end && "Invalid exponent in exponent");
167 return -(int) absExponent;
169 return (int) absExponent;
172 /* This is ugly and needs cleaning up, but I don't immediately see
173 how whilst remaining safe. */
175 totalExponent(StringRef::iterator p, StringRef::iterator end,
176 int exponentAdjustment)
178 int unsignedExponent;
179 bool negative, overflow;
182 assert(p != end && "Exponent has no digits");
184 negative = *p == '-';
185 if (*p == '-' || *p == '+') {
187 assert(p != end && "Exponent has no digits");
190 unsignedExponent = 0;
192 for (; p != end; ++p) {
195 value = decDigitValue(*p);
196 assert(value < 10U && "Invalid character in exponent");
198 unsignedExponent = unsignedExponent * 10 + value;
199 if (unsignedExponent > 32767)
203 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
207 exponent = unsignedExponent;
209 exponent = -exponent;
210 exponent += exponentAdjustment;
211 if (exponent > 32767 || exponent < -32768)
216 exponent = negative ? -32768: 32767;
221 static StringRef::iterator
222 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
223 StringRef::iterator *dot)
225 StringRef::iterator p = begin;
227 while (*p == '0' && p != end)
233 assert(end - begin != 1 && "Significand has no digits");
235 while (*p == '0' && p != end)
242 /* Given a normal decimal floating point number of the form
246 where the decimal point and exponent are optional, fill out the
247 structure D. Exponent is appropriate if the significand is
248 treated as an integer, and normalizedExponent if the significand
249 is taken to have the decimal point after a single leading
252 If the value is zero, V->firstSigDigit points to a non-digit, and
253 the return exponent is zero.
256 const char *firstSigDigit;
257 const char *lastSigDigit;
259 int normalizedExponent;
263 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
266 StringRef::iterator dot = end;
267 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
269 D->firstSigDigit = p;
271 D->normalizedExponent = 0;
273 for (; p != end; ++p) {
275 assert(dot == end && "String contains multiple dots");
280 if (decDigitValue(*p) >= 10U)
285 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
286 assert(p != begin && "Significand has no digits");
287 assert((dot == end || p - begin != 1) && "Significand has no digits");
289 /* p points to the first non-digit in the string */
290 D->exponent = readExponent(p + 1, end);
292 /* Implied decimal point? */
297 /* If number is all zeroes accept any exponent. */
298 if (p != D->firstSigDigit) {
299 /* Drop insignificant trailing zeroes. */
304 while (p != begin && *p == '0');
305 while (p != begin && *p == '.');
308 /* Adjust the exponents for any decimal point. */
309 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
310 D->normalizedExponent = (D->exponent +
311 static_cast<exponent_t>((p - D->firstSigDigit)
312 - (dot > D->firstSigDigit && dot < p)));
318 /* Return the trailing fraction of a hexadecimal number.
319 DIGITVALUE is the first hex digit of the fraction, P points to
322 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
323 unsigned int digitValue)
325 unsigned int hexDigit;
327 /* If the first trailing digit isn't 0 or 8 we can work out the
328 fraction immediately. */
330 return lfMoreThanHalf;
331 else if (digitValue < 8 && digitValue > 0)
332 return lfLessThanHalf;
334 /* Otherwise we need to find the first non-zero digit. */
338 assert(p != end && "Invalid trailing hexadecimal fraction!");
340 hexDigit = hexDigitValue(*p);
342 /* If we ran off the end it is exactly zero or one-half, otherwise
345 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
347 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
350 /* Return the fraction lost were a bignum truncated losing the least
351 significant BITS bits. */
353 lostFractionThroughTruncation(const integerPart *parts,
354 unsigned int partCount,
359 lsb = APInt::tcLSB(parts, partCount);
361 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
363 return lfExactlyZero;
365 return lfExactlyHalf;
366 if (bits <= partCount * integerPartWidth &&
367 APInt::tcExtractBit(parts, bits - 1))
368 return lfMoreThanHalf;
370 return lfLessThanHalf;
373 /* Shift DST right BITS bits noting lost fraction. */
375 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
377 lostFraction lost_fraction;
379 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
381 APInt::tcShiftRight(dst, parts, bits);
383 return lost_fraction;
386 /* Combine the effect of two lost fractions. */
388 combineLostFractions(lostFraction moreSignificant,
389 lostFraction lessSignificant)
391 if (lessSignificant != lfExactlyZero) {
392 if (moreSignificant == lfExactlyZero)
393 moreSignificant = lfLessThanHalf;
394 else if (moreSignificant == lfExactlyHalf)
395 moreSignificant = lfMoreThanHalf;
398 return moreSignificant;
401 /* The error from the true value, in half-ulps, on multiplying two
402 floating point numbers, which differ from the value they
403 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
404 than the returned value.
406 See "How to Read Floating Point Numbers Accurately" by William D
409 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
411 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
413 if (HUerr1 + HUerr2 == 0)
414 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
416 return inexactMultiply + 2 * (HUerr1 + HUerr2);
419 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
420 when the least significant BITS are truncated. BITS cannot be
423 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
425 unsigned int count, partBits;
426 integerPart part, boundary;
431 count = bits / integerPartWidth;
432 partBits = bits % integerPartWidth + 1;
434 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
437 boundary = (integerPart) 1 << (partBits - 1);
442 if (part - boundary <= boundary - part)
443 return part - boundary;
445 return boundary - part;
448 if (part == boundary) {
451 return ~(integerPart) 0; /* A lot. */
454 } else if (part == boundary - 1) {
457 return ~(integerPart) 0; /* A lot. */
462 return ~(integerPart) 0; /* A lot. */
465 /* Place pow(5, power) in DST, and return the number of parts used.
466 DST must be at least one part larger than size of the answer. */
468 powerOf5(integerPart *dst, unsigned int power)
470 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
472 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
473 pow5s[0] = 78125 * 5;
475 unsigned int partsCount[16] = { 1 };
476 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
478 assert(power <= maxExponent);
483 *p1 = firstEightPowers[power & 7];
489 for (unsigned int n = 0; power; power >>= 1, n++) {
494 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
496 pc = partsCount[n - 1];
497 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
499 if (pow5[pc - 1] == 0)
507 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
509 if (p2[result - 1] == 0)
512 /* Now result is in p1 with partsCount parts and p2 is scratch
514 tmp = p1, p1 = p2, p2 = tmp;
521 APInt::tcAssign(dst, p1, result);
526 /* Zero at the end to avoid modular arithmetic when adding one; used
527 when rounding up during hexadecimal output. */
528 static const char hexDigitsLower[] = "0123456789abcdef0";
529 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
530 static const char infinityL[] = "infinity";
531 static const char infinityU[] = "INFINITY";
532 static const char NaNL[] = "nan";
533 static const char NaNU[] = "NAN";
535 /* Write out an integerPart in hexadecimal, starting with the most
536 significant nibble. Write out exactly COUNT hexdigits, return
539 partAsHex (char *dst, integerPart part, unsigned int count,
540 const char *hexDigitChars)
542 unsigned int result = count;
544 assert(count != 0 && count <= integerPartWidth / 4);
546 part >>= (integerPartWidth - 4 * count);
548 dst[count] = hexDigitChars[part & 0xf];
555 /* Write out an unsigned decimal integer. */
557 writeUnsignedDecimal (char *dst, unsigned int n)
573 /* Write out a signed decimal integer. */
575 writeSignedDecimal (char *dst, int value)
579 dst = writeUnsignedDecimal(dst, -(unsigned) value);
581 dst = writeUnsignedDecimal(dst, value);
588 APFloat::initialize(const fltSemantics *ourSemantics)
592 semantics = ourSemantics;
595 significand.parts = new integerPart[count];
599 APFloat::freeSignificand()
602 delete [] significand.parts;
606 APFloat::assign(const APFloat &rhs)
608 assert(semantics == rhs.semantics);
611 category = rhs.category;
612 exponent = rhs.exponent;
614 exponent2 = rhs.exponent2;
615 if (category == fcNormal || category == fcNaN)
616 copySignificand(rhs);
620 APFloat::copySignificand(const APFloat &rhs)
622 assert(category == fcNormal || category == fcNaN);
623 assert(rhs.partCount() >= partCount());
625 APInt::tcAssign(significandParts(), rhs.significandParts(),
629 /* Make this number a NaN, with an arbitrary but deterministic value
630 for the significand. If double or longer, this is a signalling NaN,
631 which may not be ideal. If float, this is QNaN(0). */
632 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
637 integerPart *significand = significandParts();
638 unsigned numParts = partCount();
640 // Set the significand bits to the fill.
641 if (!fill || fill->getNumWords() < numParts)
642 APInt::tcSet(significand, 0, numParts);
644 APInt::tcAssign(significand, fill->getRawData(),
645 std::min(fill->getNumWords(), numParts));
647 // Zero out the excess bits of the significand.
648 unsigned bitsToPreserve = semantics->precision - 1;
649 unsigned part = bitsToPreserve / 64;
650 bitsToPreserve %= 64;
651 significand[part] &= ((1ULL << bitsToPreserve) - 1);
652 for (part++; part != numParts; ++part)
653 significand[part] = 0;
656 unsigned QNaNBit = semantics->precision - 2;
659 // We always have to clear the QNaN bit to make it an SNaN.
660 APInt::tcClearBit(significand, QNaNBit);
662 // If there are no bits set in the payload, we have to set
663 // *something* to make it a NaN instead of an infinity;
664 // conventionally, this is the next bit down from the QNaN bit.
665 if (APInt::tcIsZero(significand, numParts))
666 APInt::tcSetBit(significand, QNaNBit - 1);
668 // We always have to set the QNaN bit to make it a QNaN.
669 APInt::tcSetBit(significand, QNaNBit);
672 // For x87 extended precision, we want to make a NaN, not a
673 // pseudo-NaN. Maybe we should expose the ability to make
675 if (semantics == &APFloat::x87DoubleExtended)
676 APInt::tcSetBit(significand, QNaNBit + 1);
679 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
681 APFloat value(Sem, uninitialized);
682 value.makeNaN(SNaN, Negative, fill);
687 APFloat::operator=(const APFloat &rhs)
690 if (semantics != rhs.semantics) {
692 initialize(rhs.semantics);
701 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
704 if (semantics != rhs.semantics ||
705 category != rhs.category ||
708 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
711 if (category==fcZero || category==fcInfinity)
713 else if (category==fcNormal && exponent!=rhs.exponent)
715 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
716 exponent2!=rhs.exponent2)
720 const integerPart* p=significandParts();
721 const integerPart* q=rhs.significandParts();
722 for (; i>0; i--, p++, q++) {
730 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
731 : exponent2(0), sign2(0) {
732 assertArithmeticOK(ourSemantics);
733 initialize(&ourSemantics);
736 exponent = ourSemantics.precision - 1;
737 significandParts()[0] = value;
738 normalize(rmNearestTiesToEven, lfExactlyZero);
741 APFloat::APFloat(const fltSemantics &ourSemantics) : exponent2(0), sign2(0) {
742 assertArithmeticOK(ourSemantics);
743 initialize(&ourSemantics);
748 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
749 : exponent2(0), sign2(0) {
750 assertArithmeticOK(ourSemantics);
751 // Allocates storage if necessary but does not initialize it.
752 initialize(&ourSemantics);
755 APFloat::APFloat(const fltSemantics &ourSemantics,
756 fltCategory ourCategory, bool negative)
757 : exponent2(0), sign2(0) {
758 assertArithmeticOK(ourSemantics);
759 initialize(&ourSemantics);
760 category = ourCategory;
762 if (category == fcNormal)
764 else if (ourCategory == fcNaN)
768 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text)
769 : exponent2(0), sign2(0) {
770 assertArithmeticOK(ourSemantics);
771 initialize(&ourSemantics);
772 convertFromString(text, rmNearestTiesToEven);
775 APFloat::APFloat(const APFloat &rhs) : exponent2(0), sign2(0) {
776 initialize(rhs.semantics);
785 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
786 void APFloat::Profile(FoldingSetNodeID& ID) const {
787 ID.Add(bitcastToAPInt());
791 APFloat::partCount() const
793 return partCountForBits(semantics->precision + 1);
797 APFloat::semanticsPrecision(const fltSemantics &semantics)
799 return semantics.precision;
803 APFloat::significandParts() const
805 return const_cast<APFloat *>(this)->significandParts();
809 APFloat::significandParts()
811 assert(category == fcNormal || category == fcNaN);
814 return significand.parts;
816 return &significand.part;
820 APFloat::zeroSignificand()
823 APInt::tcSet(significandParts(), 0, partCount());
826 /* Increment an fcNormal floating point number's significand. */
828 APFloat::incrementSignificand()
832 carry = APInt::tcIncrement(significandParts(), partCount());
834 /* Our callers should never cause us to overflow. */
839 /* Add the significand of the RHS. Returns the carry flag. */
841 APFloat::addSignificand(const APFloat &rhs)
845 parts = significandParts();
847 assert(semantics == rhs.semantics);
848 assert(exponent == rhs.exponent);
850 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
853 /* Subtract the significand of the RHS with a borrow flag. Returns
856 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
860 parts = significandParts();
862 assert(semantics == rhs.semantics);
863 assert(exponent == rhs.exponent);
865 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
869 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
870 on to the full-precision result of the multiplication. Returns the
873 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
875 unsigned int omsb; // One, not zero, based MSB.
876 unsigned int partsCount, newPartsCount, precision;
877 integerPart *lhsSignificand;
878 integerPart scratch[4];
879 integerPart *fullSignificand;
880 lostFraction lost_fraction;
883 assert(semantics == rhs.semantics);
885 precision = semantics->precision;
886 newPartsCount = partCountForBits(precision * 2);
888 if (newPartsCount > 4)
889 fullSignificand = new integerPart[newPartsCount];
891 fullSignificand = scratch;
893 lhsSignificand = significandParts();
894 partsCount = partCount();
896 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
897 rhs.significandParts(), partsCount, partsCount);
899 lost_fraction = lfExactlyZero;
900 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
901 exponent += rhs.exponent;
904 Significand savedSignificand = significand;
905 const fltSemantics *savedSemantics = semantics;
906 fltSemantics extendedSemantics;
908 unsigned int extendedPrecision;
910 /* Normalize our MSB. */
911 extendedPrecision = precision + precision - 1;
912 if (omsb != extendedPrecision) {
913 APInt::tcShiftLeft(fullSignificand, newPartsCount,
914 extendedPrecision - omsb);
915 exponent -= extendedPrecision - omsb;
918 /* Create new semantics. */
919 extendedSemantics = *semantics;
920 extendedSemantics.precision = extendedPrecision;
922 if (newPartsCount == 1)
923 significand.part = fullSignificand[0];
925 significand.parts = fullSignificand;
926 semantics = &extendedSemantics;
928 APFloat extendedAddend(*addend);
929 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
930 assert(status == opOK);
932 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
934 /* Restore our state. */
935 if (newPartsCount == 1)
936 fullSignificand[0] = significand.part;
937 significand = savedSignificand;
938 semantics = savedSemantics;
940 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
943 exponent -= (precision - 1);
945 if (omsb > precision) {
946 unsigned int bits, significantParts;
949 bits = omsb - precision;
950 significantParts = partCountForBits(omsb);
951 lf = shiftRight(fullSignificand, significantParts, bits);
952 lost_fraction = combineLostFractions(lf, lost_fraction);
956 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
958 if (newPartsCount > 4)
959 delete [] fullSignificand;
961 return lost_fraction;
964 /* Multiply the significands of LHS and RHS to DST. */
966 APFloat::divideSignificand(const APFloat &rhs)
968 unsigned int bit, i, partsCount;
969 const integerPart *rhsSignificand;
970 integerPart *lhsSignificand, *dividend, *divisor;
971 integerPart scratch[4];
972 lostFraction lost_fraction;
974 assert(semantics == rhs.semantics);
976 lhsSignificand = significandParts();
977 rhsSignificand = rhs.significandParts();
978 partsCount = partCount();
981 dividend = new integerPart[partsCount * 2];
985 divisor = dividend + partsCount;
987 /* Copy the dividend and divisor as they will be modified in-place. */
988 for (i = 0; i < partsCount; i++) {
989 dividend[i] = lhsSignificand[i];
990 divisor[i] = rhsSignificand[i];
991 lhsSignificand[i] = 0;
994 exponent -= rhs.exponent;
996 unsigned int precision = semantics->precision;
998 /* Normalize the divisor. */
999 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1002 APInt::tcShiftLeft(divisor, partsCount, bit);
1005 /* Normalize the dividend. */
1006 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1009 APInt::tcShiftLeft(dividend, partsCount, bit);
1012 /* Ensure the dividend >= divisor initially for the loop below.
1013 Incidentally, this means that the division loop below is
1014 guaranteed to set the integer bit to one. */
1015 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1017 APInt::tcShiftLeft(dividend, partsCount, 1);
1018 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1021 /* Long division. */
1022 for (bit = precision; bit; bit -= 1) {
1023 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1024 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1025 APInt::tcSetBit(lhsSignificand, bit - 1);
1028 APInt::tcShiftLeft(dividend, partsCount, 1);
1031 /* Figure out the lost fraction. */
1032 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1035 lost_fraction = lfMoreThanHalf;
1037 lost_fraction = lfExactlyHalf;
1038 else if (APInt::tcIsZero(dividend, partsCount))
1039 lost_fraction = lfExactlyZero;
1041 lost_fraction = lfLessThanHalf;
1046 return lost_fraction;
1050 APFloat::significandMSB() const
1052 return APInt::tcMSB(significandParts(), partCount());
1056 APFloat::significandLSB() const
1058 return APInt::tcLSB(significandParts(), partCount());
1061 /* Note that a zero result is NOT normalized to fcZero. */
1063 APFloat::shiftSignificandRight(unsigned int bits)
1065 /* Our exponent should not overflow. */
1066 assert((exponent_t) (exponent + bits) >= exponent);
1070 return shiftRight(significandParts(), partCount(), bits);
1073 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1075 APFloat::shiftSignificandLeft(unsigned int bits)
1077 assert(bits < semantics->precision);
1080 unsigned int partsCount = partCount();
1082 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1085 assert(!APInt::tcIsZero(significandParts(), partsCount));
1090 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1094 assert(semantics == rhs.semantics);
1095 assert(category == fcNormal);
1096 assert(rhs.category == fcNormal);
1098 compare = exponent - rhs.exponent;
1100 /* If exponents are equal, do an unsigned bignum comparison of the
1103 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1107 return cmpGreaterThan;
1108 else if (compare < 0)
1114 /* Handle overflow. Sign is preserved. We either become infinity or
1115 the largest finite number. */
1117 APFloat::handleOverflow(roundingMode rounding_mode)
1120 if (rounding_mode == rmNearestTiesToEven ||
1121 rounding_mode == rmNearestTiesToAway ||
1122 (rounding_mode == rmTowardPositive && !sign) ||
1123 (rounding_mode == rmTowardNegative && sign)) {
1124 category = fcInfinity;
1125 return (opStatus) (opOverflow | opInexact);
1128 /* Otherwise we become the largest finite number. */
1129 category = fcNormal;
1130 exponent = semantics->maxExponent;
1131 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1132 semantics->precision);
1137 /* Returns TRUE if, when truncating the current number, with BIT the
1138 new LSB, with the given lost fraction and rounding mode, the result
1139 would need to be rounded away from zero (i.e., by increasing the
1140 signficand). This routine must work for fcZero of both signs, and
1141 fcNormal numbers. */
1143 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1144 lostFraction lost_fraction,
1145 unsigned int bit) const
1147 /* NaNs and infinities should not have lost fractions. */
1148 assert(category == fcNormal || category == fcZero);
1150 /* Current callers never pass this so we don't handle it. */
1151 assert(lost_fraction != lfExactlyZero);
1153 switch (rounding_mode) {
1154 case rmNearestTiesToAway:
1155 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1157 case rmNearestTiesToEven:
1158 if (lost_fraction == lfMoreThanHalf)
1161 /* Our zeroes don't have a significand to test. */
1162 if (lost_fraction == lfExactlyHalf && category != fcZero)
1163 return APInt::tcExtractBit(significandParts(), bit);
1170 case rmTowardPositive:
1171 return sign == false;
1173 case rmTowardNegative:
1174 return sign == true;
1176 llvm_unreachable("Invalid rounding mode found");
1180 APFloat::normalize(roundingMode rounding_mode,
1181 lostFraction lost_fraction)
1183 unsigned int omsb; /* One, not zero, based MSB. */
1186 if (category != fcNormal)
1189 /* Before rounding normalize the exponent of fcNormal numbers. */
1190 omsb = significandMSB() + 1;
1193 /* OMSB is numbered from 1. We want to place it in the integer
1194 bit numbered PRECISION if possible, with a compensating change in
1196 exponentChange = omsb - semantics->precision;
1198 /* If the resulting exponent is too high, overflow according to
1199 the rounding mode. */
1200 if (exponent + exponentChange > semantics->maxExponent)
1201 return handleOverflow(rounding_mode);
1203 /* Subnormal numbers have exponent minExponent, and their MSB
1204 is forced based on that. */
1205 if (exponent + exponentChange < semantics->minExponent)
1206 exponentChange = semantics->minExponent - exponent;
1208 /* Shifting left is easy as we don't lose precision. */
1209 if (exponentChange < 0) {
1210 assert(lost_fraction == lfExactlyZero);
1212 shiftSignificandLeft(-exponentChange);
1217 if (exponentChange > 0) {
1220 /* Shift right and capture any new lost fraction. */
1221 lf = shiftSignificandRight(exponentChange);
1223 lost_fraction = combineLostFractions(lf, lost_fraction);
1225 /* Keep OMSB up-to-date. */
1226 if (omsb > (unsigned) exponentChange)
1227 omsb -= exponentChange;
1233 /* Now round the number according to rounding_mode given the lost
1236 /* As specified in IEEE 754, since we do not trap we do not report
1237 underflow for exact results. */
1238 if (lost_fraction == lfExactlyZero) {
1239 /* Canonicalize zeroes. */
1246 /* Increment the significand if we're rounding away from zero. */
1247 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1249 exponent = semantics->minExponent;
1251 incrementSignificand();
1252 omsb = significandMSB() + 1;
1254 /* Did the significand increment overflow? */
1255 if (omsb == (unsigned) semantics->precision + 1) {
1256 /* Renormalize by incrementing the exponent and shifting our
1257 significand right one. However if we already have the
1258 maximum exponent we overflow to infinity. */
1259 if (exponent == semantics->maxExponent) {
1260 category = fcInfinity;
1262 return (opStatus) (opOverflow | opInexact);
1265 shiftSignificandRight(1);
1271 /* The normal case - we were and are not denormal, and any
1272 significand increment above didn't overflow. */
1273 if (omsb == semantics->precision)
1276 /* We have a non-zero denormal. */
1277 assert(omsb < semantics->precision);
1279 /* Canonicalize zeroes. */
1283 /* The fcZero case is a denormal that underflowed to zero. */
1284 return (opStatus) (opUnderflow | opInexact);
1288 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1290 switch (convolve(category, rhs.category)) {
1292 llvm_unreachable(0);
1294 case convolve(fcNaN, fcZero):
1295 case convolve(fcNaN, fcNormal):
1296 case convolve(fcNaN, fcInfinity):
1297 case convolve(fcNaN, fcNaN):
1298 case convolve(fcNormal, fcZero):
1299 case convolve(fcInfinity, fcNormal):
1300 case convolve(fcInfinity, fcZero):
1303 case convolve(fcZero, fcNaN):
1304 case convolve(fcNormal, fcNaN):
1305 case convolve(fcInfinity, fcNaN):
1307 copySignificand(rhs);
1310 case convolve(fcNormal, fcInfinity):
1311 case convolve(fcZero, fcInfinity):
1312 category = fcInfinity;
1313 sign = rhs.sign ^ subtract;
1316 case convolve(fcZero, fcNormal):
1318 sign = rhs.sign ^ subtract;
1321 case convolve(fcZero, fcZero):
1322 /* Sign depends on rounding mode; handled by caller. */
1325 case convolve(fcInfinity, fcInfinity):
1326 /* Differently signed infinities can only be validly
1328 if (((sign ^ rhs.sign)!=0) != subtract) {
1335 case convolve(fcNormal, fcNormal):
1340 /* Add or subtract two normal numbers. */
1342 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1345 lostFraction lost_fraction;
1348 /* Determine if the operation on the absolute values is effectively
1349 an addition or subtraction. */
1350 subtract ^= (sign ^ rhs.sign) ? true : false;
1352 /* Are we bigger exponent-wise than the RHS? */
1353 bits = exponent - rhs.exponent;
1355 /* Subtraction is more subtle than one might naively expect. */
1357 APFloat temp_rhs(rhs);
1361 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1362 lost_fraction = lfExactlyZero;
1363 } else if (bits > 0) {
1364 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1365 shiftSignificandLeft(1);
1368 lost_fraction = shiftSignificandRight(-bits - 1);
1369 temp_rhs.shiftSignificandLeft(1);
1374 carry = temp_rhs.subtractSignificand
1375 (*this, lost_fraction != lfExactlyZero);
1376 copySignificand(temp_rhs);
1379 carry = subtractSignificand
1380 (temp_rhs, lost_fraction != lfExactlyZero);
1383 /* Invert the lost fraction - it was on the RHS and
1385 if (lost_fraction == lfLessThanHalf)
1386 lost_fraction = lfMoreThanHalf;
1387 else if (lost_fraction == lfMoreThanHalf)
1388 lost_fraction = lfLessThanHalf;
1390 /* The code above is intended to ensure that no borrow is
1396 APFloat temp_rhs(rhs);
1398 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1399 carry = addSignificand(temp_rhs);
1401 lost_fraction = shiftSignificandRight(-bits);
1402 carry = addSignificand(rhs);
1405 /* We have a guard bit; generating a carry cannot happen. */
1410 return lost_fraction;
1414 APFloat::multiplySpecials(const APFloat &rhs)
1416 switch (convolve(category, rhs.category)) {
1418 llvm_unreachable(0);
1420 case convolve(fcNaN, fcZero):
1421 case convolve(fcNaN, fcNormal):
1422 case convolve(fcNaN, fcInfinity):
1423 case convolve(fcNaN, fcNaN):
1426 case convolve(fcZero, fcNaN):
1427 case convolve(fcNormal, fcNaN):
1428 case convolve(fcInfinity, fcNaN):
1430 copySignificand(rhs);
1433 case convolve(fcNormal, fcInfinity):
1434 case convolve(fcInfinity, fcNormal):
1435 case convolve(fcInfinity, fcInfinity):
1436 category = fcInfinity;
1439 case convolve(fcZero, fcNormal):
1440 case convolve(fcNormal, fcZero):
1441 case convolve(fcZero, fcZero):
1445 case convolve(fcZero, fcInfinity):
1446 case convolve(fcInfinity, fcZero):
1450 case convolve(fcNormal, fcNormal):
1456 APFloat::divideSpecials(const APFloat &rhs)
1458 switch (convolve(category, rhs.category)) {
1460 llvm_unreachable(0);
1462 case convolve(fcNaN, fcZero):
1463 case convolve(fcNaN, fcNormal):
1464 case convolve(fcNaN, fcInfinity):
1465 case convolve(fcNaN, fcNaN):
1466 case convolve(fcInfinity, fcZero):
1467 case convolve(fcInfinity, fcNormal):
1468 case convolve(fcZero, fcInfinity):
1469 case convolve(fcZero, fcNormal):
1472 case convolve(fcZero, fcNaN):
1473 case convolve(fcNormal, fcNaN):
1474 case convolve(fcInfinity, fcNaN):
1476 copySignificand(rhs);
1479 case convolve(fcNormal, fcInfinity):
1483 case convolve(fcNormal, fcZero):
1484 category = fcInfinity;
1487 case convolve(fcInfinity, fcInfinity):
1488 case convolve(fcZero, fcZero):
1492 case convolve(fcNormal, fcNormal):
1498 APFloat::modSpecials(const APFloat &rhs)
1500 switch (convolve(category, rhs.category)) {
1502 llvm_unreachable(0);
1504 case convolve(fcNaN, fcZero):
1505 case convolve(fcNaN, fcNormal):
1506 case convolve(fcNaN, fcInfinity):
1507 case convolve(fcNaN, fcNaN):
1508 case convolve(fcZero, fcInfinity):
1509 case convolve(fcZero, fcNormal):
1510 case convolve(fcNormal, fcInfinity):
1513 case convolve(fcZero, fcNaN):
1514 case convolve(fcNormal, fcNaN):
1515 case convolve(fcInfinity, fcNaN):
1517 copySignificand(rhs);
1520 case convolve(fcNormal, fcZero):
1521 case convolve(fcInfinity, fcZero):
1522 case convolve(fcInfinity, fcNormal):
1523 case convolve(fcInfinity, fcInfinity):
1524 case convolve(fcZero, fcZero):
1528 case convolve(fcNormal, fcNormal):
1535 APFloat::changeSign()
1537 /* Look mummy, this one's easy. */
1542 APFloat::clearSign()
1544 /* So is this one. */
1549 APFloat::copySign(const APFloat &rhs)
1555 /* Normalized addition or subtraction. */
1557 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1562 assertArithmeticOK(*semantics);
1564 fs = addOrSubtractSpecials(rhs, subtract);
1566 /* This return code means it was not a simple case. */
1567 if (fs == opDivByZero) {
1568 lostFraction lost_fraction;
1570 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1571 fs = normalize(rounding_mode, lost_fraction);
1573 /* Can only be zero if we lost no fraction. */
1574 assert(category != fcZero || lost_fraction == lfExactlyZero);
1577 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1578 positive zero unless rounding to minus infinity, except that
1579 adding two like-signed zeroes gives that zero. */
1580 if (category == fcZero) {
1581 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1582 sign = (rounding_mode == rmTowardNegative);
1588 /* Normalized addition. */
1590 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1592 return addOrSubtract(rhs, rounding_mode, false);
1595 /* Normalized subtraction. */
1597 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1599 return addOrSubtract(rhs, rounding_mode, true);
1602 /* Normalized multiply. */
1604 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1608 assertArithmeticOK(*semantics);
1610 fs = multiplySpecials(rhs);
1612 if (category == fcNormal) {
1613 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1614 fs = normalize(rounding_mode, lost_fraction);
1615 if (lost_fraction != lfExactlyZero)
1616 fs = (opStatus) (fs | opInexact);
1622 /* Normalized divide. */
1624 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1628 assertArithmeticOK(*semantics);
1630 fs = divideSpecials(rhs);
1632 if (category == fcNormal) {
1633 lostFraction lost_fraction = divideSignificand(rhs);
1634 fs = normalize(rounding_mode, lost_fraction);
1635 if (lost_fraction != lfExactlyZero)
1636 fs = (opStatus) (fs | opInexact);
1642 /* Normalized remainder. This is not currently correct in all cases. */
1644 APFloat::remainder(const APFloat &rhs)
1648 unsigned int origSign = sign;
1650 assertArithmeticOK(*semantics);
1651 fs = V.divide(rhs, rmNearestTiesToEven);
1652 if (fs == opDivByZero)
1655 int parts = partCount();
1656 integerPart *x = new integerPart[parts];
1658 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1659 rmNearestTiesToEven, &ignored);
1660 if (fs==opInvalidOp)
1663 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1664 rmNearestTiesToEven);
1665 assert(fs==opOK); // should always work
1667 fs = V.multiply(rhs, rmNearestTiesToEven);
1668 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1670 fs = subtract(V, rmNearestTiesToEven);
1671 assert(fs==opOK || fs==opInexact); // likewise
1674 sign = origSign; // IEEE754 requires this
1679 /* Normalized llvm frem (C fmod).
1680 This is not currently correct in all cases. */
1682 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1685 assertArithmeticOK(*semantics);
1686 fs = modSpecials(rhs);
1688 if (category == fcNormal && rhs.category == fcNormal) {
1690 unsigned int origSign = sign;
1692 fs = V.divide(rhs, rmNearestTiesToEven);
1693 if (fs == opDivByZero)
1696 int parts = partCount();
1697 integerPart *x = new integerPart[parts];
1699 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1700 rmTowardZero, &ignored);
1701 if (fs==opInvalidOp)
1704 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1705 rmNearestTiesToEven);
1706 assert(fs==opOK); // should always work
1708 fs = V.multiply(rhs, rounding_mode);
1709 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1711 fs = subtract(V, rounding_mode);
1712 assert(fs==opOK || fs==opInexact); // likewise
1715 sign = origSign; // IEEE754 requires this
1721 /* Normalized fused-multiply-add. */
1723 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1724 const APFloat &addend,
1725 roundingMode rounding_mode)
1729 assertArithmeticOK(*semantics);
1731 /* Post-multiplication sign, before addition. */
1732 sign ^= multiplicand.sign;
1734 /* If and only if all arguments are normal do we need to do an
1735 extended-precision calculation. */
1736 if (category == fcNormal &&
1737 multiplicand.category == fcNormal &&
1738 addend.category == fcNormal) {
1739 lostFraction lost_fraction;
1741 lost_fraction = multiplySignificand(multiplicand, &addend);
1742 fs = normalize(rounding_mode, lost_fraction);
1743 if (lost_fraction != lfExactlyZero)
1744 fs = (opStatus) (fs | opInexact);
1746 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1747 positive zero unless rounding to minus infinity, except that
1748 adding two like-signed zeroes gives that zero. */
1749 if (category == fcZero && sign != addend.sign)
1750 sign = (rounding_mode == rmTowardNegative);
1752 fs = multiplySpecials(multiplicand);
1754 /* FS can only be opOK or opInvalidOp. There is no more work
1755 to do in the latter case. The IEEE-754R standard says it is
1756 implementation-defined in this case whether, if ADDEND is a
1757 quiet NaN, we raise invalid op; this implementation does so.
1759 If we need to do the addition we can do so with normal
1762 fs = addOrSubtract(addend, rounding_mode, false);
1768 /* Comparison requires normalized numbers. */
1770 APFloat::compare(const APFloat &rhs) const
1774 assertArithmeticOK(*semantics);
1775 assert(semantics == rhs.semantics);
1777 switch (convolve(category, rhs.category)) {
1779 llvm_unreachable(0);
1781 case convolve(fcNaN, fcZero):
1782 case convolve(fcNaN, fcNormal):
1783 case convolve(fcNaN, fcInfinity):
1784 case convolve(fcNaN, fcNaN):
1785 case convolve(fcZero, fcNaN):
1786 case convolve(fcNormal, fcNaN):
1787 case convolve(fcInfinity, fcNaN):
1788 return cmpUnordered;
1790 case convolve(fcInfinity, fcNormal):
1791 case convolve(fcInfinity, fcZero):
1792 case convolve(fcNormal, fcZero):
1796 return cmpGreaterThan;
1798 case convolve(fcNormal, fcInfinity):
1799 case convolve(fcZero, fcInfinity):
1800 case convolve(fcZero, fcNormal):
1802 return cmpGreaterThan;
1806 case convolve(fcInfinity, fcInfinity):
1807 if (sign == rhs.sign)
1812 return cmpGreaterThan;
1814 case convolve(fcZero, fcZero):
1817 case convolve(fcNormal, fcNormal):
1821 /* Two normal numbers. Do they have the same sign? */
1822 if (sign != rhs.sign) {
1824 result = cmpLessThan;
1826 result = cmpGreaterThan;
1828 /* Compare absolute values; invert result if negative. */
1829 result = compareAbsoluteValue(rhs);
1832 if (result == cmpLessThan)
1833 result = cmpGreaterThan;
1834 else if (result == cmpGreaterThan)
1835 result = cmpLessThan;
1842 /// APFloat::convert - convert a value of one floating point type to another.
1843 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1844 /// records whether the transformation lost information, i.e. whether
1845 /// converting the result back to the original type will produce the
1846 /// original value (this is almost the same as return value==fsOK, but there
1847 /// are edge cases where this is not so).
1850 APFloat::convert(const fltSemantics &toSemantics,
1851 roundingMode rounding_mode, bool *losesInfo)
1853 lostFraction lostFraction;
1854 unsigned int newPartCount, oldPartCount;
1857 const fltSemantics &fromSemantics = *semantics;
1859 assertArithmeticOK(fromSemantics);
1860 assertArithmeticOK(toSemantics);
1861 lostFraction = lfExactlyZero;
1862 newPartCount = partCountForBits(toSemantics.precision + 1);
1863 oldPartCount = partCount();
1864 shift = toSemantics.precision - fromSemantics.precision;
1866 bool X86SpecialNan = false;
1867 if (&fromSemantics == &APFloat::x87DoubleExtended &&
1868 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1869 (!(*significandParts() & 0x8000000000000000ULL) ||
1870 !(*significandParts() & 0x4000000000000000ULL))) {
1871 // x86 has some unusual NaNs which cannot be represented in any other
1872 // format; note them here.
1873 X86SpecialNan = true;
1876 // If this is a truncation, perform the shift before we narrow the storage.
1877 if (shift < 0 && (category==fcNormal || category==fcNaN))
1878 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1880 // Fix the storage so it can hold to new value.
1881 if (newPartCount > oldPartCount) {
1882 // The new type requires more storage; make it available.
1883 integerPart *newParts;
1884 newParts = new integerPart[newPartCount];
1885 APInt::tcSet(newParts, 0, newPartCount);
1886 if (category==fcNormal || category==fcNaN)
1887 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1889 significand.parts = newParts;
1890 } else if (newPartCount == 1 && oldPartCount != 1) {
1891 // Switch to built-in storage for a single part.
1892 integerPart newPart = 0;
1893 if (category==fcNormal || category==fcNaN)
1894 newPart = significandParts()[0];
1896 significand.part = newPart;
1899 // Now that we have the right storage, switch the semantics.
1900 semantics = &toSemantics;
1902 // If this is an extension, perform the shift now that the storage is
1904 if (shift > 0 && (category==fcNormal || category==fcNaN))
1905 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1907 if (category == fcNormal) {
1908 fs = normalize(rounding_mode, lostFraction);
1909 *losesInfo = (fs != opOK);
1910 } else if (category == fcNaN) {
1911 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
1912 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1913 // does not give you back the same bits. This is dubious, and we
1914 // don't currently do it. You're really supposed to get
1915 // an invalid operation signal at runtime, but nobody does that.
1925 /* Convert a floating point number to an integer according to the
1926 rounding mode. If the rounded integer value is out of range this
1927 returns an invalid operation exception and the contents of the
1928 destination parts are unspecified. If the rounded value is in
1929 range but the floating point number is not the exact integer, the C
1930 standard doesn't require an inexact exception to be raised. IEEE
1931 854 does require it so we do that.
1933 Note that for conversions to integer type the C standard requires
1934 round-to-zero to always be used. */
1936 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1938 roundingMode rounding_mode,
1939 bool *isExact) const
1941 lostFraction lost_fraction;
1942 const integerPart *src;
1943 unsigned int dstPartsCount, truncatedBits;
1945 assertArithmeticOK(*semantics);
1949 /* Handle the three special cases first. */
1950 if (category == fcInfinity || category == fcNaN)
1953 dstPartsCount = partCountForBits(width);
1955 if (category == fcZero) {
1956 APInt::tcSet(parts, 0, dstPartsCount);
1957 // Negative zero can't be represented as an int.
1962 src = significandParts();
1964 /* Step 1: place our absolute value, with any fraction truncated, in
1967 /* Our absolute value is less than one; truncate everything. */
1968 APInt::tcSet(parts, 0, dstPartsCount);
1969 /* For exponent -1 the integer bit represents .5, look at that.
1970 For smaller exponents leftmost truncated bit is 0. */
1971 truncatedBits = semantics->precision -1U - exponent;
1973 /* We want the most significant (exponent + 1) bits; the rest are
1975 unsigned int bits = exponent + 1U;
1977 /* Hopelessly large in magnitude? */
1981 if (bits < semantics->precision) {
1982 /* We truncate (semantics->precision - bits) bits. */
1983 truncatedBits = semantics->precision - bits;
1984 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1986 /* We want at least as many bits as are available. */
1987 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1988 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1993 /* Step 2: work out any lost fraction, and increment the absolute
1994 value if we would round away from zero. */
1995 if (truncatedBits) {
1996 lost_fraction = lostFractionThroughTruncation(src, partCount(),
1998 if (lost_fraction != lfExactlyZero &&
1999 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2000 if (APInt::tcIncrement(parts, dstPartsCount))
2001 return opInvalidOp; /* Overflow. */
2004 lost_fraction = lfExactlyZero;
2007 /* Step 3: check if we fit in the destination. */
2008 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2012 /* Negative numbers cannot be represented as unsigned. */
2016 /* It takes omsb bits to represent the unsigned integer value.
2017 We lose a bit for the sign, but care is needed as the
2018 maximally negative integer is a special case. */
2019 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2022 /* This case can happen because of rounding. */
2027 APInt::tcNegate (parts, dstPartsCount);
2029 if (omsb >= width + !isSigned)
2033 if (lost_fraction == lfExactlyZero) {
2040 /* Same as convertToSignExtendedInteger, except we provide
2041 deterministic values in case of an invalid operation exception,
2042 namely zero for NaNs and the minimal or maximal value respectively
2043 for underflow or overflow.
2044 The *isExact output tells whether the result is exact, in the sense
2045 that converting it back to the original floating point type produces
2046 the original value. This is almost equivalent to result==opOK,
2047 except for negative zeroes.
2050 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2052 roundingMode rounding_mode, bool *isExact) const
2056 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2059 if (fs == opInvalidOp) {
2060 unsigned int bits, dstPartsCount;
2062 dstPartsCount = partCountForBits(width);
2064 if (category == fcNaN)
2069 bits = width - isSigned;
2071 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2072 if (sign && isSigned)
2073 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2079 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2080 an APSInt, whose initial bit-width and signed-ness are used to determine the
2081 precision of the conversion.
2084 APFloat::convertToInteger(APSInt &result,
2085 roundingMode rounding_mode, bool *isExact) const
2087 unsigned bitWidth = result.getBitWidth();
2088 SmallVector<uint64_t, 4> parts(result.getNumWords());
2089 opStatus status = convertToInteger(
2090 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2091 // Keeps the original signed-ness.
2092 result = APInt(bitWidth, parts);
2096 /* Convert an unsigned integer SRC to a floating point number,
2097 rounding according to ROUNDING_MODE. The sign of the floating
2098 point number is not modified. */
2100 APFloat::convertFromUnsignedParts(const integerPart *src,
2101 unsigned int srcCount,
2102 roundingMode rounding_mode)
2104 unsigned int omsb, precision, dstCount;
2106 lostFraction lost_fraction;
2108 assertArithmeticOK(*semantics);
2109 category = fcNormal;
2110 omsb = APInt::tcMSB(src, srcCount) + 1;
2111 dst = significandParts();
2112 dstCount = partCount();
2113 precision = semantics->precision;
2115 /* We want the most significant PRECISION bits of SRC. There may not
2116 be that many; extract what we can. */
2117 if (precision <= omsb) {
2118 exponent = omsb - 1;
2119 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2121 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2123 exponent = precision - 1;
2124 lost_fraction = lfExactlyZero;
2125 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2128 return normalize(rounding_mode, lost_fraction);
2132 APFloat::convertFromAPInt(const APInt &Val,
2134 roundingMode rounding_mode)
2136 unsigned int partCount = Val.getNumWords();
2140 if (isSigned && api.isNegative()) {
2145 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2148 /* Convert a two's complement integer SRC to a floating point number,
2149 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2150 integer is signed, in which case it must be sign-extended. */
2152 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2153 unsigned int srcCount,
2155 roundingMode rounding_mode)
2159 assertArithmeticOK(*semantics);
2161 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2164 /* If we're signed and negative negate a copy. */
2166 copy = new integerPart[srcCount];
2167 APInt::tcAssign(copy, src, srcCount);
2168 APInt::tcNegate(copy, srcCount);
2169 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2173 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2179 /* FIXME: should this just take a const APInt reference? */
2181 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2182 unsigned int width, bool isSigned,
2183 roundingMode rounding_mode)
2185 unsigned int partCount = partCountForBits(width);
2186 APInt api = APInt(width, makeArrayRef(parts, partCount));
2189 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2194 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2198 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2200 lostFraction lost_fraction = lfExactlyZero;
2201 integerPart *significand;
2202 unsigned int bitPos, partsCount;
2203 StringRef::iterator dot, firstSignificantDigit;
2207 category = fcNormal;
2209 significand = significandParts();
2210 partsCount = partCount();
2211 bitPos = partsCount * integerPartWidth;
2213 /* Skip leading zeroes and any (hexa)decimal point. */
2214 StringRef::iterator begin = s.begin();
2215 StringRef::iterator end = s.end();
2216 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2217 firstSignificantDigit = p;
2220 integerPart hex_value;
2223 assert(dot == end && "String contains multiple dots");
2230 hex_value = hexDigitValue(*p);
2231 if (hex_value == -1U) {
2240 /* Store the number whilst 4-bit nibbles remain. */
2243 hex_value <<= bitPos % integerPartWidth;
2244 significand[bitPos / integerPartWidth] |= hex_value;
2246 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2247 while (p != end && hexDigitValue(*p) != -1U)
2254 /* Hex floats require an exponent but not a hexadecimal point. */
2255 assert(p != end && "Hex strings require an exponent");
2256 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2257 assert(p != begin && "Significand has no digits");
2258 assert((dot == end || p - begin != 1) && "Significand has no digits");
2260 /* Ignore the exponent if we are zero. */
2261 if (p != firstSignificantDigit) {
2264 /* Implicit hexadecimal point? */
2268 /* Calculate the exponent adjustment implicit in the number of
2269 significant digits. */
2270 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2271 if (expAdjustment < 0)
2273 expAdjustment = expAdjustment * 4 - 1;
2275 /* Adjust for writing the significand starting at the most
2276 significant nibble. */
2277 expAdjustment += semantics->precision;
2278 expAdjustment -= partsCount * integerPartWidth;
2280 /* Adjust for the given exponent. */
2281 exponent = totalExponent(p + 1, end, expAdjustment);
2284 return normalize(rounding_mode, lost_fraction);
2288 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2289 unsigned sigPartCount, int exp,
2290 roundingMode rounding_mode)
2292 unsigned int parts, pow5PartCount;
2293 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2294 integerPart pow5Parts[maxPowerOfFiveParts];
2297 isNearest = (rounding_mode == rmNearestTiesToEven ||
2298 rounding_mode == rmNearestTiesToAway);
2300 parts = partCountForBits(semantics->precision + 11);
2302 /* Calculate pow(5, abs(exp)). */
2303 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2305 for (;; parts *= 2) {
2306 opStatus sigStatus, powStatus;
2307 unsigned int excessPrecision, truncatedBits;
2309 calcSemantics.precision = parts * integerPartWidth - 1;
2310 excessPrecision = calcSemantics.precision - semantics->precision;
2311 truncatedBits = excessPrecision;
2313 APFloat decSig(calcSemantics, fcZero, sign);
2314 APFloat pow5(calcSemantics, fcZero, false);
2316 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2317 rmNearestTiesToEven);
2318 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2319 rmNearestTiesToEven);
2320 /* Add exp, as 10^n = 5^n * 2^n. */
2321 decSig.exponent += exp;
2323 lostFraction calcLostFraction;
2324 integerPart HUerr, HUdistance;
2325 unsigned int powHUerr;
2328 /* multiplySignificand leaves the precision-th bit set to 1. */
2329 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2330 powHUerr = powStatus != opOK;
2332 calcLostFraction = decSig.divideSignificand(pow5);
2333 /* Denormal numbers have less precision. */
2334 if (decSig.exponent < semantics->minExponent) {
2335 excessPrecision += (semantics->minExponent - decSig.exponent);
2336 truncatedBits = excessPrecision;
2337 if (excessPrecision > calcSemantics.precision)
2338 excessPrecision = calcSemantics.precision;
2340 /* Extra half-ulp lost in reciprocal of exponent. */
2341 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2344 /* Both multiplySignificand and divideSignificand return the
2345 result with the integer bit set. */
2346 assert(APInt::tcExtractBit
2347 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2349 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2351 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2352 excessPrecision, isNearest);
2354 /* Are we guaranteed to round correctly if we truncate? */
2355 if (HUdistance >= HUerr) {
2356 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2357 calcSemantics.precision - excessPrecision,
2359 /* Take the exponent of decSig. If we tcExtract-ed less bits
2360 above we must adjust our exponent to compensate for the
2361 implicit right shift. */
2362 exponent = (decSig.exponent + semantics->precision
2363 - (calcSemantics.precision - excessPrecision));
2364 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2367 return normalize(rounding_mode, calcLostFraction);
2373 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2378 /* Scan the text. */
2379 StringRef::iterator p = str.begin();
2380 interpretDecimal(p, str.end(), &D);
2382 /* Handle the quick cases. First the case of no significant digits,
2383 i.e. zero, and then exponents that are obviously too large or too
2384 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2385 definitely overflows if
2387 (exp - 1) * L >= maxExponent
2389 and definitely underflows to zero where
2391 (exp + 1) * L <= minExponent - precision
2393 With integer arithmetic the tightest bounds for L are
2395 93/28 < L < 196/59 [ numerator <= 256 ]
2396 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2399 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2403 /* Check whether the normalized exponent is high enough to overflow
2404 max during the log-rebasing in the max-exponent check below. */
2405 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2406 fs = handleOverflow(rounding_mode);
2408 /* If it wasn't, then it also wasn't high enough to overflow max
2409 during the log-rebasing in the min-exponent check. Check that it
2410 won't overflow min in either check, then perform the min-exponent
2412 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2413 (D.normalizedExponent + 1) * 28738 <=
2414 8651 * (semantics->minExponent - (int) semantics->precision)) {
2415 /* Underflow to zero and round. */
2417 fs = normalize(rounding_mode, lfLessThanHalf);
2419 /* We can finally safely perform the max-exponent check. */
2420 } else if ((D.normalizedExponent - 1) * 42039
2421 >= 12655 * semantics->maxExponent) {
2422 /* Overflow and round. */
2423 fs = handleOverflow(rounding_mode);
2425 integerPart *decSignificand;
2426 unsigned int partCount;
2428 /* A tight upper bound on number of bits required to hold an
2429 N-digit decimal integer is N * 196 / 59. Allocate enough space
2430 to hold the full significand, and an extra part required by
2432 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2433 partCount = partCountForBits(1 + 196 * partCount / 59);
2434 decSignificand = new integerPart[partCount + 1];
2437 /* Convert to binary efficiently - we do almost all multiplication
2438 in an integerPart. When this would overflow do we do a single
2439 bignum multiplication, and then revert again to multiplication
2440 in an integerPart. */
2442 integerPart decValue, val, multiplier;
2450 if (p == str.end()) {
2454 decValue = decDigitValue(*p++);
2455 assert(decValue < 10U && "Invalid character in significand");
2457 val = val * 10 + decValue;
2458 /* The maximum number that can be multiplied by ten with any
2459 digit added without overflowing an integerPart. */
2460 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2462 /* Multiply out the current part. */
2463 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2464 partCount, partCount + 1, false);
2466 /* If we used another part (likely but not guaranteed), increase
2468 if (decSignificand[partCount])
2470 } while (p <= D.lastSigDigit);
2472 category = fcNormal;
2473 fs = roundSignificandWithExponent(decSignificand, partCount,
2474 D.exponent, rounding_mode);
2476 delete [] decSignificand;
2483 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2485 assertArithmeticOK(*semantics);
2486 assert(!str.empty() && "Invalid string length");
2488 /* Handle a leading minus sign. */
2489 StringRef::iterator p = str.begin();
2490 size_t slen = str.size();
2491 sign = *p == '-' ? 1 : 0;
2492 if (*p == '-' || *p == '+') {
2495 assert(slen && "String has no digits");
2498 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2499 assert(slen - 2 && "Invalid string");
2500 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2504 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2507 /* Write out a hexadecimal representation of the floating point value
2508 to DST, which must be of sufficient size, in the C99 form
2509 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2510 excluding the terminating NUL.
2512 If UPPERCASE, the output is in upper case, otherwise in lower case.
2514 HEXDIGITS digits appear altogether, rounding the value if
2515 necessary. If HEXDIGITS is 0, the minimal precision to display the
2516 number precisely is used instead. If nothing would appear after
2517 the decimal point it is suppressed.
2519 The decimal exponent is always printed and has at least one digit.
2520 Zero values display an exponent of zero. Infinities and NaNs
2521 appear as "infinity" or "nan" respectively.
2523 The above rules are as specified by C99. There is ambiguity about
2524 what the leading hexadecimal digit should be. This implementation
2525 uses whatever is necessary so that the exponent is displayed as
2526 stored. This implies the exponent will fall within the IEEE format
2527 range, and the leading hexadecimal digit will be 0 (for denormals),
2528 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2529 any other digits zero).
2532 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2533 bool upperCase, roundingMode rounding_mode) const
2537 assertArithmeticOK(*semantics);
2545 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2546 dst += sizeof infinityL - 1;
2550 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2551 dst += sizeof NaNU - 1;
2556 *dst++ = upperCase ? 'X': 'x';
2558 if (hexDigits > 1) {
2560 memset (dst, '0', hexDigits - 1);
2561 dst += hexDigits - 1;
2563 *dst++ = upperCase ? 'P': 'p';
2568 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2574 return static_cast<unsigned int>(dst - p);
2577 /* Does the hard work of outputting the correctly rounded hexadecimal
2578 form of a normal floating point number with the specified number of
2579 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2580 digits necessary to print the value precisely is output. */
2582 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2584 roundingMode rounding_mode) const
2586 unsigned int count, valueBits, shift, partsCount, outputDigits;
2587 const char *hexDigitChars;
2588 const integerPart *significand;
2593 *dst++ = upperCase ? 'X': 'x';
2596 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2598 significand = significandParts();
2599 partsCount = partCount();
2601 /* +3 because the first digit only uses the single integer bit, so
2602 we have 3 virtual zero most-significant-bits. */
2603 valueBits = semantics->precision + 3;
2604 shift = integerPartWidth - valueBits % integerPartWidth;
2606 /* The natural number of digits required ignoring trailing
2607 insignificant zeroes. */
2608 outputDigits = (valueBits - significandLSB () + 3) / 4;
2610 /* hexDigits of zero means use the required number for the
2611 precision. Otherwise, see if we are truncating. If we are,
2612 find out if we need to round away from zero. */
2614 if (hexDigits < outputDigits) {
2615 /* We are dropping non-zero bits, so need to check how to round.
2616 "bits" is the number of dropped bits. */
2618 lostFraction fraction;
2620 bits = valueBits - hexDigits * 4;
2621 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2622 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2624 outputDigits = hexDigits;
2627 /* Write the digits consecutively, and start writing in the location
2628 of the hexadecimal point. We move the most significant digit
2629 left and add the hexadecimal point later. */
2632 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2634 while (outputDigits && count) {
2637 /* Put the most significant integerPartWidth bits in "part". */
2638 if (--count == partsCount)
2639 part = 0; /* An imaginary higher zero part. */
2641 part = significand[count] << shift;
2644 part |= significand[count - 1] >> (integerPartWidth - shift);
2646 /* Convert as much of "part" to hexdigits as we can. */
2647 unsigned int curDigits = integerPartWidth / 4;
2649 if (curDigits > outputDigits)
2650 curDigits = outputDigits;
2651 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2652 outputDigits -= curDigits;
2658 /* Note that hexDigitChars has a trailing '0'. */
2661 *q = hexDigitChars[hexDigitValue (*q) + 1];
2662 } while (*q == '0');
2665 /* Add trailing zeroes. */
2666 memset (dst, '0', outputDigits);
2667 dst += outputDigits;
2670 /* Move the most significant digit to before the point, and if there
2671 is something after the decimal point add it. This must come
2672 after rounding above. */
2679 /* Finally output the exponent. */
2680 *dst++ = upperCase ? 'P': 'p';
2682 return writeSignedDecimal (dst, exponent);
2685 hash_code llvm::hash_value(const APFloat &Arg) {
2686 if (Arg.category != APFloat::fcNormal)
2687 return hash_combine((uint8_t)Arg.category,
2688 // NaN has no sign, fix it at zero.
2689 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2690 Arg.semantics->precision);
2692 // Normal floats need their exponent and significand hashed.
2693 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2694 Arg.semantics->precision, Arg.exponent,
2696 Arg.significandParts(),
2697 Arg.significandParts() + Arg.partCount()));
2700 // Conversion from APFloat to/from host float/double. It may eventually be
2701 // possible to eliminate these and have everybody deal with APFloats, but that
2702 // will take a while. This approach will not easily extend to long double.
2703 // Current implementation requires integerPartWidth==64, which is correct at
2704 // the moment but could be made more general.
2706 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2707 // the actual IEEE respresentations. We compensate for that here.
2710 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2712 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2713 assert(partCount()==2);
2715 uint64_t myexponent, mysignificand;
2717 if (category==fcNormal) {
2718 myexponent = exponent+16383; //bias
2719 mysignificand = significandParts()[0];
2720 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2721 myexponent = 0; // denormal
2722 } else if (category==fcZero) {
2725 } else if (category==fcInfinity) {
2726 myexponent = 0x7fff;
2727 mysignificand = 0x8000000000000000ULL;
2729 assert(category == fcNaN && "Unknown category");
2730 myexponent = 0x7fff;
2731 mysignificand = significandParts()[0];
2735 words[0] = mysignificand;
2736 words[1] = ((uint64_t)(sign & 1) << 15) |
2737 (myexponent & 0x7fffLL);
2738 return APInt(80, words);
2742 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2744 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2745 assert(partCount()==2);
2747 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2749 if (category==fcNormal) {
2750 myexponent = exponent + 1023; //bias
2751 myexponent2 = exponent2 + 1023;
2752 mysignificand = significandParts()[0];
2753 mysignificand2 = significandParts()[1];
2754 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2755 myexponent = 0; // denormal
2756 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2757 myexponent2 = 0; // denormal
2758 } else if (category==fcZero) {
2763 } else if (category==fcInfinity) {
2769 assert(category == fcNaN && "Unknown category");
2771 mysignificand = significandParts()[0];
2772 myexponent2 = exponent2;
2773 mysignificand2 = significandParts()[1];
2777 words[0] = ((uint64_t)(sign & 1) << 63) |
2778 ((myexponent & 0x7ff) << 52) |
2779 (mysignificand & 0xfffffffffffffLL);
2780 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2781 ((myexponent2 & 0x7ff) << 52) |
2782 (mysignificand2 & 0xfffffffffffffLL);
2783 return APInt(128, words);
2787 APFloat::convertQuadrupleAPFloatToAPInt() const
2789 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2790 assert(partCount()==2);
2792 uint64_t myexponent, mysignificand, mysignificand2;
2794 if (category==fcNormal) {
2795 myexponent = exponent+16383; //bias
2796 mysignificand = significandParts()[0];
2797 mysignificand2 = significandParts()[1];
2798 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2799 myexponent = 0; // denormal
2800 } else if (category==fcZero) {
2802 mysignificand = mysignificand2 = 0;
2803 } else if (category==fcInfinity) {
2804 myexponent = 0x7fff;
2805 mysignificand = mysignificand2 = 0;
2807 assert(category == fcNaN && "Unknown category!");
2808 myexponent = 0x7fff;
2809 mysignificand = significandParts()[0];
2810 mysignificand2 = significandParts()[1];
2814 words[0] = mysignificand;
2815 words[1] = ((uint64_t)(sign & 1) << 63) |
2816 ((myexponent & 0x7fff) << 48) |
2817 (mysignificand2 & 0xffffffffffffLL);
2819 return APInt(128, words);
2823 APFloat::convertDoubleAPFloatToAPInt() const
2825 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2826 assert(partCount()==1);
2828 uint64_t myexponent, mysignificand;
2830 if (category==fcNormal) {
2831 myexponent = exponent+1023; //bias
2832 mysignificand = *significandParts();
2833 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2834 myexponent = 0; // denormal
2835 } else if (category==fcZero) {
2838 } else if (category==fcInfinity) {
2842 assert(category == fcNaN && "Unknown category!");
2844 mysignificand = *significandParts();
2847 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2848 ((myexponent & 0x7ff) << 52) |
2849 (mysignificand & 0xfffffffffffffLL))));
2853 APFloat::convertFloatAPFloatToAPInt() const
2855 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2856 assert(partCount()==1);
2858 uint32_t myexponent, mysignificand;
2860 if (category==fcNormal) {
2861 myexponent = exponent+127; //bias
2862 mysignificand = (uint32_t)*significandParts();
2863 if (myexponent == 1 && !(mysignificand & 0x800000))
2864 myexponent = 0; // denormal
2865 } else if (category==fcZero) {
2868 } else if (category==fcInfinity) {
2872 assert(category == fcNaN && "Unknown category!");
2874 mysignificand = (uint32_t)*significandParts();
2877 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2878 (mysignificand & 0x7fffff)));
2882 APFloat::convertHalfAPFloatToAPInt() const
2884 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2885 assert(partCount()==1);
2887 uint32_t myexponent, mysignificand;
2889 if (category==fcNormal) {
2890 myexponent = exponent+15; //bias
2891 mysignificand = (uint32_t)*significandParts();
2892 if (myexponent == 1 && !(mysignificand & 0x400))
2893 myexponent = 0; // denormal
2894 } else if (category==fcZero) {
2897 } else if (category==fcInfinity) {
2901 assert(category == fcNaN && "Unknown category!");
2903 mysignificand = (uint32_t)*significandParts();
2906 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2907 (mysignificand & 0x3ff)));
2910 // This function creates an APInt that is just a bit map of the floating
2911 // point constant as it would appear in memory. It is not a conversion,
2912 // and treating the result as a normal integer is unlikely to be useful.
2915 APFloat::bitcastToAPInt() const
2917 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2918 return convertHalfAPFloatToAPInt();
2920 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2921 return convertFloatAPFloatToAPInt();
2923 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2924 return convertDoubleAPFloatToAPInt();
2926 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2927 return convertQuadrupleAPFloatToAPInt();
2929 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2930 return convertPPCDoubleDoubleAPFloatToAPInt();
2932 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2934 return convertF80LongDoubleAPFloatToAPInt();
2938 APFloat::convertToFloat() const
2940 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2941 "Float semantics are not IEEEsingle");
2942 APInt api = bitcastToAPInt();
2943 return api.bitsToFloat();
2947 APFloat::convertToDouble() const
2949 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2950 "Float semantics are not IEEEdouble");
2951 APInt api = bitcastToAPInt();
2952 return api.bitsToDouble();
2955 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2956 /// does not support these bit patterns:
2957 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2958 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2959 /// exponent = 0, integer bit 1 ("pseudodenormal")
2960 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2961 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2963 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2965 assert(api.getBitWidth()==80);
2966 uint64_t i1 = api.getRawData()[0];
2967 uint64_t i2 = api.getRawData()[1];
2968 uint64_t myexponent = (i2 & 0x7fff);
2969 uint64_t mysignificand = i1;
2971 initialize(&APFloat::x87DoubleExtended);
2972 assert(partCount()==2);
2974 sign = static_cast<unsigned int>(i2>>15);
2975 if (myexponent==0 && mysignificand==0) {
2976 // exponent, significand meaningless
2978 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2979 // exponent, significand meaningless
2980 category = fcInfinity;
2981 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2982 // exponent meaningless
2984 significandParts()[0] = mysignificand;
2985 significandParts()[1] = 0;
2987 category = fcNormal;
2988 exponent = myexponent - 16383;
2989 significandParts()[0] = mysignificand;
2990 significandParts()[1] = 0;
2991 if (myexponent==0) // denormal
2997 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2999 assert(api.getBitWidth()==128);
3000 uint64_t i1 = api.getRawData()[0];
3001 uint64_t i2 = api.getRawData()[1];
3002 uint64_t myexponent = (i1 >> 52) & 0x7ff;
3003 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
3004 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
3005 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
3007 initialize(&APFloat::PPCDoubleDouble);
3008 assert(partCount()==2);
3010 sign = static_cast<unsigned int>(i1>>63);
3011 sign2 = static_cast<unsigned int>(i2>>63);
3012 if (myexponent==0 && mysignificand==0) {
3013 // exponent, significand meaningless
3014 // exponent2 and significand2 are required to be 0; we don't check
3016 } else if (myexponent==0x7ff && mysignificand==0) {
3017 // exponent, significand meaningless
3018 // exponent2 and significand2 are required to be 0; we don't check
3019 category = fcInfinity;
3020 } else if (myexponent==0x7ff && mysignificand!=0) {
3021 // exponent meaningless. So is the whole second word, but keep it
3024 exponent2 = myexponent2;
3025 significandParts()[0] = mysignificand;
3026 significandParts()[1] = mysignificand2;
3028 category = fcNormal;
3029 // Note there is no category2; the second word is treated as if it is
3030 // fcNormal, although it might be something else considered by itself.
3031 exponent = myexponent - 1023;
3032 exponent2 = myexponent2 - 1023;
3033 significandParts()[0] = mysignificand;
3034 significandParts()[1] = mysignificand2;
3035 if (myexponent==0) // denormal
3038 significandParts()[0] |= 0x10000000000000LL; // integer bit
3042 significandParts()[1] |= 0x10000000000000LL; // integer bit
3047 APFloat::initFromQuadrupleAPInt(const APInt &api)
3049 assert(api.getBitWidth()==128);
3050 uint64_t i1 = api.getRawData()[0];
3051 uint64_t i2 = api.getRawData()[1];
3052 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3053 uint64_t mysignificand = i1;
3054 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3056 initialize(&APFloat::IEEEquad);
3057 assert(partCount()==2);
3059 sign = static_cast<unsigned int>(i2>>63);
3060 if (myexponent==0 &&
3061 (mysignificand==0 && mysignificand2==0)) {
3062 // exponent, significand meaningless
3064 } else if (myexponent==0x7fff &&
3065 (mysignificand==0 && mysignificand2==0)) {
3066 // exponent, significand meaningless
3067 category = fcInfinity;
3068 } else if (myexponent==0x7fff &&
3069 (mysignificand!=0 || mysignificand2 !=0)) {
3070 // exponent meaningless
3072 significandParts()[0] = mysignificand;
3073 significandParts()[1] = mysignificand2;
3075 category = fcNormal;
3076 exponent = myexponent - 16383;
3077 significandParts()[0] = mysignificand;
3078 significandParts()[1] = mysignificand2;
3079 if (myexponent==0) // denormal
3082 significandParts()[1] |= 0x1000000000000LL; // integer bit
3087 APFloat::initFromDoubleAPInt(const APInt &api)
3089 assert(api.getBitWidth()==64);
3090 uint64_t i = *api.getRawData();
3091 uint64_t myexponent = (i >> 52) & 0x7ff;
3092 uint64_t mysignificand = i & 0xfffffffffffffLL;
3094 initialize(&APFloat::IEEEdouble);
3095 assert(partCount()==1);
3097 sign = static_cast<unsigned int>(i>>63);
3098 if (myexponent==0 && mysignificand==0) {
3099 // exponent, significand meaningless
3101 } else if (myexponent==0x7ff && mysignificand==0) {
3102 // exponent, significand meaningless
3103 category = fcInfinity;
3104 } else if (myexponent==0x7ff && mysignificand!=0) {
3105 // exponent meaningless
3107 *significandParts() = mysignificand;
3109 category = fcNormal;
3110 exponent = myexponent - 1023;
3111 *significandParts() = mysignificand;
3112 if (myexponent==0) // denormal
3115 *significandParts() |= 0x10000000000000LL; // integer bit
3120 APFloat::initFromFloatAPInt(const APInt & api)
3122 assert(api.getBitWidth()==32);
3123 uint32_t i = (uint32_t)*api.getRawData();
3124 uint32_t myexponent = (i >> 23) & 0xff;
3125 uint32_t mysignificand = i & 0x7fffff;
3127 initialize(&APFloat::IEEEsingle);
3128 assert(partCount()==1);
3131 if (myexponent==0 && mysignificand==0) {
3132 // exponent, significand meaningless
3134 } else if (myexponent==0xff && mysignificand==0) {
3135 // exponent, significand meaningless
3136 category = fcInfinity;
3137 } else if (myexponent==0xff && mysignificand!=0) {
3138 // sign, exponent, significand meaningless
3140 *significandParts() = mysignificand;
3142 category = fcNormal;
3143 exponent = myexponent - 127; //bias
3144 *significandParts() = mysignificand;
3145 if (myexponent==0) // denormal
3148 *significandParts() |= 0x800000; // integer bit
3153 APFloat::initFromHalfAPInt(const APInt & api)
3155 assert(api.getBitWidth()==16);
3156 uint32_t i = (uint32_t)*api.getRawData();
3157 uint32_t myexponent = (i >> 10) & 0x1f;
3158 uint32_t mysignificand = i & 0x3ff;
3160 initialize(&APFloat::IEEEhalf);
3161 assert(partCount()==1);
3164 if (myexponent==0 && mysignificand==0) {
3165 // exponent, significand meaningless
3167 } else if (myexponent==0x1f && mysignificand==0) {
3168 // exponent, significand meaningless
3169 category = fcInfinity;
3170 } else if (myexponent==0x1f && mysignificand!=0) {
3171 // sign, exponent, significand meaningless
3173 *significandParts() = mysignificand;
3175 category = fcNormal;
3176 exponent = myexponent - 15; //bias
3177 *significandParts() = mysignificand;
3178 if (myexponent==0) // denormal
3181 *significandParts() |= 0x400; // integer bit
3185 /// Treat api as containing the bits of a floating point number. Currently
3186 /// we infer the floating point type from the size of the APInt. The
3187 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3188 /// when the size is anything else).
3190 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
3192 if (api.getBitWidth() == 16)
3193 return initFromHalfAPInt(api);
3194 else if (api.getBitWidth() == 32)
3195 return initFromFloatAPInt(api);
3196 else if (api.getBitWidth()==64)
3197 return initFromDoubleAPInt(api);
3198 else if (api.getBitWidth()==80)
3199 return initFromF80LongDoubleAPInt(api);
3200 else if (api.getBitWidth()==128)
3202 initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api));
3204 llvm_unreachable(0);
3208 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3210 return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE);
3213 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3214 APFloat Val(Sem, fcNormal, Negative);
3216 // We want (in interchange format):
3217 // sign = {Negative}
3219 // significand = 1..1
3221 Val.exponent = Sem.maxExponent; // unbiased
3223 // 1-initialize all bits....
3224 Val.zeroSignificand();
3225 integerPart *significand = Val.significandParts();
3226 unsigned N = partCountForBits(Sem.precision);
3227 for (unsigned i = 0; i != N; ++i)
3228 significand[i] = ~((integerPart) 0);
3230 // ...and then clear the top bits for internal consistency.
3231 if (Sem.precision % integerPartWidth != 0)
3233 (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1;
3238 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3239 APFloat Val(Sem, fcNormal, Negative);
3241 // We want (in interchange format):
3242 // sign = {Negative}
3244 // significand = 0..01
3246 Val.exponent = Sem.minExponent; // unbiased
3247 Val.zeroSignificand();
3248 Val.significandParts()[0] = 1;
3252 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3253 APFloat Val(Sem, fcNormal, Negative);
3255 // We want (in interchange format):
3256 // sign = {Negative}
3258 // significand = 10..0
3260 Val.exponent = Sem.minExponent;
3261 Val.zeroSignificand();
3262 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3263 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3268 APFloat::APFloat(const APInt& api, bool isIEEE) : exponent2(0), sign2(0) {
3269 initFromAPInt(api, isIEEE);
3272 APFloat::APFloat(float f) : exponent2(0), sign2(0) {
3273 initFromAPInt(APInt::floatToBits(f));
3276 APFloat::APFloat(double d) : exponent2(0), sign2(0) {
3277 initFromAPInt(APInt::doubleToBits(d));
3281 static void append(SmallVectorImpl<char> &Buffer,
3282 unsigned N, const char *Str) {
3283 unsigned Start = Buffer.size();
3284 Buffer.set_size(Start + N);
3285 memcpy(&Buffer[Start], Str, N);
3288 template <unsigned N>
3289 void append(SmallVectorImpl<char> &Buffer, const char (&Str)[N]) {
3290 append(Buffer, N, Str);
3293 /// Removes data from the given significand until it is no more
3294 /// precise than is required for the desired precision.
3295 void AdjustToPrecision(APInt &significand,
3296 int &exp, unsigned FormatPrecision) {
3297 unsigned bits = significand.getActiveBits();
3299 // 196/59 is a very slight overestimate of lg_2(10).
3300 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3302 if (bits <= bitsRequired) return;
3304 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3305 if (!tensRemovable) return;
3307 exp += tensRemovable;
3309 APInt divisor(significand.getBitWidth(), 1);
3310 APInt powten(significand.getBitWidth(), 10);
3312 if (tensRemovable & 1)
3314 tensRemovable >>= 1;
3315 if (!tensRemovable) break;
3319 significand = significand.udiv(divisor);
3321 // Truncate the significand down to its active bit count, but
3322 // don't try to drop below 32.
3323 unsigned newPrecision = std::max(32U, significand.getActiveBits());
3324 significand = significand.trunc(newPrecision);
3328 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3329 int &exp, unsigned FormatPrecision) {
3330 unsigned N = buffer.size();
3331 if (N <= FormatPrecision) return;
3333 // The most significant figures are the last ones in the buffer.
3334 unsigned FirstSignificant = N - FormatPrecision;
3337 // FIXME: this probably shouldn't use 'round half up'.
3339 // Rounding down is just a truncation, except we also want to drop
3340 // trailing zeros from the new result.
3341 if (buffer[FirstSignificant - 1] < '5') {
3342 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3345 exp += FirstSignificant;
3346 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3350 // Rounding up requires a decimal add-with-carry. If we continue
3351 // the carry, the newly-introduced zeros will just be truncated.
3352 for (unsigned I = FirstSignificant; I != N; ++I) {
3353 if (buffer[I] == '9') {
3361 // If we carried through, we have exactly one digit of precision.
3362 if (FirstSignificant == N) {
3363 exp += FirstSignificant;
3365 buffer.push_back('1');
3369 exp += FirstSignificant;
3370 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3374 void APFloat::toString(SmallVectorImpl<char> &Str,
3375 unsigned FormatPrecision,
3376 unsigned FormatMaxPadding) const {
3380 return append(Str, "-Inf");
3382 return append(Str, "+Inf");
3384 case fcNaN: return append(Str, "NaN");
3390 if (!FormatMaxPadding)
3391 append(Str, "0.0E+0");
3403 // Decompose the number into an APInt and an exponent.
3404 int exp = exponent - ((int) semantics->precision - 1);
3405 APInt significand(semantics->precision,
3406 makeArrayRef(significandParts(),
3407 partCountForBits(semantics->precision)));
3409 // Set FormatPrecision if zero. We want to do this before we
3410 // truncate trailing zeros, as those are part of the precision.
3411 if (!FormatPrecision) {
3412 // It's an interesting question whether to use the nominal
3413 // precision or the active precision here for denormals.
3415 // FormatPrecision = ceil(significandBits / lg_2(10))
3416 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3419 // Ignore trailing binary zeros.
3420 int trailingZeros = significand.countTrailingZeros();
3421 exp += trailingZeros;
3422 significand = significand.lshr(trailingZeros);
3424 // Change the exponent from 2^e to 10^e.
3427 } else if (exp > 0) {
3429 significand = significand.zext(semantics->precision + exp);
3430 significand <<= exp;
3432 } else { /* exp < 0 */
3435 // We transform this using the identity:
3436 // (N)(2^-e) == (N)(5^e)(10^-e)
3437 // This means we have to multiply N (the significand) by 5^e.
3438 // To avoid overflow, we have to operate on numbers large
3439 // enough to store N * 5^e:
3440 // log2(N * 5^e) == log2(N) + e * log2(5)
3441 // <= semantics->precision + e * 137 / 59
3442 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3444 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3446 // Multiply significand by 5^e.
3447 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3448 significand = significand.zext(precision);
3449 APInt five_to_the_i(precision, 5);
3451 if (texp & 1) significand *= five_to_the_i;
3455 five_to_the_i *= five_to_the_i;
3459 AdjustToPrecision(significand, exp, FormatPrecision);
3461 llvm::SmallVector<char, 256> buffer;
3464 unsigned precision = significand.getBitWidth();
3465 APInt ten(precision, 10);
3466 APInt digit(precision, 0);
3468 bool inTrail = true;
3469 while (significand != 0) {
3470 // digit <- significand % 10
3471 // significand <- significand / 10
3472 APInt::udivrem(significand, ten, significand, digit);
3474 unsigned d = digit.getZExtValue();
3476 // Drop trailing zeros.
3477 if (inTrail && !d) exp++;
3479 buffer.push_back((char) ('0' + d));
3484 assert(!buffer.empty() && "no characters in buffer!");
3486 // Drop down to FormatPrecision.
3487 // TODO: don't do more precise calculations above than are required.
3488 AdjustToPrecision(buffer, exp, FormatPrecision);
3490 unsigned NDigits = buffer.size();
3492 // Check whether we should use scientific notation.
3493 bool FormatScientific;
3494 if (!FormatMaxPadding)
3495 FormatScientific = true;
3500 // But we shouldn't make the number look more precise than it is.
3501 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3502 NDigits + (unsigned) exp > FormatPrecision);
3504 // Power of the most significant digit.
3505 int MSD = exp + (int) (NDigits - 1);
3508 FormatScientific = false;
3510 // 765e-5 == 0.00765
3512 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3517 // Scientific formatting is pretty straightforward.
3518 if (FormatScientific) {
3519 exp += (NDigits - 1);
3521 Str.push_back(buffer[NDigits-1]);
3526 for (unsigned I = 1; I != NDigits; ++I)
3527 Str.push_back(buffer[NDigits-1-I]);
3530 Str.push_back(exp >= 0 ? '+' : '-');
3531 if (exp < 0) exp = -exp;
3532 SmallVector<char, 6> expbuf;
3534 expbuf.push_back((char) ('0' + (exp % 10)));
3537 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3538 Str.push_back(expbuf[E-1-I]);
3542 // Non-scientific, positive exponents.
3544 for (unsigned I = 0; I != NDigits; ++I)
3545 Str.push_back(buffer[NDigits-1-I]);
3546 for (unsigned I = 0; I != (unsigned) exp; ++I)
3551 // Non-scientific, negative exponents.
3553 // The number of digits to the left of the decimal point.
3554 int NWholeDigits = exp + (int) NDigits;
3557 if (NWholeDigits > 0) {
3558 for (; I != (unsigned) NWholeDigits; ++I)
3559 Str.push_back(buffer[NDigits-I-1]);
3562 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3566 for (unsigned Z = 1; Z != NZeros; ++Z)
3570 for (; I != NDigits; ++I)
3571 Str.push_back(buffer[NDigits-I-1]);
3574 bool APFloat::getExactInverse(APFloat *inv) const {
3575 // We can only guarantee the existence of an exact inverse for IEEE floats.
3576 if (semantics != &IEEEhalf && semantics != &IEEEsingle &&
3577 semantics != &IEEEdouble && semantics != &IEEEquad)
3580 // Special floats and denormals have no exact inverse.
3581 if (category != fcNormal)
3584 // Check that the number is a power of two by making sure that only the
3585 // integer bit is set in the significand.
3586 if (significandLSB() != semantics->precision - 1)
3590 APFloat reciprocal(*semantics, 1ULL);
3591 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3594 // Avoid multiplication with a denormal, it is not safe on all platforms and
3595 // may be slower than a normal division.
3596 if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
3599 assert(reciprocal.category == fcNormal &&
3600 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);