1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/Support/ErrorHandling.h"
19 #include "llvm/Support/MathExtras.h"
24 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
26 /* Assumed in hexadecimal significand parsing, and conversion to
27 hexadecimal strings. */
28 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
29 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
33 /* Represents floating point arithmetic semantics. */
35 /* The largest E such that 2^E is representable; this matches the
36 definition of IEEE 754. */
37 exponent_t maxExponent;
39 /* The smallest E such that 2^E is a normalized number; this
40 matches the definition of IEEE 754. */
41 exponent_t minExponent;
43 /* Number of bits in the significand. This includes the integer
45 unsigned int precision;
47 /* True if arithmetic is supported. */
48 unsigned int arithmeticOK;
51 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, true };
52 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
53 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
54 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
55 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
56 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
58 // The PowerPC format consists of two doubles. It does not map cleanly
59 // onto the usual format above. For now only storage of constants of
60 // this type is supported, no arithmetic.
61 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
63 /* A tight upper bound on number of parts required to hold the value
66 power * 815 / (351 * integerPartWidth) + 1
68 However, whilst the result may require only this many parts,
69 because we are multiplying two values to get it, the
70 multiplication may require an extra part with the excess part
71 being zero (consider the trivial case of 1 * 1, tcFullMultiply
72 requires two parts to hold the single-part result). So we add an
73 extra one to guarantee enough space whilst multiplying. */
74 const unsigned int maxExponent = 16383;
75 const unsigned int maxPrecision = 113;
76 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
77 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
78 / (351 * integerPartWidth));
81 /* A bunch of private, handy routines. */
83 static inline unsigned int
84 partCountForBits(unsigned int bits)
86 return ((bits) + integerPartWidth - 1) / integerPartWidth;
89 /* Returns 0U-9U. Return values >= 10U are not digits. */
90 static inline unsigned int
91 decDigitValue(unsigned int c)
97 hexDigitValue(unsigned int c)
117 assertArithmeticOK(const llvm::fltSemantics &semantics) {
118 assert(semantics.arithmeticOK
119 && "Compile-time arithmetic does not support these semantics");
122 /* Return the value of a decimal exponent of the form
125 If the exponent overflows, returns a large exponent with the
128 readExponent(StringRef::iterator begin, StringRef::iterator end)
131 unsigned int absExponent;
132 const unsigned int overlargeExponent = 24000; /* FIXME. */
133 StringRef::iterator p = begin;
135 assert(p != end && "Exponent has no digits");
137 isNegative = (*p == '-');
138 if (*p == '-' || *p == '+') {
140 assert(p != end && "Exponent has no digits");
143 absExponent = decDigitValue(*p++);
144 assert(absExponent < 10U && "Invalid character in exponent");
146 for (; p != end; ++p) {
149 value = decDigitValue(*p);
150 assert(value < 10U && "Invalid character in exponent");
152 value += absExponent * 10;
153 if (absExponent >= overlargeExponent) {
154 absExponent = overlargeExponent;
160 assert(p == end && "Invalid exponent in exponent");
163 return -(int) absExponent;
165 return (int) absExponent;
168 /* This is ugly and needs cleaning up, but I don't immediately see
169 how whilst remaining safe. */
171 totalExponent(StringRef::iterator p, StringRef::iterator end,
172 int exponentAdjustment)
174 int unsignedExponent;
175 bool negative, overflow;
178 assert(p != end && "Exponent has no digits");
180 negative = *p == '-';
181 if(*p == '-' || *p == '+') {
183 assert(p != end && "Exponent has no digits");
186 unsignedExponent = 0;
188 for(; p != end; ++p) {
191 value = decDigitValue(*p);
192 assert(value < 10U && "Invalid character in exponent");
194 unsignedExponent = unsignedExponent * 10 + value;
195 if(unsignedExponent > 65535)
199 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
203 exponent = unsignedExponent;
205 exponent = -exponent;
206 exponent += exponentAdjustment;
207 if(exponent > 65535 || exponent < -65536)
212 exponent = negative ? -65536: 65535;
217 static StringRef::iterator
218 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
219 StringRef::iterator *dot)
221 StringRef::iterator p = begin;
223 while(*p == '0' && p != end)
229 assert(end - begin != 1 && "Significand has no digits");
231 while(*p == '0' && p != end)
238 /* Given a normal decimal floating point number of the form
242 where the decimal point and exponent are optional, fill out the
243 structure D. Exponent is appropriate if the significand is
244 treated as an integer, and normalizedExponent if the significand
245 is taken to have the decimal point after a single leading
248 If the value is zero, V->firstSigDigit points to a non-digit, and
249 the return exponent is zero.
252 const char *firstSigDigit;
253 const char *lastSigDigit;
255 int normalizedExponent;
259 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
262 StringRef::iterator dot = end;
263 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
265 D->firstSigDigit = p;
267 D->normalizedExponent = 0;
269 for (; p != end; ++p) {
271 assert(dot == end && "String contains multiple dots");
276 if (decDigitValue(*p) >= 10U)
281 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
282 assert(p != begin && "Significand has no digits");
283 assert((dot == end || p - begin != 1) && "Significand has no digits");
285 /* p points to the first non-digit in the string */
286 D->exponent = readExponent(p + 1, end);
288 /* Implied decimal point? */
293 /* If number is all zeroes accept any exponent. */
294 if (p != D->firstSigDigit) {
295 /* Drop insignificant trailing zeroes. */
300 while (p != begin && *p == '0');
301 while (p != begin && *p == '.');
304 /* Adjust the exponents for any decimal point. */
305 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
306 D->normalizedExponent = (D->exponent +
307 static_cast<exponent_t>((p - D->firstSigDigit)
308 - (dot > D->firstSigDigit && dot < p)));
314 /* Return the trailing fraction of a hexadecimal number.
315 DIGITVALUE is the first hex digit of the fraction, P points to
318 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
319 unsigned int digitValue)
321 unsigned int hexDigit;
323 /* If the first trailing digit isn't 0 or 8 we can work out the
324 fraction immediately. */
326 return lfMoreThanHalf;
327 else if(digitValue < 8 && digitValue > 0)
328 return lfLessThanHalf;
330 /* Otherwise we need to find the first non-zero digit. */
334 assert(p != end && "Invalid trailing hexadecimal fraction!");
336 hexDigit = hexDigitValue(*p);
338 /* If we ran off the end it is exactly zero or one-half, otherwise
341 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
343 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
346 /* Return the fraction lost were a bignum truncated losing the least
347 significant BITS bits. */
349 lostFractionThroughTruncation(const integerPart *parts,
350 unsigned int partCount,
355 lsb = APInt::tcLSB(parts, partCount);
357 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
359 return lfExactlyZero;
361 return lfExactlyHalf;
362 if(bits <= partCount * integerPartWidth
363 && APInt::tcExtractBit(parts, bits - 1))
364 return lfMoreThanHalf;
366 return lfLessThanHalf;
369 /* Shift DST right BITS bits noting lost fraction. */
371 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
373 lostFraction lost_fraction;
375 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
377 APInt::tcShiftRight(dst, parts, bits);
379 return lost_fraction;
382 /* Combine the effect of two lost fractions. */
384 combineLostFractions(lostFraction moreSignificant,
385 lostFraction lessSignificant)
387 if(lessSignificant != lfExactlyZero) {
388 if(moreSignificant == lfExactlyZero)
389 moreSignificant = lfLessThanHalf;
390 else if(moreSignificant == lfExactlyHalf)
391 moreSignificant = lfMoreThanHalf;
394 return moreSignificant;
397 /* The error from the true value, in half-ulps, on multiplying two
398 floating point numbers, which differ from the value they
399 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
400 than the returned value.
402 See "How to Read Floating Point Numbers Accurately" by William D
405 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
407 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
409 if (HUerr1 + HUerr2 == 0)
410 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
412 return inexactMultiply + 2 * (HUerr1 + HUerr2);
415 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
416 when the least significant BITS are truncated. BITS cannot be
419 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
421 unsigned int count, partBits;
422 integerPart part, boundary;
427 count = bits / integerPartWidth;
428 partBits = bits % integerPartWidth + 1;
430 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
433 boundary = (integerPart) 1 << (partBits - 1);
438 if (part - boundary <= boundary - part)
439 return part - boundary;
441 return boundary - part;
444 if (part == boundary) {
447 return ~(integerPart) 0; /* A lot. */
450 } else if (part == boundary - 1) {
453 return ~(integerPart) 0; /* A lot. */
458 return ~(integerPart) 0; /* A lot. */
461 /* Place pow(5, power) in DST, and return the number of parts used.
462 DST must be at least one part larger than size of the answer. */
464 powerOf5(integerPart *dst, unsigned int power)
466 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
468 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
469 pow5s[0] = 78125 * 5;
471 unsigned int partsCount[16] = { 1 };
472 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
474 assert(power <= maxExponent);
479 *p1 = firstEightPowers[power & 7];
485 for (unsigned int n = 0; power; power >>= 1, n++) {
490 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
492 pc = partsCount[n - 1];
493 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
495 if (pow5[pc - 1] == 0)
503 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
505 if (p2[result - 1] == 0)
508 /* Now result is in p1 with partsCount parts and p2 is scratch
510 tmp = p1, p1 = p2, p2 = tmp;
517 APInt::tcAssign(dst, p1, result);
522 /* Zero at the end to avoid modular arithmetic when adding one; used
523 when rounding up during hexadecimal output. */
524 static const char hexDigitsLower[] = "0123456789abcdef0";
525 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
526 static const char infinityL[] = "infinity";
527 static const char infinityU[] = "INFINITY";
528 static const char NaNL[] = "nan";
529 static const char NaNU[] = "NAN";
531 /* Write out an integerPart in hexadecimal, starting with the most
532 significant nibble. Write out exactly COUNT hexdigits, return
535 partAsHex (char *dst, integerPart part, unsigned int count,
536 const char *hexDigitChars)
538 unsigned int result = count;
540 assert(count != 0 && count <= integerPartWidth / 4);
542 part >>= (integerPartWidth - 4 * count);
544 dst[count] = hexDigitChars[part & 0xf];
551 /* Write out an unsigned decimal integer. */
553 writeUnsignedDecimal (char *dst, unsigned int n)
569 /* Write out a signed decimal integer. */
571 writeSignedDecimal (char *dst, int value)
575 dst = writeUnsignedDecimal(dst, -(unsigned) value);
577 dst = writeUnsignedDecimal(dst, value);
584 APFloat::initialize(const fltSemantics *ourSemantics)
588 semantics = ourSemantics;
591 significand.parts = new integerPart[count];
595 APFloat::freeSignificand()
598 delete [] significand.parts;
602 APFloat::assign(const APFloat &rhs)
604 assert(semantics == rhs.semantics);
607 category = rhs.category;
608 exponent = rhs.exponent;
610 exponent2 = rhs.exponent2;
611 if(category == fcNormal || category == fcNaN)
612 copySignificand(rhs);
616 APFloat::copySignificand(const APFloat &rhs)
618 assert(category == fcNormal || category == fcNaN);
619 assert(rhs.partCount() >= partCount());
621 APInt::tcAssign(significandParts(), rhs.significandParts(),
625 /* Make this number a NaN, with an arbitrary but deterministic value
626 for the significand. If double or longer, this is a signalling NaN,
627 which may not be ideal. If float, this is QNaN(0). */
629 APFloat::makeNaN(unsigned type)
632 // FIXME: Add double and long double support for QNaN(0).
633 if (semantics->precision == 24 && semantics->maxExponent == 127) {
635 type &= ~0x80000000U;
638 APInt::tcSet(significandParts(), type, partCount());
642 APFloat::operator=(const APFloat &rhs)
645 if(semantics != rhs.semantics) {
647 initialize(rhs.semantics);
656 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
659 if (semantics != rhs.semantics ||
660 category != rhs.category ||
663 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
666 if (category==fcZero || category==fcInfinity)
668 else if (category==fcNormal && exponent!=rhs.exponent)
670 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
671 exponent2!=rhs.exponent2)
675 const integerPart* p=significandParts();
676 const integerPart* q=rhs.significandParts();
677 for (; i>0; i--, p++, q++) {
685 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
687 assertArithmeticOK(ourSemantics);
688 initialize(&ourSemantics);
691 exponent = ourSemantics.precision - 1;
692 significandParts()[0] = value;
693 normalize(rmNearestTiesToEven, lfExactlyZero);
696 APFloat::APFloat(const fltSemantics &ourSemantics) {
697 assertArithmeticOK(ourSemantics);
698 initialize(&ourSemantics);
704 APFloat::APFloat(const fltSemantics &ourSemantics,
705 fltCategory ourCategory, bool negative, unsigned type)
707 assertArithmeticOK(ourSemantics);
708 initialize(&ourSemantics);
709 category = ourCategory;
711 if (category == fcNormal)
713 else if (ourCategory == fcNaN)
717 APFloat::APFloat(const fltSemantics &ourSemantics, const StringRef& text)
719 assertArithmeticOK(ourSemantics);
720 initialize(&ourSemantics);
721 convertFromString(text, rmNearestTiesToEven);
724 APFloat::APFloat(const APFloat &rhs)
726 initialize(rhs.semantics);
735 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
736 void APFloat::Profile(FoldingSetNodeID& ID) const {
737 ID.Add(bitcastToAPInt());
741 APFloat::partCount() const
743 return partCountForBits(semantics->precision + 1);
747 APFloat::semanticsPrecision(const fltSemantics &semantics)
749 return semantics.precision;
753 APFloat::significandParts() const
755 return const_cast<APFloat *>(this)->significandParts();
759 APFloat::significandParts()
761 assert(category == fcNormal || category == fcNaN);
764 return significand.parts;
766 return &significand.part;
770 APFloat::zeroSignificand()
773 APInt::tcSet(significandParts(), 0, partCount());
776 /* Increment an fcNormal floating point number's significand. */
778 APFloat::incrementSignificand()
782 carry = APInt::tcIncrement(significandParts(), partCount());
784 /* Our callers should never cause us to overflow. */
788 /* Add the significand of the RHS. Returns the carry flag. */
790 APFloat::addSignificand(const APFloat &rhs)
794 parts = significandParts();
796 assert(semantics == rhs.semantics);
797 assert(exponent == rhs.exponent);
799 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
802 /* Subtract the significand of the RHS with a borrow flag. Returns
805 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
809 parts = significandParts();
811 assert(semantics == rhs.semantics);
812 assert(exponent == rhs.exponent);
814 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
818 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
819 on to the full-precision result of the multiplication. Returns the
822 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
824 unsigned int omsb; // One, not zero, based MSB.
825 unsigned int partsCount, newPartsCount, precision;
826 integerPart *lhsSignificand;
827 integerPart scratch[4];
828 integerPart *fullSignificand;
829 lostFraction lost_fraction;
832 assert(semantics == rhs.semantics);
834 precision = semantics->precision;
835 newPartsCount = partCountForBits(precision * 2);
837 if(newPartsCount > 4)
838 fullSignificand = new integerPart[newPartsCount];
840 fullSignificand = scratch;
842 lhsSignificand = significandParts();
843 partsCount = partCount();
845 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
846 rhs.significandParts(), partsCount, partsCount);
848 lost_fraction = lfExactlyZero;
849 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
850 exponent += rhs.exponent;
853 Significand savedSignificand = significand;
854 const fltSemantics *savedSemantics = semantics;
855 fltSemantics extendedSemantics;
857 unsigned int extendedPrecision;
859 /* Normalize our MSB. */
860 extendedPrecision = precision + precision - 1;
861 if(omsb != extendedPrecision)
863 APInt::tcShiftLeft(fullSignificand, newPartsCount,
864 extendedPrecision - omsb);
865 exponent -= extendedPrecision - omsb;
868 /* Create new semantics. */
869 extendedSemantics = *semantics;
870 extendedSemantics.precision = extendedPrecision;
872 if(newPartsCount == 1)
873 significand.part = fullSignificand[0];
875 significand.parts = fullSignificand;
876 semantics = &extendedSemantics;
878 APFloat extendedAddend(*addend);
879 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
880 assert(status == opOK);
881 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
883 /* Restore our state. */
884 if(newPartsCount == 1)
885 fullSignificand[0] = significand.part;
886 significand = savedSignificand;
887 semantics = savedSemantics;
889 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
892 exponent -= (precision - 1);
894 if(omsb > precision) {
895 unsigned int bits, significantParts;
898 bits = omsb - precision;
899 significantParts = partCountForBits(omsb);
900 lf = shiftRight(fullSignificand, significantParts, bits);
901 lost_fraction = combineLostFractions(lf, lost_fraction);
905 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
907 if(newPartsCount > 4)
908 delete [] fullSignificand;
910 return lost_fraction;
913 /* Multiply the significands of LHS and RHS to DST. */
915 APFloat::divideSignificand(const APFloat &rhs)
917 unsigned int bit, i, partsCount;
918 const integerPart *rhsSignificand;
919 integerPart *lhsSignificand, *dividend, *divisor;
920 integerPart scratch[4];
921 lostFraction lost_fraction;
923 assert(semantics == rhs.semantics);
925 lhsSignificand = significandParts();
926 rhsSignificand = rhs.significandParts();
927 partsCount = partCount();
930 dividend = new integerPart[partsCount * 2];
934 divisor = dividend + partsCount;
936 /* Copy the dividend and divisor as they will be modified in-place. */
937 for(i = 0; i < partsCount; i++) {
938 dividend[i] = lhsSignificand[i];
939 divisor[i] = rhsSignificand[i];
940 lhsSignificand[i] = 0;
943 exponent -= rhs.exponent;
945 unsigned int precision = semantics->precision;
947 /* Normalize the divisor. */
948 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
951 APInt::tcShiftLeft(divisor, partsCount, bit);
954 /* Normalize the dividend. */
955 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
958 APInt::tcShiftLeft(dividend, partsCount, bit);
961 /* Ensure the dividend >= divisor initially for the loop below.
962 Incidentally, this means that the division loop below is
963 guaranteed to set the integer bit to one. */
964 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
966 APInt::tcShiftLeft(dividend, partsCount, 1);
967 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
971 for(bit = precision; bit; bit -= 1) {
972 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
973 APInt::tcSubtract(dividend, divisor, 0, partsCount);
974 APInt::tcSetBit(lhsSignificand, bit - 1);
977 APInt::tcShiftLeft(dividend, partsCount, 1);
980 /* Figure out the lost fraction. */
981 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
984 lost_fraction = lfMoreThanHalf;
986 lost_fraction = lfExactlyHalf;
987 else if(APInt::tcIsZero(dividend, partsCount))
988 lost_fraction = lfExactlyZero;
990 lost_fraction = lfLessThanHalf;
995 return lost_fraction;
999 APFloat::significandMSB() const
1001 return APInt::tcMSB(significandParts(), partCount());
1005 APFloat::significandLSB() const
1007 return APInt::tcLSB(significandParts(), partCount());
1010 /* Note that a zero result is NOT normalized to fcZero. */
1012 APFloat::shiftSignificandRight(unsigned int bits)
1014 /* Our exponent should not overflow. */
1015 assert((exponent_t) (exponent + bits) >= exponent);
1019 return shiftRight(significandParts(), partCount(), bits);
1022 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1024 APFloat::shiftSignificandLeft(unsigned int bits)
1026 assert(bits < semantics->precision);
1029 unsigned int partsCount = partCount();
1031 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1034 assert(!APInt::tcIsZero(significandParts(), partsCount));
1039 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1043 assert(semantics == rhs.semantics);
1044 assert(category == fcNormal);
1045 assert(rhs.category == fcNormal);
1047 compare = exponent - rhs.exponent;
1049 /* If exponents are equal, do an unsigned bignum comparison of the
1052 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1056 return cmpGreaterThan;
1057 else if(compare < 0)
1063 /* Handle overflow. Sign is preserved. We either become infinity or
1064 the largest finite number. */
1066 APFloat::handleOverflow(roundingMode rounding_mode)
1069 if(rounding_mode == rmNearestTiesToEven
1070 || rounding_mode == rmNearestTiesToAway
1071 || (rounding_mode == rmTowardPositive && !sign)
1072 || (rounding_mode == rmTowardNegative && sign))
1074 category = fcInfinity;
1075 return (opStatus) (opOverflow | opInexact);
1078 /* Otherwise we become the largest finite number. */
1079 category = fcNormal;
1080 exponent = semantics->maxExponent;
1081 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1082 semantics->precision);
1087 /* Returns TRUE if, when truncating the current number, with BIT the
1088 new LSB, with the given lost fraction and rounding mode, the result
1089 would need to be rounded away from zero (i.e., by increasing the
1090 signficand). This routine must work for fcZero of both signs, and
1091 fcNormal numbers. */
1093 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1094 lostFraction lost_fraction,
1095 unsigned int bit) const
1097 /* NaNs and infinities should not have lost fractions. */
1098 assert(category == fcNormal || category == fcZero);
1100 /* Current callers never pass this so we don't handle it. */
1101 assert(lost_fraction != lfExactlyZero);
1103 switch (rounding_mode) {
1105 llvm_unreachable(0);
1107 case rmNearestTiesToAway:
1108 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1110 case rmNearestTiesToEven:
1111 if(lost_fraction == lfMoreThanHalf)
1114 /* Our zeroes don't have a significand to test. */
1115 if(lost_fraction == lfExactlyHalf && category != fcZero)
1116 return APInt::tcExtractBit(significandParts(), bit);
1123 case rmTowardPositive:
1124 return sign == false;
1126 case rmTowardNegative:
1127 return sign == true;
1132 APFloat::normalize(roundingMode rounding_mode,
1133 lostFraction lost_fraction)
1135 unsigned int omsb; /* One, not zero, based MSB. */
1138 if(category != fcNormal)
1141 /* Before rounding normalize the exponent of fcNormal numbers. */
1142 omsb = significandMSB() + 1;
1145 /* OMSB is numbered from 1. We want to place it in the integer
1146 bit numbered PRECISON if possible, with a compensating change in
1148 exponentChange = omsb - semantics->precision;
1150 /* If the resulting exponent is too high, overflow according to
1151 the rounding mode. */
1152 if(exponent + exponentChange > semantics->maxExponent)
1153 return handleOverflow(rounding_mode);
1155 /* Subnormal numbers have exponent minExponent, and their MSB
1156 is forced based on that. */
1157 if(exponent + exponentChange < semantics->minExponent)
1158 exponentChange = semantics->minExponent - exponent;
1160 /* Shifting left is easy as we don't lose precision. */
1161 if(exponentChange < 0) {
1162 assert(lost_fraction == lfExactlyZero);
1164 shiftSignificandLeft(-exponentChange);
1169 if(exponentChange > 0) {
1172 /* Shift right and capture any new lost fraction. */
1173 lf = shiftSignificandRight(exponentChange);
1175 lost_fraction = combineLostFractions(lf, lost_fraction);
1177 /* Keep OMSB up-to-date. */
1178 if(omsb > (unsigned) exponentChange)
1179 omsb -= exponentChange;
1185 /* Now round the number according to rounding_mode given the lost
1188 /* As specified in IEEE 754, since we do not trap we do not report
1189 underflow for exact results. */
1190 if(lost_fraction == lfExactlyZero) {
1191 /* Canonicalize zeroes. */
1198 /* Increment the significand if we're rounding away from zero. */
1199 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1201 exponent = semantics->minExponent;
1203 incrementSignificand();
1204 omsb = significandMSB() + 1;
1206 /* Did the significand increment overflow? */
1207 if(omsb == (unsigned) semantics->precision + 1) {
1208 /* Renormalize by incrementing the exponent and shifting our
1209 significand right one. However if we already have the
1210 maximum exponent we overflow to infinity. */
1211 if(exponent == semantics->maxExponent) {
1212 category = fcInfinity;
1214 return (opStatus) (opOverflow | opInexact);
1217 shiftSignificandRight(1);
1223 /* The normal case - we were and are not denormal, and any
1224 significand increment above didn't overflow. */
1225 if(omsb == semantics->precision)
1228 /* We have a non-zero denormal. */
1229 assert(omsb < semantics->precision);
1231 /* Canonicalize zeroes. */
1235 /* The fcZero case is a denormal that underflowed to zero. */
1236 return (opStatus) (opUnderflow | opInexact);
1240 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1242 switch (convolve(category, rhs.category)) {
1244 llvm_unreachable(0);
1246 case convolve(fcNaN, fcZero):
1247 case convolve(fcNaN, fcNormal):
1248 case convolve(fcNaN, fcInfinity):
1249 case convolve(fcNaN, fcNaN):
1250 case convolve(fcNormal, fcZero):
1251 case convolve(fcInfinity, fcNormal):
1252 case convolve(fcInfinity, fcZero):
1255 case convolve(fcZero, fcNaN):
1256 case convolve(fcNormal, fcNaN):
1257 case convolve(fcInfinity, fcNaN):
1259 copySignificand(rhs);
1262 case convolve(fcNormal, fcInfinity):
1263 case convolve(fcZero, fcInfinity):
1264 category = fcInfinity;
1265 sign = rhs.sign ^ subtract;
1268 case convolve(fcZero, fcNormal):
1270 sign = rhs.sign ^ subtract;
1273 case convolve(fcZero, fcZero):
1274 /* Sign depends on rounding mode; handled by caller. */
1277 case convolve(fcInfinity, fcInfinity):
1278 /* Differently signed infinities can only be validly
1280 if(((sign ^ rhs.sign)!=0) != subtract) {
1287 case convolve(fcNormal, fcNormal):
1292 /* Add or subtract two normal numbers. */
1294 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1297 lostFraction lost_fraction;
1300 /* Determine if the operation on the absolute values is effectively
1301 an addition or subtraction. */
1302 subtract ^= (sign ^ rhs.sign) ? true : false;
1304 /* Are we bigger exponent-wise than the RHS? */
1305 bits = exponent - rhs.exponent;
1307 /* Subtraction is more subtle than one might naively expect. */
1309 APFloat temp_rhs(rhs);
1313 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1314 lost_fraction = lfExactlyZero;
1315 } else if (bits > 0) {
1316 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1317 shiftSignificandLeft(1);
1320 lost_fraction = shiftSignificandRight(-bits - 1);
1321 temp_rhs.shiftSignificandLeft(1);
1326 carry = temp_rhs.subtractSignificand
1327 (*this, lost_fraction != lfExactlyZero);
1328 copySignificand(temp_rhs);
1331 carry = subtractSignificand
1332 (temp_rhs, lost_fraction != lfExactlyZero);
1335 /* Invert the lost fraction - it was on the RHS and
1337 if(lost_fraction == lfLessThanHalf)
1338 lost_fraction = lfMoreThanHalf;
1339 else if(lost_fraction == lfMoreThanHalf)
1340 lost_fraction = lfLessThanHalf;
1342 /* The code above is intended to ensure that no borrow is
1347 APFloat temp_rhs(rhs);
1349 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1350 carry = addSignificand(temp_rhs);
1352 lost_fraction = shiftSignificandRight(-bits);
1353 carry = addSignificand(rhs);
1356 /* We have a guard bit; generating a carry cannot happen. */
1360 return lost_fraction;
1364 APFloat::multiplySpecials(const APFloat &rhs)
1366 switch (convolve(category, rhs.category)) {
1368 llvm_unreachable(0);
1370 case convolve(fcNaN, fcZero):
1371 case convolve(fcNaN, fcNormal):
1372 case convolve(fcNaN, fcInfinity):
1373 case convolve(fcNaN, fcNaN):
1376 case convolve(fcZero, fcNaN):
1377 case convolve(fcNormal, fcNaN):
1378 case convolve(fcInfinity, fcNaN):
1380 copySignificand(rhs);
1383 case convolve(fcNormal, fcInfinity):
1384 case convolve(fcInfinity, fcNormal):
1385 case convolve(fcInfinity, fcInfinity):
1386 category = fcInfinity;
1389 case convolve(fcZero, fcNormal):
1390 case convolve(fcNormal, fcZero):
1391 case convolve(fcZero, fcZero):
1395 case convolve(fcZero, fcInfinity):
1396 case convolve(fcInfinity, fcZero):
1400 case convolve(fcNormal, fcNormal):
1406 APFloat::divideSpecials(const APFloat &rhs)
1408 switch (convolve(category, rhs.category)) {
1410 llvm_unreachable(0);
1412 case convolve(fcNaN, fcZero):
1413 case convolve(fcNaN, fcNormal):
1414 case convolve(fcNaN, fcInfinity):
1415 case convolve(fcNaN, fcNaN):
1416 case convolve(fcInfinity, fcZero):
1417 case convolve(fcInfinity, fcNormal):
1418 case convolve(fcZero, fcInfinity):
1419 case convolve(fcZero, fcNormal):
1422 case convolve(fcZero, fcNaN):
1423 case convolve(fcNormal, fcNaN):
1424 case convolve(fcInfinity, fcNaN):
1426 copySignificand(rhs);
1429 case convolve(fcNormal, fcInfinity):
1433 case convolve(fcNormal, fcZero):
1434 category = fcInfinity;
1437 case convolve(fcInfinity, fcInfinity):
1438 case convolve(fcZero, fcZero):
1442 case convolve(fcNormal, fcNormal):
1448 APFloat::modSpecials(const APFloat &rhs)
1450 switch (convolve(category, rhs.category)) {
1452 llvm_unreachable(0);
1454 case convolve(fcNaN, fcZero):
1455 case convolve(fcNaN, fcNormal):
1456 case convolve(fcNaN, fcInfinity):
1457 case convolve(fcNaN, fcNaN):
1458 case convolve(fcZero, fcInfinity):
1459 case convolve(fcZero, fcNormal):
1460 case convolve(fcNormal, fcInfinity):
1463 case convolve(fcZero, fcNaN):
1464 case convolve(fcNormal, fcNaN):
1465 case convolve(fcInfinity, fcNaN):
1467 copySignificand(rhs);
1470 case convolve(fcNormal, fcZero):
1471 case convolve(fcInfinity, fcZero):
1472 case convolve(fcInfinity, fcNormal):
1473 case convolve(fcInfinity, fcInfinity):
1474 case convolve(fcZero, fcZero):
1478 case convolve(fcNormal, fcNormal):
1485 APFloat::changeSign()
1487 /* Look mummy, this one's easy. */
1492 APFloat::clearSign()
1494 /* So is this one. */
1499 APFloat::copySign(const APFloat &rhs)
1505 /* Normalized addition or subtraction. */
1507 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1512 assertArithmeticOK(*semantics);
1514 fs = addOrSubtractSpecials(rhs, subtract);
1516 /* This return code means it was not a simple case. */
1517 if(fs == opDivByZero) {
1518 lostFraction lost_fraction;
1520 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1521 fs = normalize(rounding_mode, lost_fraction);
1523 /* Can only be zero if we lost no fraction. */
1524 assert(category != fcZero || lost_fraction == lfExactlyZero);
1527 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1528 positive zero unless rounding to minus infinity, except that
1529 adding two like-signed zeroes gives that zero. */
1530 if(category == fcZero) {
1531 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1532 sign = (rounding_mode == rmTowardNegative);
1538 /* Normalized addition. */
1540 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1542 return addOrSubtract(rhs, rounding_mode, false);
1545 /* Normalized subtraction. */
1547 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1549 return addOrSubtract(rhs, rounding_mode, true);
1552 /* Normalized multiply. */
1554 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1558 assertArithmeticOK(*semantics);
1560 fs = multiplySpecials(rhs);
1562 if(category == fcNormal) {
1563 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1564 fs = normalize(rounding_mode, lost_fraction);
1565 if(lost_fraction != lfExactlyZero)
1566 fs = (opStatus) (fs | opInexact);
1572 /* Normalized divide. */
1574 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1578 assertArithmeticOK(*semantics);
1580 fs = divideSpecials(rhs);
1582 if(category == fcNormal) {
1583 lostFraction lost_fraction = divideSignificand(rhs);
1584 fs = normalize(rounding_mode, lost_fraction);
1585 if(lost_fraction != lfExactlyZero)
1586 fs = (opStatus) (fs | opInexact);
1592 /* Normalized remainder. This is not currently correct in all cases. */
1594 APFloat::remainder(const APFloat &rhs)
1598 unsigned int origSign = sign;
1600 assertArithmeticOK(*semantics);
1601 fs = V.divide(rhs, rmNearestTiesToEven);
1602 if (fs == opDivByZero)
1605 int parts = partCount();
1606 integerPart *x = new integerPart[parts];
1608 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1609 rmNearestTiesToEven, &ignored);
1610 if (fs==opInvalidOp)
1613 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1614 rmNearestTiesToEven);
1615 assert(fs==opOK); // should always work
1617 fs = V.multiply(rhs, rmNearestTiesToEven);
1618 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1620 fs = subtract(V, rmNearestTiesToEven);
1621 assert(fs==opOK || fs==opInexact); // likewise
1624 sign = origSign; // IEEE754 requires this
1629 /* Normalized llvm frem (C fmod).
1630 This is not currently correct in all cases. */
1632 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1635 assertArithmeticOK(*semantics);
1636 fs = modSpecials(rhs);
1638 if (category == fcNormal && rhs.category == fcNormal) {
1640 unsigned int origSign = sign;
1642 fs = V.divide(rhs, rmNearestTiesToEven);
1643 if (fs == opDivByZero)
1646 int parts = partCount();
1647 integerPart *x = new integerPart[parts];
1649 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1650 rmTowardZero, &ignored);
1651 if (fs==opInvalidOp)
1654 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1655 rmNearestTiesToEven);
1656 assert(fs==opOK); // should always work
1658 fs = V.multiply(rhs, rounding_mode);
1659 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1661 fs = subtract(V, rounding_mode);
1662 assert(fs==opOK || fs==opInexact); // likewise
1665 sign = origSign; // IEEE754 requires this
1671 /* Normalized fused-multiply-add. */
1673 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1674 const APFloat &addend,
1675 roundingMode rounding_mode)
1679 assertArithmeticOK(*semantics);
1681 /* Post-multiplication sign, before addition. */
1682 sign ^= multiplicand.sign;
1684 /* If and only if all arguments are normal do we need to do an
1685 extended-precision calculation. */
1686 if(category == fcNormal
1687 && multiplicand.category == fcNormal
1688 && addend.category == fcNormal) {
1689 lostFraction lost_fraction;
1691 lost_fraction = multiplySignificand(multiplicand, &addend);
1692 fs = normalize(rounding_mode, lost_fraction);
1693 if(lost_fraction != lfExactlyZero)
1694 fs = (opStatus) (fs | opInexact);
1696 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1697 positive zero unless rounding to minus infinity, except that
1698 adding two like-signed zeroes gives that zero. */
1699 if(category == fcZero && sign != addend.sign)
1700 sign = (rounding_mode == rmTowardNegative);
1702 fs = multiplySpecials(multiplicand);
1704 /* FS can only be opOK or opInvalidOp. There is no more work
1705 to do in the latter case. The IEEE-754R standard says it is
1706 implementation-defined in this case whether, if ADDEND is a
1707 quiet NaN, we raise invalid op; this implementation does so.
1709 If we need to do the addition we can do so with normal
1712 fs = addOrSubtract(addend, rounding_mode, false);
1718 /* Comparison requires normalized numbers. */
1720 APFloat::compare(const APFloat &rhs) const
1724 assertArithmeticOK(*semantics);
1725 assert(semantics == rhs.semantics);
1727 switch (convolve(category, rhs.category)) {
1729 llvm_unreachable(0);
1731 case convolve(fcNaN, fcZero):
1732 case convolve(fcNaN, fcNormal):
1733 case convolve(fcNaN, fcInfinity):
1734 case convolve(fcNaN, fcNaN):
1735 case convolve(fcZero, fcNaN):
1736 case convolve(fcNormal, fcNaN):
1737 case convolve(fcInfinity, fcNaN):
1738 return cmpUnordered;
1740 case convolve(fcInfinity, fcNormal):
1741 case convolve(fcInfinity, fcZero):
1742 case convolve(fcNormal, fcZero):
1746 return cmpGreaterThan;
1748 case convolve(fcNormal, fcInfinity):
1749 case convolve(fcZero, fcInfinity):
1750 case convolve(fcZero, fcNormal):
1752 return cmpGreaterThan;
1756 case convolve(fcInfinity, fcInfinity):
1757 if(sign == rhs.sign)
1762 return cmpGreaterThan;
1764 case convolve(fcZero, fcZero):
1767 case convolve(fcNormal, fcNormal):
1771 /* Two normal numbers. Do they have the same sign? */
1772 if(sign != rhs.sign) {
1774 result = cmpLessThan;
1776 result = cmpGreaterThan;
1778 /* Compare absolute values; invert result if negative. */
1779 result = compareAbsoluteValue(rhs);
1782 if(result == cmpLessThan)
1783 result = cmpGreaterThan;
1784 else if(result == cmpGreaterThan)
1785 result = cmpLessThan;
1792 /// APFloat::convert - convert a value of one floating point type to another.
1793 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1794 /// records whether the transformation lost information, i.e. whether
1795 /// converting the result back to the original type will produce the
1796 /// original value (this is almost the same as return value==fsOK, but there
1797 /// are edge cases where this is not so).
1800 APFloat::convert(const fltSemantics &toSemantics,
1801 roundingMode rounding_mode, bool *losesInfo)
1803 lostFraction lostFraction;
1804 unsigned int newPartCount, oldPartCount;
1807 assertArithmeticOK(*semantics);
1808 assertArithmeticOK(toSemantics);
1809 lostFraction = lfExactlyZero;
1810 newPartCount = partCountForBits(toSemantics.precision + 1);
1811 oldPartCount = partCount();
1813 /* Handle storage complications. If our new form is wider,
1814 re-allocate our bit pattern into wider storage. If it is
1815 narrower, we ignore the excess parts, but if narrowing to a
1816 single part we need to free the old storage.
1817 Be careful not to reference significandParts for zeroes
1818 and infinities, since it aborts. */
1819 if (newPartCount > oldPartCount) {
1820 integerPart *newParts;
1821 newParts = new integerPart[newPartCount];
1822 APInt::tcSet(newParts, 0, newPartCount);
1823 if (category==fcNormal || category==fcNaN)
1824 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1826 significand.parts = newParts;
1827 } else if (newPartCount < oldPartCount) {
1828 /* Capture any lost fraction through truncation of parts so we get
1829 correct rounding whilst normalizing. */
1830 if (category==fcNormal)
1831 lostFraction = lostFractionThroughTruncation
1832 (significandParts(), oldPartCount, toSemantics.precision);
1833 if (newPartCount == 1) {
1834 integerPart newPart = 0;
1835 if (category==fcNormal || category==fcNaN)
1836 newPart = significandParts()[0];
1838 significand.part = newPart;
1842 if(category == fcNormal) {
1843 /* Re-interpret our bit-pattern. */
1844 exponent += toSemantics.precision - semantics->precision;
1845 semantics = &toSemantics;
1846 fs = normalize(rounding_mode, lostFraction);
1847 *losesInfo = (fs != opOK);
1848 } else if (category == fcNaN) {
1849 int shift = toSemantics.precision - semantics->precision;
1850 // Do this now so significandParts gets the right answer
1851 const fltSemantics *oldSemantics = semantics;
1852 semantics = &toSemantics;
1854 // No normalization here, just truncate
1856 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1857 else if (shift < 0) {
1858 unsigned ushift = -shift;
1859 // Figure out if we are losing information. This happens
1860 // if are shifting out something other than 0s, or if the x87 long
1861 // double input did not have its integer bit set (pseudo-NaN), or if the
1862 // x87 long double input did not have its QNan bit set (because the x87
1863 // hardware sets this bit when converting a lower-precision NaN to
1864 // x87 long double).
1865 if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
1867 if (oldSemantics == &APFloat::x87DoubleExtended &&
1868 (!(*significandParts() & 0x8000000000000000ULL) ||
1869 !(*significandParts() & 0x4000000000000000ULL)))
1871 APInt::tcShiftRight(significandParts(), newPartCount, ushift);
1873 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1874 // does not give you back the same bits. This is dubious, and we
1875 // don't currently do it. You're really supposed to get
1876 // an invalid operation signal at runtime, but nobody does that.
1879 semantics = &toSemantics;
1887 /* Convert a floating point number to an integer according to the
1888 rounding mode. If the rounded integer value is out of range this
1889 returns an invalid operation exception and the contents of the
1890 destination parts are unspecified. If the rounded value is in
1891 range but the floating point number is not the exact integer, the C
1892 standard doesn't require an inexact exception to be raised. IEEE
1893 854 does require it so we do that.
1895 Note that for conversions to integer type the C standard requires
1896 round-to-zero to always be used. */
1898 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1900 roundingMode rounding_mode,
1901 bool *isExact) const
1903 lostFraction lost_fraction;
1904 const integerPart *src;
1905 unsigned int dstPartsCount, truncatedBits;
1907 assertArithmeticOK(*semantics);
1911 /* Handle the three special cases first. */
1912 if(category == fcInfinity || category == fcNaN)
1915 dstPartsCount = partCountForBits(width);
1917 if(category == fcZero) {
1918 APInt::tcSet(parts, 0, dstPartsCount);
1919 // Negative zero can't be represented as an int.
1924 src = significandParts();
1926 /* Step 1: place our absolute value, with any fraction truncated, in
1929 /* Our absolute value is less than one; truncate everything. */
1930 APInt::tcSet(parts, 0, dstPartsCount);
1931 /* For exponent -1 the integer bit represents .5, look at that.
1932 For smaller exponents leftmost truncated bit is 0. */
1933 truncatedBits = semantics->precision -1U - exponent;
1935 /* We want the most significant (exponent + 1) bits; the rest are
1937 unsigned int bits = exponent + 1U;
1939 /* Hopelessly large in magnitude? */
1943 if (bits < semantics->precision) {
1944 /* We truncate (semantics->precision - bits) bits. */
1945 truncatedBits = semantics->precision - bits;
1946 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1948 /* We want at least as many bits as are available. */
1949 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1950 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1955 /* Step 2: work out any lost fraction, and increment the absolute
1956 value if we would round away from zero. */
1957 if (truncatedBits) {
1958 lost_fraction = lostFractionThroughTruncation(src, partCount(),
1960 if (lost_fraction != lfExactlyZero
1961 && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
1962 if (APInt::tcIncrement(parts, dstPartsCount))
1963 return opInvalidOp; /* Overflow. */
1966 lost_fraction = lfExactlyZero;
1969 /* Step 3: check if we fit in the destination. */
1970 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
1974 /* Negative numbers cannot be represented as unsigned. */
1978 /* It takes omsb bits to represent the unsigned integer value.
1979 We lose a bit for the sign, but care is needed as the
1980 maximally negative integer is a special case. */
1981 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
1984 /* This case can happen because of rounding. */
1989 APInt::tcNegate (parts, dstPartsCount);
1991 if (omsb >= width + !isSigned)
1995 if (lost_fraction == lfExactlyZero) {
2002 /* Same as convertToSignExtendedInteger, except we provide
2003 deterministic values in case of an invalid operation exception,
2004 namely zero for NaNs and the minimal or maximal value respectively
2005 for underflow or overflow.
2006 The *isExact output tells whether the result is exact, in the sense
2007 that converting it back to the original floating point type produces
2008 the original value. This is almost equivalent to result==opOK,
2009 except for negative zeroes.
2012 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2014 roundingMode rounding_mode, bool *isExact) const
2018 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2021 if (fs == opInvalidOp) {
2022 unsigned int bits, dstPartsCount;
2024 dstPartsCount = partCountForBits(width);
2026 if (category == fcNaN)
2031 bits = width - isSigned;
2033 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2034 if (sign && isSigned)
2035 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2041 /* Convert an unsigned integer SRC to a floating point number,
2042 rounding according to ROUNDING_MODE. The sign of the floating
2043 point number is not modified. */
2045 APFloat::convertFromUnsignedParts(const integerPart *src,
2046 unsigned int srcCount,
2047 roundingMode rounding_mode)
2049 unsigned int omsb, precision, dstCount;
2051 lostFraction lost_fraction;
2053 assertArithmeticOK(*semantics);
2054 category = fcNormal;
2055 omsb = APInt::tcMSB(src, srcCount) + 1;
2056 dst = significandParts();
2057 dstCount = partCount();
2058 precision = semantics->precision;
2060 /* We want the most significant PRECISON bits of SRC. There may not
2061 be that many; extract what we can. */
2062 if (precision <= omsb) {
2063 exponent = omsb - 1;
2064 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2066 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2068 exponent = precision - 1;
2069 lost_fraction = lfExactlyZero;
2070 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2073 return normalize(rounding_mode, lost_fraction);
2077 APFloat::convertFromAPInt(const APInt &Val,
2079 roundingMode rounding_mode)
2081 unsigned int partCount = Val.getNumWords();
2085 if (isSigned && api.isNegative()) {
2090 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2093 /* Convert a two's complement integer SRC to a floating point number,
2094 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2095 integer is signed, in which case it must be sign-extended. */
2097 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2098 unsigned int srcCount,
2100 roundingMode rounding_mode)
2104 assertArithmeticOK(*semantics);
2106 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2109 /* If we're signed and negative negate a copy. */
2111 copy = new integerPart[srcCount];
2112 APInt::tcAssign(copy, src, srcCount);
2113 APInt::tcNegate(copy, srcCount);
2114 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2118 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2124 /* FIXME: should this just take a const APInt reference? */
2126 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2127 unsigned int width, bool isSigned,
2128 roundingMode rounding_mode)
2130 unsigned int partCount = partCountForBits(width);
2131 APInt api = APInt(width, partCount, parts);
2134 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
2139 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2143 APFloat::convertFromHexadecimalString(const StringRef &s,
2144 roundingMode rounding_mode)
2146 lostFraction lost_fraction = lfExactlyZero;
2147 integerPart *significand;
2148 unsigned int bitPos, partsCount;
2149 StringRef::iterator dot, firstSignificantDigit;
2153 category = fcNormal;
2155 significand = significandParts();
2156 partsCount = partCount();
2157 bitPos = partsCount * integerPartWidth;
2159 /* Skip leading zeroes and any (hexa)decimal point. */
2160 StringRef::iterator begin = s.begin();
2161 StringRef::iterator end = s.end();
2162 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2163 firstSignificantDigit = p;
2166 integerPart hex_value;
2169 assert(dot == end && "String contains multiple dots");
2176 hex_value = hexDigitValue(*p);
2177 if(hex_value == -1U) {
2186 /* Store the number whilst 4-bit nibbles remain. */
2189 hex_value <<= bitPos % integerPartWidth;
2190 significand[bitPos / integerPartWidth] |= hex_value;
2192 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2193 while(p != end && hexDigitValue(*p) != -1U)
2200 /* Hex floats require an exponent but not a hexadecimal point. */
2201 assert(p != end && "Hex strings require an exponent");
2202 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2203 assert(p != begin && "Significand has no digits");
2204 assert((dot == end || p - begin != 1) && "Significand has no digits");
2206 /* Ignore the exponent if we are zero. */
2207 if(p != firstSignificantDigit) {
2210 /* Implicit hexadecimal point? */
2214 /* Calculate the exponent adjustment implicit in the number of
2215 significant digits. */
2216 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2217 if(expAdjustment < 0)
2219 expAdjustment = expAdjustment * 4 - 1;
2221 /* Adjust for writing the significand starting at the most
2222 significant nibble. */
2223 expAdjustment += semantics->precision;
2224 expAdjustment -= partsCount * integerPartWidth;
2226 /* Adjust for the given exponent. */
2227 exponent = totalExponent(p + 1, end, expAdjustment);
2230 return normalize(rounding_mode, lost_fraction);
2234 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2235 unsigned sigPartCount, int exp,
2236 roundingMode rounding_mode)
2238 unsigned int parts, pow5PartCount;
2239 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2240 integerPart pow5Parts[maxPowerOfFiveParts];
2243 isNearest = (rounding_mode == rmNearestTiesToEven
2244 || rounding_mode == rmNearestTiesToAway);
2246 parts = partCountForBits(semantics->precision + 11);
2248 /* Calculate pow(5, abs(exp)). */
2249 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2251 for (;; parts *= 2) {
2252 opStatus sigStatus, powStatus;
2253 unsigned int excessPrecision, truncatedBits;
2255 calcSemantics.precision = parts * integerPartWidth - 1;
2256 excessPrecision = calcSemantics.precision - semantics->precision;
2257 truncatedBits = excessPrecision;
2259 APFloat decSig(calcSemantics, fcZero, sign);
2260 APFloat pow5(calcSemantics, fcZero, false);
2262 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2263 rmNearestTiesToEven);
2264 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2265 rmNearestTiesToEven);
2266 /* Add exp, as 10^n = 5^n * 2^n. */
2267 decSig.exponent += exp;
2269 lostFraction calcLostFraction;
2270 integerPart HUerr, HUdistance;
2271 unsigned int powHUerr;
2274 /* multiplySignificand leaves the precision-th bit set to 1. */
2275 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2276 powHUerr = powStatus != opOK;
2278 calcLostFraction = decSig.divideSignificand(pow5);
2279 /* Denormal numbers have less precision. */
2280 if (decSig.exponent < semantics->minExponent) {
2281 excessPrecision += (semantics->minExponent - decSig.exponent);
2282 truncatedBits = excessPrecision;
2283 if (excessPrecision > calcSemantics.precision)
2284 excessPrecision = calcSemantics.precision;
2286 /* Extra half-ulp lost in reciprocal of exponent. */
2287 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2290 /* Both multiplySignificand and divideSignificand return the
2291 result with the integer bit set. */
2292 assert(APInt::tcExtractBit
2293 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2295 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2297 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2298 excessPrecision, isNearest);
2300 /* Are we guaranteed to round correctly if we truncate? */
2301 if (HUdistance >= HUerr) {
2302 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2303 calcSemantics.precision - excessPrecision,
2305 /* Take the exponent of decSig. If we tcExtract-ed less bits
2306 above we must adjust our exponent to compensate for the
2307 implicit right shift. */
2308 exponent = (decSig.exponent + semantics->precision
2309 - (calcSemantics.precision - excessPrecision));
2310 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2313 return normalize(rounding_mode, calcLostFraction);
2319 APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mode)
2324 /* Scan the text. */
2325 StringRef::iterator p = str.begin();
2326 interpretDecimal(p, str.end(), &D);
2328 /* Handle the quick cases. First the case of no significant digits,
2329 i.e. zero, and then exponents that are obviously too large or too
2330 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2331 definitely overflows if
2333 (exp - 1) * L >= maxExponent
2335 and definitely underflows to zero where
2337 (exp + 1) * L <= minExponent - precision
2339 With integer arithmetic the tightest bounds for L are
2341 93/28 < L < 196/59 [ numerator <= 256 ]
2342 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2345 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2348 } else if ((D.normalizedExponent + 1) * 28738
2349 <= 8651 * (semantics->minExponent - (int) semantics->precision)) {
2350 /* Underflow to zero and round. */
2352 fs = normalize(rounding_mode, lfLessThanHalf);
2353 } else if ((D.normalizedExponent - 1) * 42039
2354 >= 12655 * semantics->maxExponent) {
2355 /* Overflow and round. */
2356 fs = handleOverflow(rounding_mode);
2358 integerPart *decSignificand;
2359 unsigned int partCount;
2361 /* A tight upper bound on number of bits required to hold an
2362 N-digit decimal integer is N * 196 / 59. Allocate enough space
2363 to hold the full significand, and an extra part required by
2365 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2366 partCount = partCountForBits(1 + 196 * partCount / 59);
2367 decSignificand = new integerPart[partCount + 1];
2370 /* Convert to binary efficiently - we do almost all multiplication
2371 in an integerPart. When this would overflow do we do a single
2372 bignum multiplication, and then revert again to multiplication
2373 in an integerPart. */
2375 integerPart decValue, val, multiplier;
2383 if (p == str.end()) {
2387 decValue = decDigitValue(*p++);
2388 assert(decValue < 10U && "Invalid character in significand");
2390 val = val * 10 + decValue;
2391 /* The maximum number that can be multiplied by ten with any
2392 digit added without overflowing an integerPart. */
2393 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2395 /* Multiply out the current part. */
2396 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2397 partCount, partCount + 1, false);
2399 /* If we used another part (likely but not guaranteed), increase
2401 if (decSignificand[partCount])
2403 } while (p <= D.lastSigDigit);
2405 category = fcNormal;
2406 fs = roundSignificandWithExponent(decSignificand, partCount,
2407 D.exponent, rounding_mode);
2409 delete [] decSignificand;
2416 APFloat::convertFromString(const StringRef &str, roundingMode rounding_mode)
2418 assertArithmeticOK(*semantics);
2419 assert(!str.empty() && "Invalid string length");
2421 /* Handle a leading minus sign. */
2422 StringRef::iterator p = str.begin();
2423 size_t slen = str.size();
2424 sign = *p == '-' ? 1 : 0;
2425 if(*p == '-' || *p == '+') {
2428 assert(slen && "String has no digits");
2431 if(slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2432 assert(slen - 2 && "Invalid string");
2433 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2437 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2440 /* Write out a hexadecimal representation of the floating point value
2441 to DST, which must be of sufficient size, in the C99 form
2442 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2443 excluding the terminating NUL.
2445 If UPPERCASE, the output is in upper case, otherwise in lower case.
2447 HEXDIGITS digits appear altogether, rounding the value if
2448 necessary. If HEXDIGITS is 0, the minimal precision to display the
2449 number precisely is used instead. If nothing would appear after
2450 the decimal point it is suppressed.
2452 The decimal exponent is always printed and has at least one digit.
2453 Zero values display an exponent of zero. Infinities and NaNs
2454 appear as "infinity" or "nan" respectively.
2456 The above rules are as specified by C99. There is ambiguity about
2457 what the leading hexadecimal digit should be. This implementation
2458 uses whatever is necessary so that the exponent is displayed as
2459 stored. This implies the exponent will fall within the IEEE format
2460 range, and the leading hexadecimal digit will be 0 (for denormals),
2461 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2462 any other digits zero).
2465 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2466 bool upperCase, roundingMode rounding_mode) const
2470 assertArithmeticOK(*semantics);
2478 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2479 dst += sizeof infinityL - 1;
2483 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2484 dst += sizeof NaNU - 1;
2489 *dst++ = upperCase ? 'X': 'x';
2491 if (hexDigits > 1) {
2493 memset (dst, '0', hexDigits - 1);
2494 dst += hexDigits - 1;
2496 *dst++ = upperCase ? 'P': 'p';
2501 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2507 return static_cast<unsigned int>(dst - p);
2510 /* Does the hard work of outputting the correctly rounded hexadecimal
2511 form of a normal floating point number with the specified number of
2512 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2513 digits necessary to print the value precisely is output. */
2515 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2517 roundingMode rounding_mode) const
2519 unsigned int count, valueBits, shift, partsCount, outputDigits;
2520 const char *hexDigitChars;
2521 const integerPart *significand;
2526 *dst++ = upperCase ? 'X': 'x';
2529 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2531 significand = significandParts();
2532 partsCount = partCount();
2534 /* +3 because the first digit only uses the single integer bit, so
2535 we have 3 virtual zero most-significant-bits. */
2536 valueBits = semantics->precision + 3;
2537 shift = integerPartWidth - valueBits % integerPartWidth;
2539 /* The natural number of digits required ignoring trailing
2540 insignificant zeroes. */
2541 outputDigits = (valueBits - significandLSB () + 3) / 4;
2543 /* hexDigits of zero means use the required number for the
2544 precision. Otherwise, see if we are truncating. If we are,
2545 find out if we need to round away from zero. */
2547 if (hexDigits < outputDigits) {
2548 /* We are dropping non-zero bits, so need to check how to round.
2549 "bits" is the number of dropped bits. */
2551 lostFraction fraction;
2553 bits = valueBits - hexDigits * 4;
2554 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2555 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2557 outputDigits = hexDigits;
2560 /* Write the digits consecutively, and start writing in the location
2561 of the hexadecimal point. We move the most significant digit
2562 left and add the hexadecimal point later. */
2565 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2567 while (outputDigits && count) {
2570 /* Put the most significant integerPartWidth bits in "part". */
2571 if (--count == partsCount)
2572 part = 0; /* An imaginary higher zero part. */
2574 part = significand[count] << shift;
2577 part |= significand[count - 1] >> (integerPartWidth - shift);
2579 /* Convert as much of "part" to hexdigits as we can. */
2580 unsigned int curDigits = integerPartWidth / 4;
2582 if (curDigits > outputDigits)
2583 curDigits = outputDigits;
2584 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2585 outputDigits -= curDigits;
2591 /* Note that hexDigitChars has a trailing '0'. */
2594 *q = hexDigitChars[hexDigitValue (*q) + 1];
2595 } while (*q == '0');
2598 /* Add trailing zeroes. */
2599 memset (dst, '0', outputDigits);
2600 dst += outputDigits;
2603 /* Move the most significant digit to before the point, and if there
2604 is something after the decimal point add it. This must come
2605 after rounding above. */
2612 /* Finally output the exponent. */
2613 *dst++ = upperCase ? 'P': 'p';
2615 return writeSignedDecimal (dst, exponent);
2618 // For good performance it is desirable for different APFloats
2619 // to produce different integers.
2621 APFloat::getHashValue() const
2623 if (category==fcZero) return sign<<8 | semantics->precision ;
2624 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2625 else if (category==fcNaN) return 1<<10 | semantics->precision;
2627 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2628 const integerPart* p = significandParts();
2629 for (int i=partCount(); i>0; i--, p++)
2630 hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
2635 // Conversion from APFloat to/from host float/double. It may eventually be
2636 // possible to eliminate these and have everybody deal with APFloats, but that
2637 // will take a while. This approach will not easily extend to long double.
2638 // Current implementation requires integerPartWidth==64, which is correct at
2639 // the moment but could be made more general.
2641 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2642 // the actual IEEE respresentations. We compensate for that here.
2645 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2647 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2648 assert(partCount()==2);
2650 uint64_t myexponent, mysignificand;
2652 if (category==fcNormal) {
2653 myexponent = exponent+16383; //bias
2654 mysignificand = significandParts()[0];
2655 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2656 myexponent = 0; // denormal
2657 } else if (category==fcZero) {
2660 } else if (category==fcInfinity) {
2661 myexponent = 0x7fff;
2662 mysignificand = 0x8000000000000000ULL;
2664 assert(category == fcNaN && "Unknown category");
2665 myexponent = 0x7fff;
2666 mysignificand = significandParts()[0];
2670 words[0] = mysignificand;
2671 words[1] = ((uint64_t)(sign & 1) << 15) |
2672 (myexponent & 0x7fffLL);
2673 return APInt(80, 2, words);
2677 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2679 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2680 assert(partCount()==2);
2682 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2684 if (category==fcNormal) {
2685 myexponent = exponent + 1023; //bias
2686 myexponent2 = exponent2 + 1023;
2687 mysignificand = significandParts()[0];
2688 mysignificand2 = significandParts()[1];
2689 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2690 myexponent = 0; // denormal
2691 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2692 myexponent2 = 0; // denormal
2693 } else if (category==fcZero) {
2698 } else if (category==fcInfinity) {
2704 assert(category == fcNaN && "Unknown category");
2706 mysignificand = significandParts()[0];
2707 myexponent2 = exponent2;
2708 mysignificand2 = significandParts()[1];
2712 words[0] = ((uint64_t)(sign & 1) << 63) |
2713 ((myexponent & 0x7ff) << 52) |
2714 (mysignificand & 0xfffffffffffffLL);
2715 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2716 ((myexponent2 & 0x7ff) << 52) |
2717 (mysignificand2 & 0xfffffffffffffLL);
2718 return APInt(128, 2, words);
2722 APFloat::convertQuadrupleAPFloatToAPInt() const
2724 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2725 assert(partCount()==2);
2727 uint64_t myexponent, mysignificand, mysignificand2;
2729 if (category==fcNormal) {
2730 myexponent = exponent+16383; //bias
2731 mysignificand = significandParts()[0];
2732 mysignificand2 = significandParts()[1];
2733 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2734 myexponent = 0; // denormal
2735 } else if (category==fcZero) {
2737 mysignificand = mysignificand2 = 0;
2738 } else if (category==fcInfinity) {
2739 myexponent = 0x7fff;
2740 mysignificand = mysignificand2 = 0;
2742 assert(category == fcNaN && "Unknown category!");
2743 myexponent = 0x7fff;
2744 mysignificand = significandParts()[0];
2745 mysignificand2 = significandParts()[1];
2749 words[0] = mysignificand;
2750 words[1] = ((uint64_t)(sign & 1) << 63) |
2751 ((myexponent & 0x7fff) << 48) |
2752 (mysignificand2 & 0xffffffffffffLL);
2754 return APInt(128, 2, words);
2758 APFloat::convertDoubleAPFloatToAPInt() const
2760 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2761 assert(partCount()==1);
2763 uint64_t myexponent, mysignificand;
2765 if (category==fcNormal) {
2766 myexponent = exponent+1023; //bias
2767 mysignificand = *significandParts();
2768 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2769 myexponent = 0; // denormal
2770 } else if (category==fcZero) {
2773 } else if (category==fcInfinity) {
2777 assert(category == fcNaN && "Unknown category!");
2779 mysignificand = *significandParts();
2782 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2783 ((myexponent & 0x7ff) << 52) |
2784 (mysignificand & 0xfffffffffffffLL))));
2788 APFloat::convertFloatAPFloatToAPInt() const
2790 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2791 assert(partCount()==1);
2793 uint32_t myexponent, mysignificand;
2795 if (category==fcNormal) {
2796 myexponent = exponent+127; //bias
2797 mysignificand = (uint32_t)*significandParts();
2798 if (myexponent == 1 && !(mysignificand & 0x800000))
2799 myexponent = 0; // denormal
2800 } else if (category==fcZero) {
2803 } else if (category==fcInfinity) {
2807 assert(category == fcNaN && "Unknown category!");
2809 mysignificand = (uint32_t)*significandParts();
2812 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2813 (mysignificand & 0x7fffff)));
2817 APFloat::convertHalfAPFloatToAPInt() const
2819 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2820 assert(partCount()==1);
2822 uint32_t myexponent, mysignificand;
2824 if (category==fcNormal) {
2825 myexponent = exponent+15; //bias
2826 mysignificand = (uint32_t)*significandParts();
2827 if (myexponent == 1 && !(mysignificand & 0x400))
2828 myexponent = 0; // denormal
2829 } else if (category==fcZero) {
2832 } else if (category==fcInfinity) {
2836 assert(category == fcNaN && "Unknown category!");
2838 mysignificand = (uint32_t)*significandParts();
2841 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2842 (mysignificand & 0x3ff)));
2845 // This function creates an APInt that is just a bit map of the floating
2846 // point constant as it would appear in memory. It is not a conversion,
2847 // and treating the result as a normal integer is unlikely to be useful.
2850 APFloat::bitcastToAPInt() const
2852 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2853 return convertHalfAPFloatToAPInt();
2855 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2856 return convertFloatAPFloatToAPInt();
2858 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2859 return convertDoubleAPFloatToAPInt();
2861 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2862 return convertQuadrupleAPFloatToAPInt();
2864 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2865 return convertPPCDoubleDoubleAPFloatToAPInt();
2867 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2869 return convertF80LongDoubleAPFloatToAPInt();
2873 APFloat::convertToFloat() const
2875 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2876 "Float semantics are not IEEEsingle");
2877 APInt api = bitcastToAPInt();
2878 return api.bitsToFloat();
2882 APFloat::convertToDouble() const
2884 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2885 "Float semantics are not IEEEdouble");
2886 APInt api = bitcastToAPInt();
2887 return api.bitsToDouble();
2890 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2891 /// does not support these bit patterns:
2892 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2893 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2894 /// exponent = 0, integer bit 1 ("pseudodenormal")
2895 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2896 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2898 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2900 assert(api.getBitWidth()==80);
2901 uint64_t i1 = api.getRawData()[0];
2902 uint64_t i2 = api.getRawData()[1];
2903 uint64_t myexponent = (i2 & 0x7fff);
2904 uint64_t mysignificand = i1;
2906 initialize(&APFloat::x87DoubleExtended);
2907 assert(partCount()==2);
2909 sign = static_cast<unsigned int>(i2>>15);
2910 if (myexponent==0 && mysignificand==0) {
2911 // exponent, significand meaningless
2913 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2914 // exponent, significand meaningless
2915 category = fcInfinity;
2916 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2917 // exponent meaningless
2919 significandParts()[0] = mysignificand;
2920 significandParts()[1] = 0;
2922 category = fcNormal;
2923 exponent = myexponent - 16383;
2924 significandParts()[0] = mysignificand;
2925 significandParts()[1] = 0;
2926 if (myexponent==0) // denormal
2932 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2934 assert(api.getBitWidth()==128);
2935 uint64_t i1 = api.getRawData()[0];
2936 uint64_t i2 = api.getRawData()[1];
2937 uint64_t myexponent = (i1 >> 52) & 0x7ff;
2938 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
2939 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
2940 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
2942 initialize(&APFloat::PPCDoubleDouble);
2943 assert(partCount()==2);
2945 sign = static_cast<unsigned int>(i1>>63);
2946 sign2 = static_cast<unsigned int>(i2>>63);
2947 if (myexponent==0 && mysignificand==0) {
2948 // exponent, significand meaningless
2949 // exponent2 and significand2 are required to be 0; we don't check
2951 } else if (myexponent==0x7ff && mysignificand==0) {
2952 // exponent, significand meaningless
2953 // exponent2 and significand2 are required to be 0; we don't check
2954 category = fcInfinity;
2955 } else if (myexponent==0x7ff && mysignificand!=0) {
2956 // exponent meaningless. So is the whole second word, but keep it
2959 exponent2 = myexponent2;
2960 significandParts()[0] = mysignificand;
2961 significandParts()[1] = mysignificand2;
2963 category = fcNormal;
2964 // Note there is no category2; the second word is treated as if it is
2965 // fcNormal, although it might be something else considered by itself.
2966 exponent = myexponent - 1023;
2967 exponent2 = myexponent2 - 1023;
2968 significandParts()[0] = mysignificand;
2969 significandParts()[1] = mysignificand2;
2970 if (myexponent==0) // denormal
2973 significandParts()[0] |= 0x10000000000000LL; // integer bit
2977 significandParts()[1] |= 0x10000000000000LL; // integer bit
2982 APFloat::initFromQuadrupleAPInt(const APInt &api)
2984 assert(api.getBitWidth()==128);
2985 uint64_t i1 = api.getRawData()[0];
2986 uint64_t i2 = api.getRawData()[1];
2987 uint64_t myexponent = (i2 >> 48) & 0x7fff;
2988 uint64_t mysignificand = i1;
2989 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
2991 initialize(&APFloat::IEEEquad);
2992 assert(partCount()==2);
2994 sign = static_cast<unsigned int>(i2>>63);
2995 if (myexponent==0 &&
2996 (mysignificand==0 && mysignificand2==0)) {
2997 // exponent, significand meaningless
2999 } else if (myexponent==0x7fff &&
3000 (mysignificand==0 && mysignificand2==0)) {
3001 // exponent, significand meaningless
3002 category = fcInfinity;
3003 } else if (myexponent==0x7fff &&
3004 (mysignificand!=0 || mysignificand2 !=0)) {
3005 // exponent meaningless
3007 significandParts()[0] = mysignificand;
3008 significandParts()[1] = mysignificand2;
3010 category = fcNormal;
3011 exponent = myexponent - 16383;
3012 significandParts()[0] = mysignificand;
3013 significandParts()[1] = mysignificand2;
3014 if (myexponent==0) // denormal
3017 significandParts()[1] |= 0x1000000000000LL; // integer bit
3022 APFloat::initFromDoubleAPInt(const APInt &api)
3024 assert(api.getBitWidth()==64);
3025 uint64_t i = *api.getRawData();
3026 uint64_t myexponent = (i >> 52) & 0x7ff;
3027 uint64_t mysignificand = i & 0xfffffffffffffLL;
3029 initialize(&APFloat::IEEEdouble);
3030 assert(partCount()==1);
3032 sign = static_cast<unsigned int>(i>>63);
3033 if (myexponent==0 && mysignificand==0) {
3034 // exponent, significand meaningless
3036 } else if (myexponent==0x7ff && mysignificand==0) {
3037 // exponent, significand meaningless
3038 category = fcInfinity;
3039 } else if (myexponent==0x7ff && mysignificand!=0) {
3040 // exponent meaningless
3042 *significandParts() = mysignificand;
3044 category = fcNormal;
3045 exponent = myexponent - 1023;
3046 *significandParts() = mysignificand;
3047 if (myexponent==0) // denormal
3050 *significandParts() |= 0x10000000000000LL; // integer bit
3055 APFloat::initFromFloatAPInt(const APInt & api)
3057 assert(api.getBitWidth()==32);
3058 uint32_t i = (uint32_t)*api.getRawData();
3059 uint32_t myexponent = (i >> 23) & 0xff;
3060 uint32_t mysignificand = i & 0x7fffff;
3062 initialize(&APFloat::IEEEsingle);
3063 assert(partCount()==1);
3066 if (myexponent==0 && mysignificand==0) {
3067 // exponent, significand meaningless
3069 } else if (myexponent==0xff && mysignificand==0) {
3070 // exponent, significand meaningless
3071 category = fcInfinity;
3072 } else if (myexponent==0xff && mysignificand!=0) {
3073 // sign, exponent, significand meaningless
3075 *significandParts() = mysignificand;
3077 category = fcNormal;
3078 exponent = myexponent - 127; //bias
3079 *significandParts() = mysignificand;
3080 if (myexponent==0) // denormal
3083 *significandParts() |= 0x800000; // integer bit
3088 APFloat::initFromHalfAPInt(const APInt & api)
3090 assert(api.getBitWidth()==16);
3091 uint32_t i = (uint32_t)*api.getRawData();
3092 uint32_t myexponent = (i >> 10) & 0x1f;
3093 uint32_t mysignificand = i & 0x3ff;
3095 initialize(&APFloat::IEEEhalf);
3096 assert(partCount()==1);
3099 if (myexponent==0 && mysignificand==0) {
3100 // exponent, significand meaningless
3102 } else if (myexponent==0x1f && mysignificand==0) {
3103 // exponent, significand meaningless
3104 category = fcInfinity;
3105 } else if (myexponent==0x1f && mysignificand!=0) {
3106 // sign, exponent, significand meaningless
3108 *significandParts() = mysignificand;
3110 category = fcNormal;
3111 exponent = myexponent - 15; //bias
3112 *significandParts() = mysignificand;
3113 if (myexponent==0) // denormal
3116 *significandParts() |= 0x400; // integer bit
3120 /// Treat api as containing the bits of a floating point number. Currently
3121 /// we infer the floating point type from the size of the APInt. The
3122 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3123 /// when the size is anything else).
3125 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
3127 if (api.getBitWidth() == 16)
3128 return initFromHalfAPInt(api);
3129 else if (api.getBitWidth() == 32)
3130 return initFromFloatAPInt(api);
3131 else if (api.getBitWidth()==64)
3132 return initFromDoubleAPInt(api);
3133 else if (api.getBitWidth()==80)
3134 return initFromF80LongDoubleAPInt(api);
3135 else if (api.getBitWidth()==128)
3137 initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api));
3139 llvm_unreachable(0);
3142 APFloat::APFloat(const APInt& api, bool isIEEE)
3144 initFromAPInt(api, isIEEE);
3147 APFloat::APFloat(float f)
3149 APInt api = APInt(32, 0);
3150 initFromAPInt(api.floatToBits(f));
3153 APFloat::APFloat(double d)
3155 APInt api = APInt(64, 0);
3156 initFromAPInt(api.doubleToBits(d));