1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Neil Booth and is distributed under the
6 // University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
16 #include "llvm/ADT/APFloat.h"
17 #include "llvm/Support/MathExtras.h"
21 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
23 /* Assumed in hexadecimal significand parsing. */
24 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
28 /* Represents floating point arithmetic semantics. */
30 /* The largest E such that 2^E is representable; this matches the
31 definition of IEEE 754. */
32 exponent_t maxExponent;
34 /* The smallest E such that 2^E is a normalized number; this
35 matches the definition of IEEE 754. */
36 exponent_t minExponent;
38 /* Number of bits in the significand. This includes the integer
40 unsigned char precision;
42 /* If the target format has an implicit integer bit. */
43 bool implicitIntegerBit;
46 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
47 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
48 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
49 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, false };
50 const fltSemantics APFloat::Bogus = { 0, 0, 0, false };
53 /* Put a bunch of private, handy routines in an anonymous namespace. */
57 partCountForBits(unsigned int bits)
59 return ((bits) + integerPartWidth - 1) / integerPartWidth;
63 digitValue(unsigned int c)
75 hexDigitValue (unsigned int c)
94 /* This is ugly and needs cleaning up, but I don't immediately see
95 how whilst remaining safe. */
97 totalExponent(const char *p, int exponentAdjustment)
99 integerPart unsignedExponent;
100 bool negative, overflow;
103 /* Move past the exponent letter and sign to the digits. */
105 negative = *p == '-';
106 if(*p == '-' || *p == '+')
109 unsignedExponent = 0;
114 value = digitValue(*p);
119 unsignedExponent = unsignedExponent * 10 + value;
120 if(unsignedExponent > 65535)
124 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
128 exponent = unsignedExponent;
130 exponent = -exponent;
131 exponent += exponentAdjustment;
132 if(exponent > 65535 || exponent < -65536)
137 exponent = negative ? -65536: 65535;
143 skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
158 /* Return the trailing fraction of a hexadecimal number.
159 DIGITVALUE is the first hex digit of the fraction, P points to
162 trailingHexadecimalFraction(const char *p, unsigned int digitValue)
164 unsigned int hexDigit;
166 /* If the first trailing digit isn't 0 or 8 we can work out the
167 fraction immediately. */
169 return lfMoreThanHalf;
170 else if(digitValue < 8 && digitValue > 0)
171 return lfLessThanHalf;
173 /* Otherwise we need to find the first non-zero digit. */
177 hexDigit = hexDigitValue(*p);
179 /* If we ran off the end it is exactly zero or one-half, otherwise
182 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
184 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
187 /* Return the fraction lost were a bignum truncated. */
189 lostFractionThroughTruncation(integerPart *parts,
190 unsigned int partCount,
195 lsb = APInt::tcLSB(parts, partCount);
197 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
199 return lfExactlyZero;
201 return lfExactlyHalf;
202 if(bits <= partCount * integerPartWidth
203 && APInt::tcExtractBit(parts, bits - 1))
204 return lfMoreThanHalf;
206 return lfLessThanHalf;
209 /* Shift DST right BITS bits noting lost fraction. */
211 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
213 lostFraction lost_fraction;
215 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
217 APInt::tcShiftRight(dst, parts, bits);
219 return lost_fraction;
225 APFloat::initialize(const fltSemantics *ourSemantics)
229 semantics = ourSemantics;
232 significand.parts = new integerPart[count];
236 APFloat::freeSignificand()
239 delete [] significand.parts;
243 APFloat::assign(const APFloat &rhs)
245 assert(semantics == rhs.semantics);
248 category = rhs.category;
249 exponent = rhs.exponent;
250 if(category == fcNormal || category == fcNaN)
251 copySignificand(rhs);
255 APFloat::copySignificand(const APFloat &rhs)
257 assert(category == fcNormal || category == fcNaN);
258 assert(rhs.partCount() >= partCount());
260 APInt::tcAssign(significandParts(), rhs.significandParts(),
265 APFloat::operator=(const APFloat &rhs)
268 if(semantics != rhs.semantics) {
270 initialize(rhs.semantics);
279 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
282 if (semantics != rhs.semantics ||
283 category != rhs.category ||
286 if (category==fcZero || category==fcInfinity)
288 else if (category==fcNormal && exponent!=rhs.exponent)
292 const integerPart* p=significandParts();
293 const integerPart* q=rhs.significandParts();
294 for (; i>0; i--, p++, q++) {
302 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
304 initialize(&ourSemantics);
307 exponent = ourSemantics.precision - 1;
308 significandParts()[0] = value;
309 normalize(rmNearestTiesToEven, lfExactlyZero);
312 APFloat::APFloat(const fltSemantics &ourSemantics,
313 fltCategory ourCategory, bool negative)
315 initialize(&ourSemantics);
316 category = ourCategory;
318 if(category == fcNormal)
322 APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
324 initialize(&ourSemantics);
325 convertFromString(text, rmNearestTiesToEven);
328 APFloat::APFloat(const APFloat &rhs)
330 initialize(rhs.semantics);
340 APFloat::partCount() const
342 return partCountForBits(semantics->precision + 1);
346 APFloat::semanticsPrecision(const fltSemantics &semantics)
348 return semantics.precision;
352 APFloat::significandParts() const
354 return const_cast<APFloat *>(this)->significandParts();
358 APFloat::significandParts()
360 assert(category == fcNormal || category == fcNaN);
363 return significand.parts;
365 return &significand.part;
368 /* Combine the effect of two lost fractions. */
370 APFloat::combineLostFractions(lostFraction moreSignificant,
371 lostFraction lessSignificant)
373 if(lessSignificant != lfExactlyZero) {
374 if(moreSignificant == lfExactlyZero)
375 moreSignificant = lfLessThanHalf;
376 else if(moreSignificant == lfExactlyHalf)
377 moreSignificant = lfMoreThanHalf;
380 return moreSignificant;
384 APFloat::zeroSignificand()
387 APInt::tcSet(significandParts(), 0, partCount());
390 /* Increment an fcNormal floating point number's significand. */
392 APFloat::incrementSignificand()
396 carry = APInt::tcIncrement(significandParts(), partCount());
398 /* Our callers should never cause us to overflow. */
402 /* Add the significand of the RHS. Returns the carry flag. */
404 APFloat::addSignificand(const APFloat &rhs)
408 parts = significandParts();
410 assert(semantics == rhs.semantics);
411 assert(exponent == rhs.exponent);
413 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
416 /* Subtract the significand of the RHS with a borrow flag. Returns
419 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
423 parts = significandParts();
425 assert(semantics == rhs.semantics);
426 assert(exponent == rhs.exponent);
428 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
432 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
433 on to the full-precision result of the multiplication. Returns the
436 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
438 unsigned int omsb; // One, not zero, based MSB.
439 unsigned int partsCount, newPartsCount, precision;
440 integerPart *lhsSignificand;
441 integerPart scratch[4];
442 integerPart *fullSignificand;
443 lostFraction lost_fraction;
445 assert(semantics == rhs.semantics);
447 precision = semantics->precision;
448 newPartsCount = partCountForBits(precision * 2);
450 if(newPartsCount > 4)
451 fullSignificand = new integerPart[newPartsCount];
453 fullSignificand = scratch;
455 lhsSignificand = significandParts();
456 partsCount = partCount();
458 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
459 rhs.significandParts(), partsCount);
461 lost_fraction = lfExactlyZero;
462 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
463 exponent += rhs.exponent;
466 Significand savedSignificand = significand;
467 const fltSemantics *savedSemantics = semantics;
468 fltSemantics extendedSemantics;
470 unsigned int extendedPrecision;
472 /* Normalize our MSB. */
473 extendedPrecision = precision + precision - 1;
474 if(omsb != extendedPrecision)
476 APInt::tcShiftLeft(fullSignificand, newPartsCount,
477 extendedPrecision - omsb);
478 exponent -= extendedPrecision - omsb;
481 /* Create new semantics. */
482 extendedSemantics = *semantics;
483 extendedSemantics.precision = extendedPrecision;
485 if(newPartsCount == 1)
486 significand.part = fullSignificand[0];
488 significand.parts = fullSignificand;
489 semantics = &extendedSemantics;
491 APFloat extendedAddend(*addend);
492 status = extendedAddend.convert(extendedSemantics, rmTowardZero);
493 assert(status == opOK);
494 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
496 /* Restore our state. */
497 if(newPartsCount == 1)
498 fullSignificand[0] = significand.part;
499 significand = savedSignificand;
500 semantics = savedSemantics;
502 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
505 exponent -= (precision - 1);
507 if(omsb > precision) {
508 unsigned int bits, significantParts;
511 bits = omsb - precision;
512 significantParts = partCountForBits(omsb);
513 lf = shiftRight(fullSignificand, significantParts, bits);
514 lost_fraction = combineLostFractions(lf, lost_fraction);
518 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
520 if(newPartsCount > 4)
521 delete [] fullSignificand;
523 return lost_fraction;
526 /* Multiply the significands of LHS and RHS to DST. */
528 APFloat::divideSignificand(const APFloat &rhs)
530 unsigned int bit, i, partsCount;
531 const integerPart *rhsSignificand;
532 integerPart *lhsSignificand, *dividend, *divisor;
533 integerPart scratch[4];
534 lostFraction lost_fraction;
536 assert(semantics == rhs.semantics);
538 lhsSignificand = significandParts();
539 rhsSignificand = rhs.significandParts();
540 partsCount = partCount();
543 dividend = new integerPart[partsCount * 2];
547 divisor = dividend + partsCount;
549 /* Copy the dividend and divisor as they will be modified in-place. */
550 for(i = 0; i < partsCount; i++) {
551 dividend[i] = lhsSignificand[i];
552 divisor[i] = rhsSignificand[i];
553 lhsSignificand[i] = 0;
556 exponent -= rhs.exponent;
558 unsigned int precision = semantics->precision;
560 /* Normalize the divisor. */
561 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
564 APInt::tcShiftLeft(divisor, partsCount, bit);
567 /* Normalize the dividend. */
568 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
571 APInt::tcShiftLeft(dividend, partsCount, bit);
574 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
576 APInt::tcShiftLeft(dividend, partsCount, 1);
577 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
581 for(bit = precision; bit; bit -= 1) {
582 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
583 APInt::tcSubtract(dividend, divisor, 0, partsCount);
584 APInt::tcSetBit(lhsSignificand, bit - 1);
587 APInt::tcShiftLeft(dividend, partsCount, 1);
590 /* Figure out the lost fraction. */
591 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
594 lost_fraction = lfMoreThanHalf;
596 lost_fraction = lfExactlyHalf;
597 else if(APInt::tcIsZero(dividend, partsCount))
598 lost_fraction = lfExactlyZero;
600 lost_fraction = lfLessThanHalf;
605 return lost_fraction;
609 APFloat::significandMSB() const
611 return APInt::tcMSB(significandParts(), partCount());
615 APFloat::significandLSB() const
617 return APInt::tcLSB(significandParts(), partCount());
620 /* Note that a zero result is NOT normalized to fcZero. */
622 APFloat::shiftSignificandRight(unsigned int bits)
624 /* Our exponent should not overflow. */
625 assert((exponent_t) (exponent + bits) >= exponent);
629 return shiftRight(significandParts(), partCount(), bits);
632 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
634 APFloat::shiftSignificandLeft(unsigned int bits)
636 assert(bits < semantics->precision);
639 unsigned int partsCount = partCount();
641 APInt::tcShiftLeft(significandParts(), partsCount, bits);
644 assert(!APInt::tcIsZero(significandParts(), partsCount));
649 APFloat::compareAbsoluteValue(const APFloat &rhs) const
653 assert(semantics == rhs.semantics);
654 assert(category == fcNormal);
655 assert(rhs.category == fcNormal);
657 compare = exponent - rhs.exponent;
659 /* If exponents are equal, do an unsigned bignum comparison of the
662 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
666 return cmpGreaterThan;
673 /* Handle overflow. Sign is preserved. We either become infinity or
674 the largest finite number. */
676 APFloat::handleOverflow(roundingMode rounding_mode)
679 if(rounding_mode == rmNearestTiesToEven
680 || rounding_mode == rmNearestTiesToAway
681 || (rounding_mode == rmTowardPositive && !sign)
682 || (rounding_mode == rmTowardNegative && sign))
684 category = fcInfinity;
685 return (opStatus) (opOverflow | opInexact);
688 /* Otherwise we become the largest finite number. */
690 exponent = semantics->maxExponent;
691 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
692 semantics->precision);
697 /* This routine must work for fcZero of both signs, and fcNormal
700 APFloat::roundAwayFromZero(roundingMode rounding_mode,
701 lostFraction lost_fraction)
703 /* NaNs and infinities should not have lost fractions. */
704 assert(category == fcNormal || category == fcZero);
706 /* Our caller has already handled this case. */
707 assert(lost_fraction != lfExactlyZero);
709 switch(rounding_mode) {
713 case rmNearestTiesToAway:
714 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
716 case rmNearestTiesToEven:
717 if(lost_fraction == lfMoreThanHalf)
720 /* Our zeroes don't have a significand to test. */
721 if(lost_fraction == lfExactlyHalf && category != fcZero)
722 return significandParts()[0] & 1;
729 case rmTowardPositive:
730 return sign == false;
732 case rmTowardNegative:
738 APFloat::normalize(roundingMode rounding_mode,
739 lostFraction lost_fraction)
741 unsigned int omsb; /* One, not zero, based MSB. */
744 if(category != fcNormal)
747 /* Before rounding normalize the exponent of fcNormal numbers. */
748 omsb = significandMSB() + 1;
751 /* OMSB is numbered from 1. We want to place it in the integer
752 bit numbered PRECISON if possible, with a compensating change in
754 exponentChange = omsb - semantics->precision;
756 /* If the resulting exponent is too high, overflow according to
757 the rounding mode. */
758 if(exponent + exponentChange > semantics->maxExponent)
759 return handleOverflow(rounding_mode);
761 /* Subnormal numbers have exponent minExponent, and their MSB
762 is forced based on that. */
763 if(exponent + exponentChange < semantics->minExponent)
764 exponentChange = semantics->minExponent - exponent;
766 /* Shifting left is easy as we don't lose precision. */
767 if(exponentChange < 0) {
768 assert(lost_fraction == lfExactlyZero);
770 shiftSignificandLeft(-exponentChange);
775 if(exponentChange > 0) {
778 /* Shift right and capture any new lost fraction. */
779 lf = shiftSignificandRight(exponentChange);
781 lost_fraction = combineLostFractions(lf, lost_fraction);
783 /* Keep OMSB up-to-date. */
784 if(omsb > (unsigned) exponentChange)
785 omsb -= (unsigned) exponentChange;
791 /* Now round the number according to rounding_mode given the lost
794 /* As specified in IEEE 754, since we do not trap we do not report
795 underflow for exact results. */
796 if(lost_fraction == lfExactlyZero) {
797 /* Canonicalize zeroes. */
804 /* Increment the significand if we're rounding away from zero. */
805 if(roundAwayFromZero(rounding_mode, lost_fraction)) {
807 exponent = semantics->minExponent;
809 incrementSignificand();
810 omsb = significandMSB() + 1;
812 /* Did the significand increment overflow? */
813 if(omsb == (unsigned) semantics->precision + 1) {
814 /* Renormalize by incrementing the exponent and shifting our
815 significand right one. However if we already have the
816 maximum exponent we overflow to infinity. */
817 if(exponent == semantics->maxExponent) {
818 category = fcInfinity;
820 return (opStatus) (opOverflow | opInexact);
823 shiftSignificandRight(1);
829 /* The normal case - we were and are not denormal, and any
830 significand increment above didn't overflow. */
831 if(omsb == semantics->precision)
834 /* We have a non-zero denormal. */
835 assert(omsb < semantics->precision);
836 assert(exponent == semantics->minExponent);
838 /* Canonicalize zeroes. */
842 /* The fcZero case is a denormal that underflowed to zero. */
843 return (opStatus) (opUnderflow | opInexact);
847 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
849 switch(convolve(category, rhs.category)) {
853 case convolve(fcNaN, fcZero):
854 case convolve(fcNaN, fcNormal):
855 case convolve(fcNaN, fcInfinity):
856 case convolve(fcNaN, fcNaN):
857 case convolve(fcNormal, fcZero):
858 case convolve(fcInfinity, fcNormal):
859 case convolve(fcInfinity, fcZero):
862 case convolve(fcZero, fcNaN):
863 case convolve(fcNormal, fcNaN):
864 case convolve(fcInfinity, fcNaN):
866 copySignificand(rhs);
869 case convolve(fcNormal, fcInfinity):
870 case convolve(fcZero, fcInfinity):
871 category = fcInfinity;
872 sign = rhs.sign ^ subtract;
875 case convolve(fcZero, fcNormal):
877 sign = rhs.sign ^ subtract;
880 case convolve(fcZero, fcZero):
881 /* Sign depends on rounding mode; handled by caller. */
884 case convolve(fcInfinity, fcInfinity):
885 /* Differently signed infinities can only be validly
887 if(sign ^ rhs.sign != subtract) {
889 // Arbitrary but deterministic value for significand
890 APInt::tcSet(significandParts(), ~0U, partCount());
896 case convolve(fcNormal, fcNormal):
901 /* Add or subtract two normal numbers. */
903 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
906 lostFraction lost_fraction;
909 /* Determine if the operation on the absolute values is effectively
910 an addition or subtraction. */
911 subtract ^= (sign ^ rhs.sign);
913 /* Are we bigger exponent-wise than the RHS? */
914 bits = exponent - rhs.exponent;
916 /* Subtraction is more subtle than one might naively expect. */
918 APFloat temp_rhs(rhs);
922 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
923 lost_fraction = lfExactlyZero;
924 } else if (bits > 0) {
925 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
926 shiftSignificandLeft(1);
929 lost_fraction = shiftSignificandRight(-bits - 1);
930 temp_rhs.shiftSignificandLeft(1);
935 carry = temp_rhs.subtractSignificand
936 (*this, lost_fraction != lfExactlyZero);
937 copySignificand(temp_rhs);
940 carry = subtractSignificand
941 (temp_rhs, lost_fraction != lfExactlyZero);
944 /* Invert the lost fraction - it was on the RHS and
946 if(lost_fraction == lfLessThanHalf)
947 lost_fraction = lfMoreThanHalf;
948 else if(lost_fraction == lfMoreThanHalf)
949 lost_fraction = lfLessThanHalf;
951 /* The code above is intended to ensure that no borrow is
956 APFloat temp_rhs(rhs);
958 lost_fraction = temp_rhs.shiftSignificandRight(bits);
959 carry = addSignificand(temp_rhs);
961 lost_fraction = shiftSignificandRight(-bits);
962 carry = addSignificand(rhs);
965 /* We have a guard bit; generating a carry cannot happen. */
969 return lost_fraction;
973 APFloat::multiplySpecials(const APFloat &rhs)
975 switch(convolve(category, rhs.category)) {
979 case convolve(fcNaN, fcZero):
980 case convolve(fcNaN, fcNormal):
981 case convolve(fcNaN, fcInfinity):
982 case convolve(fcNaN, fcNaN):
985 case convolve(fcZero, fcNaN):
986 case convolve(fcNormal, fcNaN):
987 case convolve(fcInfinity, fcNaN):
989 copySignificand(rhs);
992 case convolve(fcNormal, fcInfinity):
993 case convolve(fcInfinity, fcNormal):
994 case convolve(fcInfinity, fcInfinity):
995 category = fcInfinity;
998 case convolve(fcZero, fcNormal):
999 case convolve(fcNormal, fcZero):
1000 case convolve(fcZero, fcZero):
1004 case convolve(fcZero, fcInfinity):
1005 case convolve(fcInfinity, fcZero):
1007 // Arbitrary but deterministic value for significand
1008 APInt::tcSet(significandParts(), ~0U, partCount());
1011 case convolve(fcNormal, fcNormal):
1017 APFloat::divideSpecials(const APFloat &rhs)
1019 switch(convolve(category, rhs.category)) {
1023 case convolve(fcNaN, fcZero):
1024 case convolve(fcNaN, fcNormal):
1025 case convolve(fcNaN, fcInfinity):
1026 case convolve(fcNaN, fcNaN):
1027 case convolve(fcInfinity, fcZero):
1028 case convolve(fcInfinity, fcNormal):
1029 case convolve(fcZero, fcInfinity):
1030 case convolve(fcZero, fcNormal):
1033 case convolve(fcZero, fcNaN):
1034 case convolve(fcNormal, fcNaN):
1035 case convolve(fcInfinity, fcNaN):
1037 copySignificand(rhs);
1040 case convolve(fcNormal, fcInfinity):
1044 case convolve(fcNormal, fcZero):
1045 category = fcInfinity;
1048 case convolve(fcInfinity, fcInfinity):
1049 case convolve(fcZero, fcZero):
1051 // Arbitrary but deterministic value for significand
1052 APInt::tcSet(significandParts(), ~0U, partCount());
1055 case convolve(fcNormal, fcNormal):
1062 APFloat::changeSign()
1064 /* Look mummy, this one's easy. */
1069 APFloat::clearSign()
1071 /* So is this one. */
1076 APFloat::copySign(const APFloat &rhs)
1082 /* Normalized addition or subtraction. */
1084 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1089 fs = addOrSubtractSpecials(rhs, subtract);
1091 /* This return code means it was not a simple case. */
1092 if(fs == opDivByZero) {
1093 lostFraction lost_fraction;
1095 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1096 fs = normalize(rounding_mode, lost_fraction);
1098 /* Can only be zero if we lost no fraction. */
1099 assert(category != fcZero || lost_fraction == lfExactlyZero);
1102 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1103 positive zero unless rounding to minus infinity, except that
1104 adding two like-signed zeroes gives that zero. */
1105 if(category == fcZero) {
1106 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1107 sign = (rounding_mode == rmTowardNegative);
1113 /* Normalized addition. */
1115 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1117 return addOrSubtract(rhs, rounding_mode, false);
1120 /* Normalized subtraction. */
1122 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1124 return addOrSubtract(rhs, rounding_mode, true);
1127 /* Normalized multiply. */
1129 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1134 fs = multiplySpecials(rhs);
1136 if(category == fcNormal) {
1137 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1138 fs = normalize(rounding_mode, lost_fraction);
1139 if(lost_fraction != lfExactlyZero)
1140 fs = (opStatus) (fs | opInexact);
1146 /* Normalized divide. */
1148 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1153 fs = divideSpecials(rhs);
1155 if(category == fcNormal) {
1156 lostFraction lost_fraction = divideSignificand(rhs);
1157 fs = normalize(rounding_mode, lost_fraction);
1158 if(lost_fraction != lfExactlyZero)
1159 fs = (opStatus) (fs | opInexact);
1165 /* Normalized remainder. */
1167 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1171 unsigned int origSign = sign;
1172 fs = V.divide(rhs, rmNearestTiesToEven);
1173 if (fs == opDivByZero)
1176 int parts = partCount();
1177 integerPart *x = new integerPart[parts];
1178 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1179 rmNearestTiesToEven);
1180 if (fs==opInvalidOp)
1183 fs = V.convertFromInteger(x, parts * integerPartWidth, true,
1184 rmNearestTiesToEven);
1185 assert(fs==opOK); // should always work
1187 fs = V.multiply(rhs, rounding_mode);
1188 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1190 fs = subtract(V, rounding_mode);
1191 assert(fs==opOK || fs==opInexact); // likewise
1194 sign = origSign; // IEEE754 requires this
1199 /* Normalized fused-multiply-add. */
1201 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1202 const APFloat &addend,
1203 roundingMode rounding_mode)
1207 /* Post-multiplication sign, before addition. */
1208 sign ^= multiplicand.sign;
1210 /* If and only if all arguments are normal do we need to do an
1211 extended-precision calculation. */
1212 if(category == fcNormal
1213 && multiplicand.category == fcNormal
1214 && addend.category == fcNormal) {
1215 lostFraction lost_fraction;
1217 lost_fraction = multiplySignificand(multiplicand, &addend);
1218 fs = normalize(rounding_mode, lost_fraction);
1219 if(lost_fraction != lfExactlyZero)
1220 fs = (opStatus) (fs | opInexact);
1222 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1223 positive zero unless rounding to minus infinity, except that
1224 adding two like-signed zeroes gives that zero. */
1225 if(category == fcZero && sign != addend.sign)
1226 sign = (rounding_mode == rmTowardNegative);
1228 fs = multiplySpecials(multiplicand);
1230 /* FS can only be opOK or opInvalidOp. There is no more work
1231 to do in the latter case. The IEEE-754R standard says it is
1232 implementation-defined in this case whether, if ADDEND is a
1233 quiet NaN, we raise invalid op; this implementation does so.
1235 If we need to do the addition we can do so with normal
1238 fs = addOrSubtract(addend, rounding_mode, false);
1244 /* Comparison requires normalized numbers. */
1246 APFloat::compare(const APFloat &rhs) const
1250 assert(semantics == rhs.semantics);
1252 switch(convolve(category, rhs.category)) {
1256 case convolve(fcNaN, fcZero):
1257 case convolve(fcNaN, fcNormal):
1258 case convolve(fcNaN, fcInfinity):
1259 case convolve(fcNaN, fcNaN):
1260 case convolve(fcZero, fcNaN):
1261 case convolve(fcNormal, fcNaN):
1262 case convolve(fcInfinity, fcNaN):
1263 return cmpUnordered;
1265 case convolve(fcInfinity, fcNormal):
1266 case convolve(fcInfinity, fcZero):
1267 case convolve(fcNormal, fcZero):
1271 return cmpGreaterThan;
1273 case convolve(fcNormal, fcInfinity):
1274 case convolve(fcZero, fcInfinity):
1275 case convolve(fcZero, fcNormal):
1277 return cmpGreaterThan;
1281 case convolve(fcInfinity, fcInfinity):
1282 if(sign == rhs.sign)
1287 return cmpGreaterThan;
1289 case convolve(fcZero, fcZero):
1292 case convolve(fcNormal, fcNormal):
1296 /* Two normal numbers. Do they have the same sign? */
1297 if(sign != rhs.sign) {
1299 result = cmpLessThan;
1301 result = cmpGreaterThan;
1303 /* Compare absolute values; invert result if negative. */
1304 result = compareAbsoluteValue(rhs);
1307 if(result == cmpLessThan)
1308 result = cmpGreaterThan;
1309 else if(result == cmpGreaterThan)
1310 result = cmpLessThan;
1318 APFloat::convert(const fltSemantics &toSemantics,
1319 roundingMode rounding_mode)
1321 lostFraction lostFraction;
1322 unsigned int newPartCount, oldPartCount;
1325 lostFraction = lfExactlyZero;
1326 newPartCount = partCountForBits(toSemantics.precision + 1);
1327 oldPartCount = partCount();
1329 /* Handle storage complications. If our new form is wider,
1330 re-allocate our bit pattern into wider storage. If it is
1331 narrower, we ignore the excess parts, but if narrowing to a
1332 single part we need to free the old storage.
1333 Be careful not to reference significandParts for zeroes
1334 and infinities, since it aborts. */
1335 if (newPartCount > oldPartCount) {
1336 integerPart *newParts;
1337 newParts = new integerPart[newPartCount];
1338 APInt::tcSet(newParts, 0, newPartCount);
1339 if (category==fcNormal || category==fcNaN)
1340 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1342 significand.parts = newParts;
1343 } else if (newPartCount < oldPartCount) {
1344 /* Capture any lost fraction through truncation of parts so we get
1345 correct rounding whilst normalizing. */
1346 if (category==fcNormal)
1347 lostFraction = lostFractionThroughTruncation
1348 (significandParts(), oldPartCount, toSemantics.precision);
1349 if (newPartCount == 1) {
1350 integerPart newPart = 0;
1351 if (category==fcNormal || category==fcNaN)
1352 newPart = significandParts()[0];
1354 significand.part = newPart;
1358 if(category == fcNormal) {
1359 /* Re-interpret our bit-pattern. */
1360 exponent += toSemantics.precision - semantics->precision;
1361 semantics = &toSemantics;
1362 fs = normalize(rounding_mode, lostFraction);
1363 } else if (category == fcNaN) {
1364 int shift = toSemantics.precision - semantics->precision;
1365 // No normalization here, just truncate
1367 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1369 APInt::tcShiftRight(significandParts(), newPartCount, -shift);
1370 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1371 // does not give you back the same bits. This is dubious, and we
1372 // don't currently do it. You're really supposed to get
1373 // an invalid operation signal at runtime, but nobody does that.
1374 semantics = &toSemantics;
1377 semantics = &toSemantics;
1384 /* Convert a floating point number to an integer according to the
1385 rounding mode. If the rounded integer value is out of range this
1386 returns an invalid operation exception. If the rounded value is in
1387 range but the floating point number is not the exact integer, the C
1388 standard doesn't require an inexact exception to be raised. IEEE
1389 854 does require it so we do that.
1391 Note that for conversions to integer type the C standard requires
1392 round-to-zero to always be used. */
1394 APFloat::convertToInteger(integerPart *parts, unsigned int width,
1396 roundingMode rounding_mode) const
1398 lostFraction lost_fraction;
1399 unsigned int msb, partsCount;
1402 partsCount = partCountForBits(width);
1404 /* Handle the three special cases first. We produce
1405 a deterministic result even for the Invalid cases. */
1406 if (category == fcNaN) {
1407 // Neither sign nor isSigned affects this.
1408 APInt::tcSet(parts, 0, partsCount);
1411 if (category == fcInfinity) {
1412 if (!sign && isSigned)
1413 APInt::tcSetLeastSignificantBits(parts, partsCount, width-1);
1414 else if (!sign && !isSigned)
1415 APInt::tcSetLeastSignificantBits(parts, partsCount, width);
1416 else if (sign && isSigned) {
1417 APInt::tcSetLeastSignificantBits(parts, partsCount, 1);
1418 APInt::tcShiftLeft(parts, partsCount, width-1);
1419 } else // sign && !isSigned
1420 APInt::tcSet(parts, 0, partsCount);
1423 if (category == fcZero) {
1424 APInt::tcSet(parts, 0, partsCount);
1428 /* Shift the bit pattern so the fraction is lost. */
1431 bits = (int) semantics->precision - 1 - exponent;
1434 lost_fraction = tmp.shiftSignificandRight(bits);
1436 if (-bits >= semantics->precision) {
1437 // Unrepresentably large.
1438 if (!sign && isSigned)
1439 APInt::tcSetLeastSignificantBits(parts, partsCount, width-1);
1440 else if (!sign && !isSigned)
1441 APInt::tcSetLeastSignificantBits(parts, partsCount, width);
1442 else if (sign && isSigned) {
1443 APInt::tcSetLeastSignificantBits(parts, partsCount, 1);
1444 APInt::tcShiftLeft(parts, partsCount, width-1);
1445 } else // sign && !isSigned
1446 APInt::tcSet(parts, 0, partsCount);
1447 return (opStatus)(opOverflow | opInexact);
1449 tmp.shiftSignificandLeft(-bits);
1450 lost_fraction = lfExactlyZero;
1453 if(lost_fraction != lfExactlyZero
1454 && tmp.roundAwayFromZero(rounding_mode, lost_fraction))
1455 tmp.incrementSignificand();
1457 msb = tmp.significandMSB();
1459 /* Negative numbers cannot be represented as unsigned. */
1460 if(!isSigned && tmp.sign && msb != -1U)
1463 /* It takes exponent + 1 bits to represent the truncated floating
1464 point number without its sign. We lose a bit for the sign, but
1465 the maximally negative integer is a special case. */
1466 if(msb + 1 > width) /* !! Not same as msb >= width !! */
1469 if(isSigned && msb + 1 == width
1470 && (!tmp.sign || tmp.significandLSB() != msb))
1473 APInt::tcAssign(parts, tmp.significandParts(), partsCount);
1476 APInt::tcNegate(parts, partsCount);
1478 if(lost_fraction == lfExactlyZero)
1485 APFloat::convertFromUnsignedInteger(integerPart *parts,
1486 unsigned int partCount,
1487 roundingMode rounding_mode)
1489 unsigned int msb, precision;
1490 lostFraction lost_fraction;
1492 msb = APInt::tcMSB(parts, partCount) + 1;
1493 precision = semantics->precision;
1495 category = fcNormal;
1496 exponent = precision - 1;
1498 if(msb > precision) {
1499 exponent += (msb - precision);
1500 lost_fraction = shiftRight(parts, partCount, msb - precision);
1503 lost_fraction = lfExactlyZero;
1505 /* Copy the bit image. */
1507 APInt::tcAssign(significandParts(), parts, partCountForBits(msb));
1509 return normalize(rounding_mode, lost_fraction);
1513 APFloat::convertFromInteger(const integerPart *parts, unsigned int width,
1514 bool isSigned, roundingMode rounding_mode)
1516 unsigned int partCount = partCountForBits(width);
1518 APInt api = APInt(width, partCount, parts);
1519 integerPart *copy = new integerPart[partCount];
1522 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
1527 APInt::tcAssign(copy, api.getRawData(), partCount);
1528 status = convertFromUnsignedInteger(copy, partCount, rounding_mode);
1533 APFloat::convertFromHexadecimalString(const char *p,
1534 roundingMode rounding_mode)
1536 lostFraction lost_fraction;
1537 integerPart *significand;
1538 unsigned int bitPos, partsCount;
1539 const char *dot, *firstSignificantDigit;
1543 category = fcNormal;
1545 significand = significandParts();
1546 partsCount = partCount();
1547 bitPos = partsCount * integerPartWidth;
1549 /* Skip leading zeroes and any(hexa)decimal point. */
1550 p = skipLeadingZeroesAndAnyDot(p, &dot);
1551 firstSignificantDigit = p;
1554 integerPart hex_value;
1561 hex_value = hexDigitValue(*p);
1562 if(hex_value == -1U) {
1563 lost_fraction = lfExactlyZero;
1569 /* Store the number whilst 4-bit nibbles remain. */
1572 hex_value <<= bitPos % integerPartWidth;
1573 significand[bitPos / integerPartWidth] |= hex_value;
1575 lost_fraction = trailingHexadecimalFraction(p, hex_value);
1576 while(hexDigitValue(*p) != -1U)
1582 /* Hex floats require an exponent but not a hexadecimal point. */
1583 assert(*p == 'p' || *p == 'P');
1585 /* Ignore the exponent if we are zero. */
1586 if(p != firstSignificantDigit) {
1589 /* Implicit hexadecimal point? */
1593 /* Calculate the exponent adjustment implicit in the number of
1594 significant digits. */
1595 expAdjustment = dot - firstSignificantDigit;
1596 if(expAdjustment < 0)
1598 expAdjustment = expAdjustment * 4 - 1;
1600 /* Adjust for writing the significand starting at the most
1601 significant nibble. */
1602 expAdjustment += semantics->precision;
1603 expAdjustment -= partsCount * integerPartWidth;
1605 /* Adjust for the given exponent. */
1606 exponent = totalExponent(p, expAdjustment);
1609 return normalize(rounding_mode, lost_fraction);
1613 APFloat::convertFromString(const char *p, roundingMode rounding_mode)
1615 /* Handle a leading minus sign. */
1621 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
1622 return convertFromHexadecimalString(p + 2, rounding_mode);
1624 assert(0 && "Decimal to binary conversions not yet implemented");
1628 // For good performance it is desirable for different APFloats
1629 // to produce different integers.
1631 APFloat::getHashValue() const
1633 if (category==fcZero) return sign<<8 | semantics->precision ;
1634 else if (category==fcInfinity) return sign<<9 | semantics->precision;
1635 else if (category==fcNaN) return 1<<10 | semantics->precision;
1637 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
1638 const integerPart* p = significandParts();
1639 for (int i=partCount(); i>0; i--, p++)
1640 hash ^= ((uint32_t)*p) ^ (*p)>>32;
1645 // Conversion from APFloat to/from host float/double. It may eventually be
1646 // possible to eliminate these and have everybody deal with APFloats, but that
1647 // will take a while. This approach will not easily extend to long double.
1648 // Current implementation requires integerPartWidth==64, which is correct at
1649 // the moment but could be made more general.
1651 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
1652 // the actual IEEE respresentations. We compensate for that here.
1655 APFloat::convertF80LongDoubleAPFloatToAPInt() const
1657 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended);
1658 assert (partCount()==2);
1660 uint64_t myexponent, mysignificand;
1662 if (category==fcNormal) {
1663 myexponent = exponent+16383; //bias
1664 mysignificand = significandParts()[0];
1665 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
1666 myexponent = 0; // denormal
1667 } else if (category==fcZero) {
1670 } else if (category==fcInfinity) {
1671 myexponent = 0x7fff;
1672 mysignificand = 0x8000000000000000ULL;
1673 } else if (category==fcNaN) {
1674 myexponent = 0x7fff;
1675 mysignificand = significandParts()[0];
1680 words[0] = (((uint64_t)sign & 1) << 63) |
1681 ((myexponent & 0x7fff) << 48) |
1682 ((mysignificand >>16) & 0xffffffffffffLL);
1683 words[1] = mysignificand & 0xffff;
1684 APInt api(80, 2, words);
1689 APFloat::convertDoubleAPFloatToAPInt() const
1691 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
1692 assert (partCount()==1);
1694 uint64_t myexponent, mysignificand;
1696 if (category==fcNormal) {
1697 myexponent = exponent+1023; //bias
1698 mysignificand = *significandParts();
1699 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
1700 myexponent = 0; // denormal
1701 } else if (category==fcZero) {
1704 } else if (category==fcInfinity) {
1707 } else if (category==fcNaN) {
1709 mysignificand = *significandParts();
1713 APInt api(64, (((((uint64_t)sign & 1) << 63) |
1714 ((myexponent & 0x7ff) << 52) |
1715 (mysignificand & 0xfffffffffffffLL))));
1720 APFloat::convertFloatAPFloatToAPInt() const
1722 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
1723 assert (partCount()==1);
1725 uint32_t myexponent, mysignificand;
1727 if (category==fcNormal) {
1728 myexponent = exponent+127; //bias
1729 mysignificand = *significandParts();
1730 if (myexponent == 1 && !(mysignificand & 0x400000))
1731 myexponent = 0; // denormal
1732 } else if (category==fcZero) {
1735 } else if (category==fcInfinity) {
1738 } else if (category==fcNaN) {
1740 mysignificand = *significandParts();
1744 APInt api(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
1745 (mysignificand & 0x7fffff)));
1750 APFloat::convertToAPInt() const
1752 if (semantics == (const llvm::fltSemantics* const)&IEEEsingle)
1753 return convertFloatAPFloatToAPInt();
1754 else if (semantics == (const llvm::fltSemantics* const)&IEEEdouble)
1755 return convertDoubleAPFloatToAPInt();
1756 else if (semantics == (const llvm::fltSemantics* const)&x87DoubleExtended)
1757 return convertF80LongDoubleAPFloatToAPInt();
1764 APFloat::convertToFloat() const
1766 assert(semantics == (const llvm::fltSemantics* const)&IEEEsingle);
1767 APInt api = convertToAPInt();
1768 return api.bitsToFloat();
1772 APFloat::convertToDouble() const
1774 assert(semantics == (const llvm::fltSemantics* const)&IEEEdouble);
1775 APInt api = convertToAPInt();
1776 return api.bitsToDouble();
1779 /// Integer bit is explicit in this format. Current Intel book does not
1780 /// define meaning of:
1781 /// exponent = all 1's, integer bit not set.
1782 /// exponent = 0, integer bit set. (formerly "psuedodenormals")
1783 /// exponent!=0 nor all 1's, integer bit not set. (formerly "unnormals")
1785 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
1787 assert(api.getBitWidth()==80);
1788 uint64_t i1 = api.getRawData()[0];
1789 uint64_t i2 = api.getRawData()[1];
1790 uint64_t myexponent = (i1 >> 48) & 0x7fff;
1791 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) |
1794 initialize(&APFloat::x87DoubleExtended);
1795 assert(partCount()==2);
1798 if (myexponent==0 && mysignificand==0) {
1799 // exponent, significand meaningless
1801 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
1802 // exponent, significand meaningless
1803 category = fcInfinity;
1804 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
1805 // exponent meaningless
1807 significandParts()[0] = mysignificand;
1808 significandParts()[1] = 0;
1810 category = fcNormal;
1811 exponent = myexponent - 16383;
1812 significandParts()[0] = mysignificand;
1813 significandParts()[1] = 0;
1814 if (myexponent==0) // denormal
1820 APFloat::initFromDoubleAPInt(const APInt &api)
1822 assert(api.getBitWidth()==64);
1823 uint64_t i = *api.getRawData();
1824 uint64_t myexponent = (i >> 52) & 0x7ff;
1825 uint64_t mysignificand = i & 0xfffffffffffffLL;
1827 initialize(&APFloat::IEEEdouble);
1828 assert(partCount()==1);
1831 if (myexponent==0 && mysignificand==0) {
1832 // exponent, significand meaningless
1834 } else if (myexponent==0x7ff && mysignificand==0) {
1835 // exponent, significand meaningless
1836 category = fcInfinity;
1837 } else if (myexponent==0x7ff && mysignificand!=0) {
1838 // exponent meaningless
1840 *significandParts() = mysignificand;
1842 category = fcNormal;
1843 exponent = myexponent - 1023;
1844 *significandParts() = mysignificand;
1845 if (myexponent==0) // denormal
1848 *significandParts() |= 0x10000000000000LL; // integer bit
1853 APFloat::initFromFloatAPInt(const APInt & api)
1855 assert(api.getBitWidth()==32);
1856 uint32_t i = (uint32_t)*api.getRawData();
1857 uint32_t myexponent = (i >> 23) & 0xff;
1858 uint32_t mysignificand = i & 0x7fffff;
1860 initialize(&APFloat::IEEEsingle);
1861 assert(partCount()==1);
1864 if (myexponent==0 && mysignificand==0) {
1865 // exponent, significand meaningless
1867 } else if (myexponent==0xff && mysignificand==0) {
1868 // exponent, significand meaningless
1869 category = fcInfinity;
1870 } else if (myexponent==0xff && mysignificand!=0) {
1871 // sign, exponent, significand meaningless
1873 *significandParts() = mysignificand;
1875 category = fcNormal;
1876 exponent = myexponent - 127; //bias
1877 *significandParts() = mysignificand;
1878 if (myexponent==0) // denormal
1881 *significandParts() |= 0x800000; // integer bit
1885 /// Treat api as containing the bits of a floating point number. Currently
1886 /// we infer the floating point type from the size of the APInt. FIXME: This
1887 /// breaks when we get to PPC128 and IEEE128 (but both cannot exist in the
1888 /// same compile...)
1890 APFloat::initFromAPInt(const APInt& api)
1892 if (api.getBitWidth() == 32)
1893 return initFromFloatAPInt(api);
1894 else if (api.getBitWidth()==64)
1895 return initFromDoubleAPInt(api);
1896 else if (api.getBitWidth()==80)
1897 return initFromF80LongDoubleAPInt(api);
1902 APFloat::APFloat(const APInt& api)
1907 APFloat::APFloat(float f)
1909 APInt api = APInt(32, 0);
1910 initFromAPInt(api.floatToBits(f));
1913 APFloat::APFloat(double d)
1915 APInt api = APInt(64, 0);
1916 initFromAPInt(api.doubleToBits(d));