1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Neil Booth and is distributed under the
6 // University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
16 #include "llvm/ADT/APFloat.h"
17 #include "llvm/Support/MathExtras.h"
21 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
23 /* Assumed in hexadecimal significand parsing. */
24 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
28 /* Represents floating point arithmetic semantics. */
30 /* The largest E such that 2^E is representable; this matches the
31 definition of IEEE 754. */
32 exponent_t maxExponent;
34 /* The smallest E such that 2^E is a normalized number; this
35 matches the definition of IEEE 754. */
36 exponent_t minExponent;
38 /* Number of bits in the significand. This includes the integer
40 unsigned char precision;
42 /* If the target format has an implicit integer bit. */
43 bool implicitIntegerBit;
46 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
47 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
48 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
49 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, false };
50 const fltSemantics APFloat::Bogus = { 0, 0, 0, false };
53 /* Put a bunch of private, handy routines in an anonymous namespace. */
57 partCountForBits(unsigned int bits)
59 return ((bits) + integerPartWidth - 1) / integerPartWidth;
63 digitValue(unsigned int c)
75 hexDigitValue (unsigned int c)
94 /* This is ugly and needs cleaning up, but I don't immediately see
95 how whilst remaining safe. */
97 totalExponent(const char *p, int exponentAdjustment)
99 integerPart unsignedExponent;
100 bool negative, overflow;
103 /* Move past the exponent letter and sign to the digits. */
105 negative = *p == '-';
106 if(*p == '-' || *p == '+')
109 unsignedExponent = 0;
114 value = digitValue(*p);
119 unsignedExponent = unsignedExponent * 10 + value;
120 if(unsignedExponent > 65535)
124 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
128 exponent = unsignedExponent;
130 exponent = -exponent;
131 exponent += exponentAdjustment;
132 if(exponent > 65535 || exponent < -65536)
137 exponent = negative ? -65536: 65535;
143 skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
158 /* Return the trailing fraction of a hexadecimal number.
159 DIGITVALUE is the first hex digit of the fraction, P points to
162 trailingHexadecimalFraction(const char *p, unsigned int digitValue)
164 unsigned int hexDigit;
166 /* If the first trailing digit isn't 0 or 8 we can work out the
167 fraction immediately. */
169 return lfMoreThanHalf;
170 else if(digitValue < 8 && digitValue > 0)
171 return lfLessThanHalf;
173 /* Otherwise we need to find the first non-zero digit. */
177 hexDigit = hexDigitValue(*p);
179 /* If we ran off the end it is exactly zero or one-half, otherwise
182 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
184 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
187 /* Return the fraction lost were a bignum truncated. */
189 lostFractionThroughTruncation(integerPart *parts,
190 unsigned int partCount,
195 lsb = APInt::tcLSB(parts, partCount);
197 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
199 return lfExactlyZero;
201 return lfExactlyHalf;
202 if(bits <= partCount * integerPartWidth
203 && APInt::tcExtractBit(parts, bits - 1))
204 return lfMoreThanHalf;
206 return lfLessThanHalf;
209 /* Shift DST right BITS bits noting lost fraction. */
211 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
213 lostFraction lost_fraction;
215 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
217 APInt::tcShiftRight(dst, parts, bits);
219 return lost_fraction;
225 APFloat::initialize(const fltSemantics *ourSemantics)
229 semantics = ourSemantics;
232 significand.parts = new integerPart[count];
236 APFloat::freeSignificand()
239 delete [] significand.parts;
243 APFloat::assign(const APFloat &rhs)
245 assert(semantics == rhs.semantics);
248 category = rhs.category;
249 exponent = rhs.exponent;
250 if(category == fcNormal || category == fcNaN)
251 copySignificand(rhs);
255 APFloat::copySignificand(const APFloat &rhs)
257 assert(category == fcNormal || category == fcNaN);
258 assert(rhs.partCount() >= partCount());
260 APInt::tcAssign(significandParts(), rhs.significandParts(),
265 APFloat::operator=(const APFloat &rhs)
268 if(semantics != rhs.semantics) {
270 initialize(rhs.semantics);
279 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
282 if (semantics != rhs.semantics ||
283 category != rhs.category ||
286 if (category==fcZero || category==fcInfinity)
288 else if (category==fcNormal && exponent!=rhs.exponent)
292 const integerPart* p=significandParts();
293 const integerPart* q=rhs.significandParts();
294 for (; i>0; i--, p++, q++) {
302 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
304 initialize(&ourSemantics);
307 exponent = ourSemantics.precision - 1;
308 significandParts()[0] = value;
309 normalize(rmNearestTiesToEven, lfExactlyZero);
312 APFloat::APFloat(const fltSemantics &ourSemantics,
313 fltCategory ourCategory, bool negative)
315 initialize(&ourSemantics);
316 category = ourCategory;
318 if(category == fcNormal)
322 APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
324 initialize(&ourSemantics);
325 convertFromString(text, rmNearestTiesToEven);
328 APFloat::APFloat(const APFloat &rhs)
330 initialize(rhs.semantics);
340 APFloat::partCount() const
342 return partCountForBits(semantics->precision +
343 semantics->implicitIntegerBit ? 1 : 0);
347 APFloat::semanticsPrecision(const fltSemantics &semantics)
349 return semantics.precision;
353 APFloat::significandParts() const
355 return const_cast<APFloat *>(this)->significandParts();
359 APFloat::significandParts()
361 assert(category == fcNormal || category == fcNaN);
364 return significand.parts;
366 return &significand.part;
369 /* Combine the effect of two lost fractions. */
371 APFloat::combineLostFractions(lostFraction moreSignificant,
372 lostFraction lessSignificant)
374 if(lessSignificant != lfExactlyZero) {
375 if(moreSignificant == lfExactlyZero)
376 moreSignificant = lfLessThanHalf;
377 else if(moreSignificant == lfExactlyHalf)
378 moreSignificant = lfMoreThanHalf;
381 return moreSignificant;
385 APFloat::zeroSignificand()
388 APInt::tcSet(significandParts(), 0, partCount());
391 /* Increment an fcNormal floating point number's significand. */
393 APFloat::incrementSignificand()
397 carry = APInt::tcIncrement(significandParts(), partCount());
399 /* Our callers should never cause us to overflow. */
403 /* Add the significand of the RHS. Returns the carry flag. */
405 APFloat::addSignificand(const APFloat &rhs)
409 parts = significandParts();
411 assert(semantics == rhs.semantics);
412 assert(exponent == rhs.exponent);
414 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
417 /* Subtract the significand of the RHS with a borrow flag. Returns
420 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
424 parts = significandParts();
426 assert(semantics == rhs.semantics);
427 assert(exponent == rhs.exponent);
429 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
433 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
434 on to the full-precision result of the multiplication. Returns the
437 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
439 unsigned int omsb; // One, not zero, based MSB.
440 unsigned int partsCount, newPartsCount, precision;
441 integerPart *lhsSignificand;
442 integerPart scratch[4];
443 integerPart *fullSignificand;
444 lostFraction lost_fraction;
446 assert(semantics == rhs.semantics);
448 precision = semantics->precision;
449 newPartsCount = partCountForBits(precision * 2);
451 if(newPartsCount > 4)
452 fullSignificand = new integerPart[newPartsCount];
454 fullSignificand = scratch;
456 lhsSignificand = significandParts();
457 partsCount = partCount();
459 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
460 rhs.significandParts(), partsCount);
462 lost_fraction = lfExactlyZero;
463 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
464 exponent += rhs.exponent;
467 Significand savedSignificand = significand;
468 const fltSemantics *savedSemantics = semantics;
469 fltSemantics extendedSemantics;
471 unsigned int extendedPrecision;
473 /* Normalize our MSB. */
474 extendedPrecision = precision + precision - 1;
475 if(omsb != extendedPrecision)
477 APInt::tcShiftLeft(fullSignificand, newPartsCount,
478 extendedPrecision - omsb);
479 exponent -= extendedPrecision - omsb;
482 /* Create new semantics. */
483 extendedSemantics = *semantics;
484 extendedSemantics.precision = extendedPrecision;
486 if(newPartsCount == 1)
487 significand.part = fullSignificand[0];
489 significand.parts = fullSignificand;
490 semantics = &extendedSemantics;
492 APFloat extendedAddend(*addend);
493 status = extendedAddend.convert(extendedSemantics, rmTowardZero);
494 assert(status == opOK);
495 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
497 /* Restore our state. */
498 if(newPartsCount == 1)
499 fullSignificand[0] = significand.part;
500 significand = savedSignificand;
501 semantics = savedSemantics;
503 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
506 exponent -= (precision - 1);
508 if(omsb > precision) {
509 unsigned int bits, significantParts;
512 bits = omsb - precision;
513 significantParts = partCountForBits(omsb);
514 lf = shiftRight(fullSignificand, significantParts, bits);
515 lost_fraction = combineLostFractions(lf, lost_fraction);
519 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
521 if(newPartsCount > 4)
522 delete [] fullSignificand;
524 return lost_fraction;
527 /* Multiply the significands of LHS and RHS to DST. */
529 APFloat::divideSignificand(const APFloat &rhs)
531 unsigned int bit, i, partsCount;
532 const integerPart *rhsSignificand;
533 integerPart *lhsSignificand, *dividend, *divisor;
534 integerPart scratch[4];
535 lostFraction lost_fraction;
537 assert(semantics == rhs.semantics);
539 lhsSignificand = significandParts();
540 rhsSignificand = rhs.significandParts();
541 partsCount = partCount();
544 dividend = new integerPart[partsCount * 2];
548 divisor = dividend + partsCount;
550 /* Copy the dividend and divisor as they will be modified in-place. */
551 for(i = 0; i < partsCount; i++) {
552 dividend[i] = lhsSignificand[i];
553 divisor[i] = rhsSignificand[i];
554 lhsSignificand[i] = 0;
557 exponent -= rhs.exponent;
559 unsigned int precision = semantics->precision;
561 /* Normalize the divisor. */
562 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
565 APInt::tcShiftLeft(divisor, partsCount, bit);
568 /* Normalize the dividend. */
569 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
572 APInt::tcShiftLeft(dividend, partsCount, bit);
575 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
577 APInt::tcShiftLeft(dividend, partsCount, 1);
578 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
582 for(bit = precision; bit; bit -= 1) {
583 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
584 APInt::tcSubtract(dividend, divisor, 0, partsCount);
585 APInt::tcSetBit(lhsSignificand, bit - 1);
588 APInt::tcShiftLeft(dividend, partsCount, 1);
591 /* Figure out the lost fraction. */
592 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
595 lost_fraction = lfMoreThanHalf;
597 lost_fraction = lfExactlyHalf;
598 else if(APInt::tcIsZero(dividend, partsCount))
599 lost_fraction = lfExactlyZero;
601 lost_fraction = lfLessThanHalf;
606 return lost_fraction;
610 APFloat::significandMSB() const
612 return APInt::tcMSB(significandParts(), partCount());
616 APFloat::significandLSB() const
618 return APInt::tcLSB(significandParts(), partCount());
621 /* Note that a zero result is NOT normalized to fcZero. */
623 APFloat::shiftSignificandRight(unsigned int bits)
625 /* Our exponent should not overflow. */
626 assert((exponent_t) (exponent + bits) >= exponent);
630 return shiftRight(significandParts(), partCount(), bits);
633 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
635 APFloat::shiftSignificandLeft(unsigned int bits)
637 assert(bits < semantics->precision);
640 unsigned int partsCount = partCount();
642 APInt::tcShiftLeft(significandParts(), partsCount, bits);
645 assert(!APInt::tcIsZero(significandParts(), partsCount));
650 APFloat::compareAbsoluteValue(const APFloat &rhs) const
654 assert(semantics == rhs.semantics);
655 assert(category == fcNormal);
656 assert(rhs.category == fcNormal);
658 compare = exponent - rhs.exponent;
660 /* If exponents are equal, do an unsigned bignum comparison of the
663 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
667 return cmpGreaterThan;
674 /* Handle overflow. Sign is preserved. We either become infinity or
675 the largest finite number. */
677 APFloat::handleOverflow(roundingMode rounding_mode)
680 if(rounding_mode == rmNearestTiesToEven
681 || rounding_mode == rmNearestTiesToAway
682 || (rounding_mode == rmTowardPositive && !sign)
683 || (rounding_mode == rmTowardNegative && sign))
685 category = fcInfinity;
686 return (opStatus) (opOverflow | opInexact);
689 /* Otherwise we become the largest finite number. */
691 exponent = semantics->maxExponent;
692 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
693 semantics->precision);
698 /* This routine must work for fcZero of both signs, and fcNormal
701 APFloat::roundAwayFromZero(roundingMode rounding_mode,
702 lostFraction lost_fraction)
704 /* NaNs and infinities should not have lost fractions. */
705 assert(category == fcNormal || category == fcZero);
707 /* Our caller has already handled this case. */
708 assert(lost_fraction != lfExactlyZero);
710 switch(rounding_mode) {
714 case rmNearestTiesToAway:
715 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
717 case rmNearestTiesToEven:
718 if(lost_fraction == lfMoreThanHalf)
721 /* Our zeroes don't have a significand to test. */
722 if(lost_fraction == lfExactlyHalf && category != fcZero)
723 return significandParts()[0] & 1;
730 case rmTowardPositive:
731 return sign == false;
733 case rmTowardNegative:
739 APFloat::normalize(roundingMode rounding_mode,
740 lostFraction lost_fraction)
742 unsigned int omsb; /* One, not zero, based MSB. */
745 if(category != fcNormal)
748 /* Before rounding normalize the exponent of fcNormal numbers. */
749 omsb = significandMSB() + 1;
752 /* OMSB is numbered from 1. We want to place it in the integer
753 bit numbered PRECISON if possible, with a compensating change in
755 exponentChange = omsb - semantics->precision;
757 /* If the resulting exponent is too high, overflow according to
758 the rounding mode. */
759 if(exponent + exponentChange > semantics->maxExponent)
760 return handleOverflow(rounding_mode);
762 /* Subnormal numbers have exponent minExponent, and their MSB
763 is forced based on that. */
764 if(exponent + exponentChange < semantics->minExponent)
765 exponentChange = semantics->minExponent - exponent;
767 /* Shifting left is easy as we don't lose precision. */
768 if(exponentChange < 0) {
769 assert(lost_fraction == lfExactlyZero);
771 shiftSignificandLeft(-exponentChange);
776 if(exponentChange > 0) {
779 /* Shift right and capture any new lost fraction. */
780 lf = shiftSignificandRight(exponentChange);
782 lost_fraction = combineLostFractions(lf, lost_fraction);
784 /* Keep OMSB up-to-date. */
785 if(omsb > (unsigned) exponentChange)
786 omsb -= (unsigned) exponentChange;
792 /* Now round the number according to rounding_mode given the lost
795 /* As specified in IEEE 754, since we do not trap we do not report
796 underflow for exact results. */
797 if(lost_fraction == lfExactlyZero) {
798 /* Canonicalize zeroes. */
805 /* Increment the significand if we're rounding away from zero. */
806 if(roundAwayFromZero(rounding_mode, lost_fraction)) {
808 exponent = semantics->minExponent;
810 incrementSignificand();
811 omsb = significandMSB() + 1;
813 /* Did the significand increment overflow? */
814 if(omsb == (unsigned) semantics->precision + 1) {
815 /* Renormalize by incrementing the exponent and shifting our
816 significand right one. However if we already have the
817 maximum exponent we overflow to infinity. */
818 if(exponent == semantics->maxExponent) {
819 category = fcInfinity;
821 return (opStatus) (opOverflow | opInexact);
824 shiftSignificandRight(1);
830 /* The normal case - we were and are not denormal, and any
831 significand increment above didn't overflow. */
832 if(omsb == semantics->precision)
835 /* We have a non-zero denormal. */
836 assert(omsb < semantics->precision);
837 assert(exponent == semantics->minExponent);
839 /* Canonicalize zeroes. */
843 /* The fcZero case is a denormal that underflowed to zero. */
844 return (opStatus) (opUnderflow | opInexact);
848 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
850 switch(convolve(category, rhs.category)) {
854 case convolve(fcNaN, fcZero):
855 case convolve(fcNaN, fcNormal):
856 case convolve(fcNaN, fcInfinity):
857 case convolve(fcNaN, fcNaN):
858 case convolve(fcNormal, fcZero):
859 case convolve(fcInfinity, fcNormal):
860 case convolve(fcInfinity, fcZero):
863 case convolve(fcZero, fcNaN):
864 case convolve(fcNormal, fcNaN):
865 case convolve(fcInfinity, fcNaN):
867 copySignificand(rhs);
870 case convolve(fcNormal, fcInfinity):
871 case convolve(fcZero, fcInfinity):
872 category = fcInfinity;
873 sign = rhs.sign ^ subtract;
876 case convolve(fcZero, fcNormal):
878 sign = rhs.sign ^ subtract;
881 case convolve(fcZero, fcZero):
882 /* Sign depends on rounding mode; handled by caller. */
885 case convolve(fcInfinity, fcInfinity):
886 /* Differently signed infinities can only be validly
888 if(sign ^ rhs.sign != subtract) {
890 // Arbitrary but deterministic value for significand
891 APInt::tcSet(significandParts(), ~0U, partCount());
897 case convolve(fcNormal, fcNormal):
902 /* Add or subtract two normal numbers. */
904 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
907 lostFraction lost_fraction;
910 /* Determine if the operation on the absolute values is effectively
911 an addition or subtraction. */
912 subtract ^= (sign ^ rhs.sign);
914 /* Are we bigger exponent-wise than the RHS? */
915 bits = exponent - rhs.exponent;
917 /* Subtraction is more subtle than one might naively expect. */
919 APFloat temp_rhs(rhs);
923 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
924 lost_fraction = lfExactlyZero;
925 } else if (bits > 0) {
926 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
927 shiftSignificandLeft(1);
930 lost_fraction = shiftSignificandRight(-bits - 1);
931 temp_rhs.shiftSignificandLeft(1);
936 carry = temp_rhs.subtractSignificand
937 (*this, lost_fraction != lfExactlyZero);
938 copySignificand(temp_rhs);
941 carry = subtractSignificand
942 (temp_rhs, lost_fraction != lfExactlyZero);
945 /* Invert the lost fraction - it was on the RHS and
947 if(lost_fraction == lfLessThanHalf)
948 lost_fraction = lfMoreThanHalf;
949 else if(lost_fraction == lfMoreThanHalf)
950 lost_fraction = lfLessThanHalf;
952 /* The code above is intended to ensure that no borrow is
957 APFloat temp_rhs(rhs);
959 lost_fraction = temp_rhs.shiftSignificandRight(bits);
960 carry = addSignificand(temp_rhs);
962 lost_fraction = shiftSignificandRight(-bits);
963 carry = addSignificand(rhs);
966 /* We have a guard bit; generating a carry cannot happen. */
970 return lost_fraction;
974 APFloat::multiplySpecials(const APFloat &rhs)
976 switch(convolve(category, rhs.category)) {
980 case convolve(fcNaN, fcZero):
981 case convolve(fcNaN, fcNormal):
982 case convolve(fcNaN, fcInfinity):
983 case convolve(fcNaN, fcNaN):
986 case convolve(fcZero, fcNaN):
987 case convolve(fcNormal, fcNaN):
988 case convolve(fcInfinity, fcNaN):
990 copySignificand(rhs);
993 case convolve(fcNormal, fcInfinity):
994 case convolve(fcInfinity, fcNormal):
995 case convolve(fcInfinity, fcInfinity):
996 category = fcInfinity;
999 case convolve(fcZero, fcNormal):
1000 case convolve(fcNormal, fcZero):
1001 case convolve(fcZero, fcZero):
1005 case convolve(fcZero, fcInfinity):
1006 case convolve(fcInfinity, fcZero):
1008 // Arbitrary but deterministic value for significand
1009 APInt::tcSet(significandParts(), ~0U, partCount());
1012 case convolve(fcNormal, fcNormal):
1018 APFloat::divideSpecials(const APFloat &rhs)
1020 switch(convolve(category, rhs.category)) {
1024 case convolve(fcNaN, fcZero):
1025 case convolve(fcNaN, fcNormal):
1026 case convolve(fcNaN, fcInfinity):
1027 case convolve(fcNaN, fcNaN):
1028 case convolve(fcInfinity, fcZero):
1029 case convolve(fcInfinity, fcNormal):
1030 case convolve(fcZero, fcInfinity):
1031 case convolve(fcZero, fcNormal):
1034 case convolve(fcZero, fcNaN):
1035 case convolve(fcNormal, fcNaN):
1036 case convolve(fcInfinity, fcNaN):
1038 copySignificand(rhs);
1041 case convolve(fcNormal, fcInfinity):
1045 case convolve(fcNormal, fcZero):
1046 category = fcInfinity;
1049 case convolve(fcInfinity, fcInfinity):
1050 case convolve(fcZero, fcZero):
1052 // Arbitrary but deterministic value for significand
1053 APInt::tcSet(significandParts(), ~0U, partCount());
1056 case convolve(fcNormal, fcNormal):
1063 APFloat::changeSign()
1065 /* Look mummy, this one's easy. */
1070 APFloat::clearSign()
1072 /* So is this one. */
1077 APFloat::copySign(const APFloat &rhs)
1083 /* Normalized addition or subtraction. */
1085 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1090 fs = addOrSubtractSpecials(rhs, subtract);
1092 /* This return code means it was not a simple case. */
1093 if(fs == opDivByZero) {
1094 lostFraction lost_fraction;
1096 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1097 fs = normalize(rounding_mode, lost_fraction);
1099 /* Can only be zero if we lost no fraction. */
1100 assert(category != fcZero || lost_fraction == lfExactlyZero);
1103 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1104 positive zero unless rounding to minus infinity, except that
1105 adding two like-signed zeroes gives that zero. */
1106 if(category == fcZero) {
1107 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1108 sign = (rounding_mode == rmTowardNegative);
1114 /* Normalized addition. */
1116 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1118 return addOrSubtract(rhs, rounding_mode, false);
1121 /* Normalized subtraction. */
1123 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1125 return addOrSubtract(rhs, rounding_mode, true);
1128 /* Normalized multiply. */
1130 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1135 fs = multiplySpecials(rhs);
1137 if(category == fcNormal) {
1138 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1139 fs = normalize(rounding_mode, lost_fraction);
1140 if(lost_fraction != lfExactlyZero)
1141 fs = (opStatus) (fs | opInexact);
1147 /* Normalized divide. */
1149 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1154 fs = divideSpecials(rhs);
1156 if(category == fcNormal) {
1157 lostFraction lost_fraction = divideSignificand(rhs);
1158 fs = normalize(rounding_mode, lost_fraction);
1159 if(lost_fraction != lfExactlyZero)
1160 fs = (opStatus) (fs | opInexact);
1166 /* Normalized remainder. */
1168 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1172 unsigned int origSign = sign;
1173 fs = V.divide(rhs, rmNearestTiesToEven);
1174 if (fs == opDivByZero)
1177 int parts = partCount();
1178 integerPart *x = new integerPart[parts];
1179 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1180 rmNearestTiesToEven);
1181 if (fs==opInvalidOp)
1184 fs = V.convertFromInteger(x, parts, true, rmNearestTiesToEven);
1185 assert(fs==opOK); // should always work
1187 fs = V.multiply(rhs, rounding_mode);
1188 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1190 fs = subtract(V, rounding_mode);
1191 assert(fs==opOK || fs==opInexact); // likewise
1194 sign = origSign; // IEEE754 requires this
1199 /* Normalized fused-multiply-add. */
1201 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1202 const APFloat &addend,
1203 roundingMode rounding_mode)
1207 /* Post-multiplication sign, before addition. */
1208 sign ^= multiplicand.sign;
1210 /* If and only if all arguments are normal do we need to do an
1211 extended-precision calculation. */
1212 if(category == fcNormal
1213 && multiplicand.category == fcNormal
1214 && addend.category == fcNormal) {
1215 lostFraction lost_fraction;
1217 lost_fraction = multiplySignificand(multiplicand, &addend);
1218 fs = normalize(rounding_mode, lost_fraction);
1219 if(lost_fraction != lfExactlyZero)
1220 fs = (opStatus) (fs | opInexact);
1222 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1223 positive zero unless rounding to minus infinity, except that
1224 adding two like-signed zeroes gives that zero. */
1225 if(category == fcZero && sign != addend.sign)
1226 sign = (rounding_mode == rmTowardNegative);
1228 fs = multiplySpecials(multiplicand);
1230 /* FS can only be opOK or opInvalidOp. There is no more work
1231 to do in the latter case. The IEEE-754R standard says it is
1232 implementation-defined in this case whether, if ADDEND is a
1233 quiet NaN, we raise invalid op; this implementation does so.
1235 If we need to do the addition we can do so with normal
1238 fs = addOrSubtract(addend, rounding_mode, false);
1244 /* Comparison requires normalized numbers. */
1246 APFloat::compare(const APFloat &rhs) const
1250 assert(semantics == rhs.semantics);
1252 switch(convolve(category, rhs.category)) {
1256 case convolve(fcNaN, fcZero):
1257 case convolve(fcNaN, fcNormal):
1258 case convolve(fcNaN, fcInfinity):
1259 case convolve(fcNaN, fcNaN):
1260 case convolve(fcZero, fcNaN):
1261 case convolve(fcNormal, fcNaN):
1262 case convolve(fcInfinity, fcNaN):
1263 return cmpUnordered;
1265 case convolve(fcInfinity, fcNormal):
1266 case convolve(fcInfinity, fcZero):
1267 case convolve(fcNormal, fcZero):
1271 return cmpGreaterThan;
1273 case convolve(fcNormal, fcInfinity):
1274 case convolve(fcZero, fcInfinity):
1275 case convolve(fcZero, fcNormal):
1277 return cmpGreaterThan;
1281 case convolve(fcInfinity, fcInfinity):
1282 if(sign == rhs.sign)
1287 return cmpGreaterThan;
1289 case convolve(fcZero, fcZero):
1292 case convolve(fcNormal, fcNormal):
1296 /* Two normal numbers. Do they have the same sign? */
1297 if(sign != rhs.sign) {
1299 result = cmpLessThan;
1301 result = cmpGreaterThan;
1303 /* Compare absolute values; invert result if negative. */
1304 result = compareAbsoluteValue(rhs);
1307 if(result == cmpLessThan)
1308 result = cmpGreaterThan;
1309 else if(result == cmpGreaterThan)
1310 result = cmpLessThan;
1318 APFloat::convert(const fltSemantics &toSemantics,
1319 roundingMode rounding_mode)
1321 unsigned int newPartCount;
1324 newPartCount = partCountForBits(toSemantics.precision + 1);
1326 /* If our new form is wider, re-allocate our bit pattern into wider
1328 if(newPartCount > partCount()) {
1329 integerPart *newParts;
1331 newParts = new integerPart[newPartCount];
1332 APInt::tcSet(newParts, 0, newPartCount);
1333 APInt::tcAssign(newParts, significandParts(), partCount());
1335 significand.parts = newParts;
1338 if(category == fcNormal) {
1339 /* Re-interpret our bit-pattern. */
1340 exponent += toSemantics.precision - semantics->precision;
1341 semantics = &toSemantics;
1342 fs = normalize(rounding_mode, lfExactlyZero);
1344 semantics = &toSemantics;
1351 /* Convert a floating point number to an integer according to the
1352 rounding mode. If the rounded integer value is out of range this
1353 returns an invalid operation exception. If the rounded value is in
1354 range but the floating point number is not the exact integer, the C
1355 standard doesn't require an inexact exception to be raised. IEEE
1356 854 does require it so we do that.
1358 Note that for conversions to integer type the C standard requires
1359 round-to-zero to always be used. */
1361 APFloat::convertToInteger(integerPart *parts, unsigned int width,
1363 roundingMode rounding_mode) const
1365 lostFraction lost_fraction;
1366 unsigned int msb, partsCount;
1369 /* Handle the three special cases first. */
1370 if(category == fcInfinity || category == fcNaN)
1373 partsCount = partCountForBits(width);
1375 if(category == fcZero) {
1376 APInt::tcSet(parts, 0, partsCount);
1380 /* Shift the bit pattern so the fraction is lost. */
1383 bits = (int) semantics->precision - 1 - exponent;
1386 lost_fraction = tmp.shiftSignificandRight(bits);
1388 tmp.shiftSignificandLeft(-bits);
1389 lost_fraction = lfExactlyZero;
1392 if(lost_fraction != lfExactlyZero
1393 && tmp.roundAwayFromZero(rounding_mode, lost_fraction))
1394 tmp.incrementSignificand();
1396 msb = tmp.significandMSB();
1398 /* Negative numbers cannot be represented as unsigned. */
1399 if(!isSigned && tmp.sign && msb != -1U)
1402 /* It takes exponent + 1 bits to represent the truncated floating
1403 point number without its sign. We lose a bit for the sign, but
1404 the maximally negative integer is a special case. */
1405 if(msb + 1 > width) /* !! Not same as msb >= width !! */
1408 if(isSigned && msb + 1 == width
1409 && (!tmp.sign || tmp.significandLSB() != msb))
1412 APInt::tcAssign(parts, tmp.significandParts(), partsCount);
1415 APInt::tcNegate(parts, partsCount);
1417 if(lost_fraction == lfExactlyZero)
1424 APFloat::convertFromUnsignedInteger(integerPart *parts,
1425 unsigned int partCount,
1426 roundingMode rounding_mode)
1428 unsigned int msb, precision;
1429 lostFraction lost_fraction;
1431 msb = APInt::tcMSB(parts, partCount) + 1;
1432 precision = semantics->precision;
1434 category = fcNormal;
1435 exponent = precision - 1;
1437 if(msb > precision) {
1438 exponent += (msb - precision);
1439 lost_fraction = shiftRight(parts, partCount, msb - precision);
1442 lost_fraction = lfExactlyZero;
1444 /* Copy the bit image. */
1446 APInt::tcAssign(significandParts(), parts, partCountForBits(msb));
1448 return normalize(rounding_mode, lost_fraction);
1452 APFloat::convertFromInteger(const integerPart *parts,
1453 unsigned int partCount, bool isSigned,
1454 roundingMode rounding_mode)
1460 copy = new integerPart[partCount];
1461 APInt::tcAssign(copy, parts, partCount);
1463 width = partCount * integerPartWidth;
1466 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
1468 APInt::tcNegate(copy, partCount);
1471 status = convertFromUnsignedInteger(copy, partCount, rounding_mode);
1478 APFloat::convertFromHexadecimalString(const char *p,
1479 roundingMode rounding_mode)
1481 lostFraction lost_fraction;
1482 integerPart *significand;
1483 unsigned int bitPos, partsCount;
1484 const char *dot, *firstSignificantDigit;
1488 category = fcNormal;
1490 significand = significandParts();
1491 partsCount = partCount();
1492 bitPos = partsCount * integerPartWidth;
1494 /* Skip leading zeroes and any(hexa)decimal point. */
1495 p = skipLeadingZeroesAndAnyDot(p, &dot);
1496 firstSignificantDigit = p;
1499 integerPart hex_value;
1506 hex_value = hexDigitValue(*p);
1507 if(hex_value == -1U) {
1508 lost_fraction = lfExactlyZero;
1514 /* Store the number whilst 4-bit nibbles remain. */
1517 hex_value <<= bitPos % integerPartWidth;
1518 significand[bitPos / integerPartWidth] |= hex_value;
1520 lost_fraction = trailingHexadecimalFraction(p, hex_value);
1521 while(hexDigitValue(*p) != -1U)
1527 /* Hex floats require an exponent but not a hexadecimal point. */
1528 assert(*p == 'p' || *p == 'P');
1530 /* Ignore the exponent if we are zero. */
1531 if(p != firstSignificantDigit) {
1534 /* Implicit hexadecimal point? */
1538 /* Calculate the exponent adjustment implicit in the number of
1539 significant digits. */
1540 expAdjustment = dot - firstSignificantDigit;
1541 if(expAdjustment < 0)
1543 expAdjustment = expAdjustment * 4 - 1;
1545 /* Adjust for writing the significand starting at the most
1546 significant nibble. */
1547 expAdjustment += semantics->precision;
1548 expAdjustment -= partsCount * integerPartWidth;
1550 /* Adjust for the given exponent. */
1551 exponent = totalExponent(p, expAdjustment);
1554 return normalize(rounding_mode, lost_fraction);
1558 APFloat::convertFromString(const char *p, roundingMode rounding_mode) {
1559 /* Handle a leading minus sign. */
1565 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
1566 return convertFromHexadecimalString(p + 2, rounding_mode);
1568 assert(0 && "Decimal to binary conversions not yet implemented");
1572 // For good performance it is desirable for different APFloats
1573 // to produce different integers.
1575 APFloat::getHashValue() const {
1576 if (category==fcZero) return sign<<8 | semantics->precision ;
1577 else if (category==fcInfinity) return sign<<9 | semantics->precision;
1578 else if (category==fcNaN) return 1<<10 | semantics->precision;
1580 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
1581 const integerPart* p = significandParts();
1582 for (int i=partCount(); i>0; i--, p++)
1583 hash ^= ((uint32_t)*p) ^ (*p)>>32;
1588 // Conversion from APFloat to/from host float/double. It may eventually be
1589 // possible to eliminate these and have everybody deal with APFloats, but that
1590 // will take a while. This approach will not easily extend to long double.
1591 // Current implementation requires partCount()==1, which is correct at the
1592 // moment but could be made more general.
1594 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
1595 // the actual IEEE respresentation. We compensate for that here.
1598 APFloat::convertF80LongDoubleAPFloatToAPInt() const {
1599 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended);
1600 assert (partCount()==1);
1602 uint64_t myexponent, mysignificand;
1604 if (category==fcNormal) {
1605 myexponent = exponent+16383; //bias
1606 mysignificand = *significandParts();
1607 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
1608 myexponent = 0; // denormal
1609 } else if (category==fcZero) {
1612 } else if (category==fcInfinity) {
1613 myexponent = 0x7fff;
1614 mysignificand = 0x8000000000000000ULL;
1615 } else if (category==fcNaN) {
1616 myexponent = 0x7fff;
1617 mysignificand = *significandParts();
1622 words[0] = (((uint64_t)sign & 1) << 63) |
1623 ((myexponent & 0x7fff) << 48) |
1624 ((mysignificand >>16) & 0xffffffffffffLL);
1625 words[1] = mysignificand & 0xffff;
1626 APInt api(80, 2, words);
1631 APFloat::convertDoubleAPFloatToAPInt() const {
1632 assert(semantics == (const llvm::fltSemantics* const)&IEEEdouble);
1633 assert (partCount()==1);
1635 uint64_t myexponent, mysignificand;
1637 if (category==fcNormal) {
1638 myexponent = exponent+1023; //bias
1639 mysignificand = *significandParts();
1640 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
1641 myexponent = 0; // denormal
1642 } else if (category==fcZero) {
1645 } else if (category==fcInfinity) {
1648 } else if (category==fcNaN) {
1650 mysignificand = *significandParts();
1654 APInt api(64, (((((uint64_t)sign & 1) << 63) |
1655 ((myexponent & 0x7ff) << 52) |
1656 (mysignificand & 0xfffffffffffffLL))));
1661 APFloat::convertFloatAPFloatToAPInt() const {
1662 assert(semantics == (const llvm::fltSemantics* const)&IEEEsingle);
1663 assert (partCount()==1);
1665 uint32_t myexponent, mysignificand;
1667 if (category==fcNormal) {
1668 myexponent = exponent+127; //bias
1669 mysignificand = *significandParts();
1670 if (myexponent == 1 && !(mysignificand & 0x400000))
1671 myexponent = 0; // denormal
1672 } else if (category==fcZero) {
1675 } else if (category==fcInfinity) {
1678 } else if (category==fcNaN) {
1680 mysignificand = *significandParts();
1684 APInt api(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
1685 (mysignificand & 0x7fffff)));
1690 APFloat::convertToAPInt() const {
1691 if (semantics == (const llvm::fltSemantics* const)&IEEEsingle)
1692 return convertFloatAPFloatToAPInt();
1693 else if (semantics == (const llvm::fltSemantics* const)&IEEEdouble)
1694 return convertDoubleAPFloatToAPInt();
1695 else if (semantics == (const llvm::fltSemantics* const)&x87DoubleExtended)
1696 return convertF80LongDoubleAPFloatToAPInt();
1702 APFloat::convertToFloat() const {
1703 assert(semantics == (const llvm::fltSemantics* const)&IEEEsingle);
1704 APInt api = convertToAPInt();
1705 return api.bitsToFloat();
1709 APFloat::convertToDouble() const {
1710 assert(semantics == (const llvm::fltSemantics* const)&IEEEdouble);
1711 APInt api = convertToAPInt();
1712 return api.bitsToDouble();
1715 /// Integer bit is explicit in this format. Current Intel book does not
1716 /// define meaning of:
1717 /// exponent = all 1's, integer bit not set.
1718 /// exponent = 0, integer bit set. (formerly "psuedodenormals")
1719 /// exponent!=0 nor all 1's, integer bit not set. (formerly "unnormals")
1721 APFloat::initFromF80LongDoubleAPInt(const APInt &api) {
1722 assert(api.getBitWidth()==80);
1723 uint64_t i1 = api.getRawData()[0];
1724 uint64_t i2 = api.getRawData()[1];
1725 uint64_t myexponent = (i1 >> 48) & 0x7fff;
1726 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) |
1729 initialize(&APFloat::x87DoubleExtended);
1730 assert(partCount()==1);
1733 if (myexponent==0 && mysignificand==0) {
1734 // exponent, significand meaningless
1736 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
1737 // exponent, significand meaningless
1738 category = fcInfinity;
1739 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
1740 // exponent meaningless
1742 *significandParts() = mysignificand;
1744 category = fcNormal;
1745 exponent = myexponent - 16383;
1746 *significandParts() = mysignificand;
1747 if (myexponent==0) // denormal
1753 APFloat::initFromDoubleAPInt(const APInt &api) {
1754 assert(api.getBitWidth()==64);
1755 uint64_t i = *api.getRawData();
1756 uint64_t myexponent = (i >> 52) & 0x7ff;
1757 uint64_t mysignificand = i & 0xfffffffffffffLL;
1759 initialize(&APFloat::IEEEdouble);
1760 assert(partCount()==1);
1763 if (myexponent==0 && mysignificand==0) {
1764 // exponent, significand meaningless
1766 } else if (myexponent==0x7ff && mysignificand==0) {
1767 // exponent, significand meaningless
1768 category = fcInfinity;
1769 } else if (myexponent==0x7ff && mysignificand!=0) {
1770 // exponent meaningless
1772 *significandParts() = mysignificand;
1774 category = fcNormal;
1775 exponent = myexponent - 1023;
1776 *significandParts() = mysignificand;
1777 if (myexponent==0) // denormal
1780 *significandParts() |= 0x10000000000000LL; // integer bit
1785 APFloat::initFromFloatAPInt(const APInt & api) {
1786 assert(api.getBitWidth()==32);
1787 uint32_t i = (uint32_t)*api.getRawData();
1788 uint32_t myexponent = (i >> 23) & 0xff;
1789 uint32_t mysignificand = i & 0x7fffff;
1791 initialize(&APFloat::IEEEsingle);
1792 assert(partCount()==1);
1795 if (myexponent==0 && mysignificand==0) {
1796 // exponent, significand meaningless
1798 } else if (myexponent==0xff && mysignificand==0) {
1799 // exponent, significand meaningless
1800 category = fcInfinity;
1801 } else if (myexponent==0xff && (mysignificand & 0x400000)) {
1802 // sign, exponent, significand meaningless
1804 *significandParts() = mysignificand;
1806 category = fcNormal;
1807 exponent = myexponent - 127; //bias
1808 *significandParts() = mysignificand;
1809 if (myexponent==0) // denormal
1812 *significandParts() |= 0x800000; // integer bit
1816 /// Treat api as containing the bits of a floating point number. Currently
1817 /// we infer the floating point type from the size of the APInt. FIXME: This
1818 /// breaks when we get to PPC128 and IEEE128 (but both cannot exist in the
1819 /// same compile...)
1821 APFloat::initFromAPInt(const APInt& api) {
1822 if (api.getBitWidth() == 32)
1823 return initFromFloatAPInt(api);
1824 else if (api.getBitWidth()==64)
1825 return initFromDoubleAPInt(api);
1826 else if (api.getBitWidth()==80)
1827 return initFromF80LongDoubleAPInt(api);
1832 APFloat::APFloat(const APInt& api) {
1836 APFloat::APFloat(float f) {
1837 APInt api = APInt(32, 0);
1838 initFromAPInt(api.floatToBits(f));
1841 APFloat::APFloat(double d) {
1842 APInt api = APInt(64, 0);
1843 initFromAPInt(api.doubleToBits(d));