1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Neil Booth and is distributed under the
6 // University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
16 #include "llvm/ADT/APFloat.h"
17 #include "llvm/Support/MathExtras.h"
21 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
23 /* Assumed in hexadecimal significand parsing. */
24 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
28 /* Represents floating point arithmetic semantics. */
30 /* The largest E such that 2^E is representable; this matches the
31 definition of IEEE 754. */
32 exponent_t maxExponent;
34 /* The smallest E such that 2^E is a normalized number; this
35 matches the definition of IEEE 754. */
36 exponent_t minExponent;
38 /* Number of bits in the significand. This includes the integer
40 unsigned char precision;
42 /* If the target format has an implicit integer bit. */
43 bool implicitIntegerBit;
46 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
47 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
48 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
49 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, false };
50 const fltSemantics APFloat::Bogus = { 0, 0, 0, false };
53 /* Put a bunch of private, handy routines in an anonymous namespace. */
57 partCountForBits(unsigned int bits)
59 return ((bits) + integerPartWidth - 1) / integerPartWidth;
63 digitValue(unsigned int c)
75 hexDigitValue (unsigned int c)
94 /* This is ugly and needs cleaning up, but I don't immediately see
95 how whilst remaining safe. */
97 totalExponent(const char *p, int exponentAdjustment)
99 integerPart unsignedExponent;
100 bool negative, overflow;
103 /* Move past the exponent letter and sign to the digits. */
105 negative = *p == '-';
106 if(*p == '-' || *p == '+')
109 unsignedExponent = 0;
114 value = digitValue(*p);
119 unsignedExponent = unsignedExponent * 10 + value;
120 if(unsignedExponent > 65535)
124 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
128 exponent = unsignedExponent;
130 exponent = -exponent;
131 exponent += exponentAdjustment;
132 if(exponent > 65535 || exponent < -65536)
137 exponent = negative ? -65536: 65535;
143 skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
158 /* Return the trailing fraction of a hexadecimal number.
159 DIGITVALUE is the first hex digit of the fraction, P points to
162 trailingHexadecimalFraction(const char *p, unsigned int digitValue)
164 unsigned int hexDigit;
166 /* If the first trailing digit isn't 0 or 8 we can work out the
167 fraction immediately. */
169 return lfMoreThanHalf;
170 else if(digitValue < 8 && digitValue > 0)
171 return lfLessThanHalf;
173 /* Otherwise we need to find the first non-zero digit. */
177 hexDigit = hexDigitValue(*p);
179 /* If we ran off the end it is exactly zero or one-half, otherwise
182 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
184 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
187 /* Return the fraction lost were a bignum truncated. */
189 lostFractionThroughTruncation(integerPart *parts,
190 unsigned int partCount,
195 lsb = APInt::tcLSB(parts, partCount);
197 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
199 return lfExactlyZero;
201 return lfExactlyHalf;
202 if(bits <= partCount * integerPartWidth
203 && APInt::tcExtractBit(parts, bits - 1))
204 return lfMoreThanHalf;
206 return lfLessThanHalf;
209 /* Shift DST right BITS bits noting lost fraction. */
211 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
213 lostFraction lost_fraction;
215 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
217 APInt::tcShiftRight(dst, parts, bits);
219 return lost_fraction;
225 APFloat::initialize(const fltSemantics *ourSemantics)
229 semantics = ourSemantics;
232 significand.parts = new integerPart[count];
236 APFloat::freeSignificand()
239 delete [] significand.parts;
243 APFloat::assign(const APFloat &rhs)
245 assert(semantics == rhs.semantics);
248 category = rhs.category;
249 exponent = rhs.exponent;
250 if(category == fcNormal)
251 copySignificand(rhs);
255 APFloat::copySignificand(const APFloat &rhs)
257 assert(category == fcNormal);
258 assert(rhs.partCount() >= partCount());
260 APInt::tcAssign(significandParts(), rhs.significandParts(),
265 APFloat::operator=(const APFloat &rhs)
268 if(semantics != rhs.semantics) {
270 initialize(rhs.semantics);
279 APFloat::operator==(const APFloat &rhs) const {
282 if (semantics != rhs.semantics ||
283 category != rhs.category)
285 if (category==fcQNaN)
287 else if (category==fcZero || category==fcInfinity)
288 return sign==rhs.sign;
290 if (sign!=rhs.sign || exponent!=rhs.exponent)
293 const integerPart* p=significandParts();
294 const integerPart* q=rhs.significandParts();
295 for (; i>0; i--, p++, q++) {
303 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
305 initialize(&ourSemantics);
308 exponent = ourSemantics.precision - 1;
309 significandParts()[0] = value;
310 normalize(rmNearestTiesToEven, lfExactlyZero);
313 APFloat::APFloat(const fltSemantics &ourSemantics,
314 fltCategory ourCategory, bool negative)
316 initialize(&ourSemantics);
317 category = ourCategory;
319 if(category == fcNormal)
323 APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
325 initialize(&ourSemantics);
326 convertFromString(text, rmNearestTiesToEven);
329 APFloat::APFloat(const APFloat &rhs)
331 initialize(rhs.semantics);
341 APFloat::partCount() const
343 return partCountForBits(semantics->precision + 1);
347 APFloat::semanticsPrecision(const fltSemantics &semantics)
349 return semantics.precision;
353 APFloat::significandParts() const
355 return const_cast<APFloat *>(this)->significandParts();
359 APFloat::significandParts()
361 assert(category == fcNormal);
364 return significand.parts;
366 return &significand.part;
369 /* Combine the effect of two lost fractions. */
371 APFloat::combineLostFractions(lostFraction moreSignificant,
372 lostFraction lessSignificant)
374 if(lessSignificant != lfExactlyZero) {
375 if(moreSignificant == lfExactlyZero)
376 moreSignificant = lfLessThanHalf;
377 else if(moreSignificant == lfExactlyHalf)
378 moreSignificant = lfMoreThanHalf;
381 return moreSignificant;
385 APFloat::zeroSignificand()
388 APInt::tcSet(significandParts(), 0, partCount());
391 /* Increment an fcNormal floating point number's significand. */
393 APFloat::incrementSignificand()
397 carry = APInt::tcIncrement(significandParts(), partCount());
399 /* Our callers should never cause us to overflow. */
403 /* Add the significand of the RHS. Returns the carry flag. */
405 APFloat::addSignificand(const APFloat &rhs)
409 parts = significandParts();
411 assert(semantics == rhs.semantics);
412 assert(exponent == rhs.exponent);
414 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
417 /* Subtract the significand of the RHS with a borrow flag. Returns
420 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
424 parts = significandParts();
426 assert(semantics == rhs.semantics);
427 assert(exponent == rhs.exponent);
429 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
433 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
434 on to the full-precision result of the multiplication. Returns the
437 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
439 unsigned int omsb; // One, not zero, based MSB.
440 unsigned int partsCount, newPartsCount, precision;
441 integerPart *lhsSignificand;
442 integerPart scratch[4];
443 integerPart *fullSignificand;
444 lostFraction lost_fraction;
446 assert(semantics == rhs.semantics);
448 precision = semantics->precision;
449 newPartsCount = partCountForBits(precision * 2);
451 if(newPartsCount > 4)
452 fullSignificand = new integerPart[newPartsCount];
454 fullSignificand = scratch;
456 lhsSignificand = significandParts();
457 partsCount = partCount();
459 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
460 rhs.significandParts(), partsCount);
462 lost_fraction = lfExactlyZero;
463 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
464 exponent += rhs.exponent;
467 Significand savedSignificand = significand;
468 const fltSemantics *savedSemantics = semantics;
469 fltSemantics extendedSemantics;
471 unsigned int extendedPrecision;
473 /* Normalize our MSB. */
474 extendedPrecision = precision + precision - 1;
475 if(omsb != extendedPrecision)
477 APInt::tcShiftLeft(fullSignificand, newPartsCount,
478 extendedPrecision - omsb);
479 exponent -= extendedPrecision - omsb;
482 /* Create new semantics. */
483 extendedSemantics = *semantics;
484 extendedSemantics.precision = extendedPrecision;
486 if(newPartsCount == 1)
487 significand.part = fullSignificand[0];
489 significand.parts = fullSignificand;
490 semantics = &extendedSemantics;
492 APFloat extendedAddend(*addend);
493 status = extendedAddend.convert(extendedSemantics, rmTowardZero);
494 assert(status == opOK);
495 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
497 /* Restore our state. */
498 if(newPartsCount == 1)
499 fullSignificand[0] = significand.part;
500 significand = savedSignificand;
501 semantics = savedSemantics;
503 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
506 exponent -= (precision - 1);
508 if(omsb > precision) {
509 unsigned int bits, significantParts;
512 bits = omsb - precision;
513 significantParts = partCountForBits(omsb);
514 lf = shiftRight(fullSignificand, significantParts, bits);
515 lost_fraction = combineLostFractions(lf, lost_fraction);
519 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
521 if(newPartsCount > 4)
522 delete [] fullSignificand;
524 return lost_fraction;
527 /* Multiply the significands of LHS and RHS to DST. */
529 APFloat::divideSignificand(const APFloat &rhs)
531 unsigned int bit, i, partsCount;
532 const integerPart *rhsSignificand;
533 integerPart *lhsSignificand, *dividend, *divisor;
534 integerPart scratch[4];
535 lostFraction lost_fraction;
537 assert(semantics == rhs.semantics);
539 lhsSignificand = significandParts();
540 rhsSignificand = rhs.significandParts();
541 partsCount = partCount();
544 dividend = new integerPart[partsCount * 2];
548 divisor = dividend + partsCount;
550 /* Copy the dividend and divisor as they will be modified in-place. */
551 for(i = 0; i < partsCount; i++) {
552 dividend[i] = lhsSignificand[i];
553 divisor[i] = rhsSignificand[i];
554 lhsSignificand[i] = 0;
557 exponent -= rhs.exponent;
559 unsigned int precision = semantics->precision;
561 /* Normalize the divisor. */
562 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
565 APInt::tcShiftLeft(divisor, partsCount, bit);
568 /* Normalize the dividend. */
569 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
572 APInt::tcShiftLeft(dividend, partsCount, bit);
575 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
577 APInt::tcShiftLeft(dividend, partsCount, 1);
578 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
582 for(bit = precision; bit; bit -= 1) {
583 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
584 APInt::tcSubtract(dividend, divisor, 0, partsCount);
585 APInt::tcSetBit(lhsSignificand, bit - 1);
588 APInt::tcShiftLeft(dividend, partsCount, 1);
591 /* Figure out the lost fraction. */
592 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
595 lost_fraction = lfMoreThanHalf;
597 lost_fraction = lfExactlyHalf;
598 else if(APInt::tcIsZero(dividend, partsCount))
599 lost_fraction = lfExactlyZero;
601 lost_fraction = lfLessThanHalf;
606 return lost_fraction;
610 APFloat::significandMSB() const
612 return APInt::tcMSB(significandParts(), partCount());
616 APFloat::significandLSB() const
618 return APInt::tcLSB(significandParts(), partCount());
621 /* Note that a zero result is NOT normalized to fcZero. */
623 APFloat::shiftSignificandRight(unsigned int bits)
625 /* Our exponent should not overflow. */
626 assert((exponent_t) (exponent + bits) >= exponent);
630 return shiftRight(significandParts(), partCount(), bits);
633 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
635 APFloat::shiftSignificandLeft(unsigned int bits)
637 assert(bits < semantics->precision);
640 unsigned int partsCount = partCount();
642 APInt::tcShiftLeft(significandParts(), partsCount, bits);
645 assert(!APInt::tcIsZero(significandParts(), partsCount));
650 APFloat::compareAbsoluteValue(const APFloat &rhs) const
654 assert(semantics == rhs.semantics);
655 assert(category == fcNormal);
656 assert(rhs.category == fcNormal);
658 compare = exponent - rhs.exponent;
660 /* If exponents are equal, do an unsigned bignum comparison of the
663 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
667 return cmpGreaterThan;
674 /* Handle overflow. Sign is preserved. We either become infinity or
675 the largest finite number. */
677 APFloat::handleOverflow(roundingMode rounding_mode)
680 if(rounding_mode == rmNearestTiesToEven
681 || rounding_mode == rmNearestTiesToAway
682 || (rounding_mode == rmTowardPositive && !sign)
683 || (rounding_mode == rmTowardNegative && sign))
685 category = fcInfinity;
686 return (opStatus) (opOverflow | opInexact);
689 /* Otherwise we become the largest finite number. */
691 exponent = semantics->maxExponent;
692 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
693 semantics->precision);
698 /* This routine must work for fcZero of both signs, and fcNormal
701 APFloat::roundAwayFromZero(roundingMode rounding_mode,
702 lostFraction lost_fraction)
704 /* QNaNs and infinities should not have lost fractions. */
705 assert(category == fcNormal || category == fcZero);
707 /* Our caller has already handled this case. */
708 assert(lost_fraction != lfExactlyZero);
710 switch(rounding_mode) {
714 case rmNearestTiesToAway:
715 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
717 case rmNearestTiesToEven:
718 if(lost_fraction == lfMoreThanHalf)
721 /* Our zeroes don't have a significand to test. */
722 if(lost_fraction == lfExactlyHalf && category != fcZero)
723 return significandParts()[0] & 1;
730 case rmTowardPositive:
731 return sign == false;
733 case rmTowardNegative:
739 APFloat::normalize(roundingMode rounding_mode,
740 lostFraction lost_fraction)
742 unsigned int omsb; /* One, not zero, based MSB. */
745 if(category != fcNormal)
748 /* Before rounding normalize the exponent of fcNormal numbers. */
749 omsb = significandMSB() + 1;
752 /* OMSB is numbered from 1. We want to place it in the integer
753 bit numbered PRECISON if possible, with a compensating change in
755 exponentChange = omsb - semantics->precision;
757 /* If the resulting exponent is too high, overflow according to
758 the rounding mode. */
759 if(exponent + exponentChange > semantics->maxExponent)
760 return handleOverflow(rounding_mode);
762 /* Subnormal numbers have exponent minExponent, and their MSB
763 is forced based on that. */
764 if(exponent + exponentChange < semantics->minExponent)
765 exponentChange = semantics->minExponent - exponent;
767 /* Shifting left is easy as we don't lose precision. */
768 if(exponentChange < 0) {
769 assert(lost_fraction == lfExactlyZero);
771 shiftSignificandLeft(-exponentChange);
776 if(exponentChange > 0) {
779 /* Shift right and capture any new lost fraction. */
780 lf = shiftSignificandRight(exponentChange);
782 lost_fraction = combineLostFractions(lf, lost_fraction);
784 /* Keep OMSB up-to-date. */
785 if(omsb > (unsigned) exponentChange)
786 omsb -= (unsigned) exponentChange;
792 /* Now round the number according to rounding_mode given the lost
795 /* As specified in IEEE 754, since we do not trap we do not report
796 underflow for exact results. */
797 if(lost_fraction == lfExactlyZero) {
798 /* Canonicalize zeroes. */
805 /* Increment the significand if we're rounding away from zero. */
806 if(roundAwayFromZero(rounding_mode, lost_fraction)) {
808 exponent = semantics->minExponent;
810 incrementSignificand();
811 omsb = significandMSB() + 1;
813 /* Did the significand increment overflow? */
814 if(omsb == (unsigned) semantics->precision + 1) {
815 /* Renormalize by incrementing the exponent and shifting our
816 significand right one. However if we already have the
817 maximum exponent we overflow to infinity. */
818 if(exponent == semantics->maxExponent) {
819 category = fcInfinity;
821 return (opStatus) (opOverflow | opInexact);
824 shiftSignificandRight(1);
830 /* The normal case - we were and are not denormal, and any
831 significand increment above didn't overflow. */
832 if(omsb == semantics->precision)
835 /* We have a non-zero denormal. */
836 assert(omsb < semantics->precision);
837 assert(exponent == semantics->minExponent);
839 /* Canonicalize zeroes. */
843 /* The fcZero case is a denormal that underflowed to zero. */
844 return (opStatus) (opUnderflow | opInexact);
848 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
850 switch(convolve(category, rhs.category)) {
854 case convolve(fcQNaN, fcZero):
855 case convolve(fcQNaN, fcNormal):
856 case convolve(fcQNaN, fcInfinity):
857 case convolve(fcQNaN, fcQNaN):
858 case convolve(fcNormal, fcZero):
859 case convolve(fcInfinity, fcNormal):
860 case convolve(fcInfinity, fcZero):
863 case convolve(fcZero, fcQNaN):
864 case convolve(fcNormal, fcQNaN):
865 case convolve(fcInfinity, fcQNaN):
869 case convolve(fcNormal, fcInfinity):
870 case convolve(fcZero, fcInfinity):
871 category = fcInfinity;
872 sign = rhs.sign ^ subtract;
875 case convolve(fcZero, fcNormal):
877 sign = rhs.sign ^ subtract;
880 case convolve(fcZero, fcZero):
881 /* Sign depends on rounding mode; handled by caller. */
884 case convolve(fcInfinity, fcInfinity):
885 /* Differently signed infinities can only be validly
887 if(sign ^ rhs.sign != subtract) {
894 case convolve(fcNormal, fcNormal):
899 /* Add or subtract two normal numbers. */
901 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
904 lostFraction lost_fraction;
907 /* Determine if the operation on the absolute values is effectively
908 an addition or subtraction. */
909 subtract ^= (sign ^ rhs.sign);
911 /* Are we bigger exponent-wise than the RHS? */
912 bits = exponent - rhs.exponent;
914 /* Subtraction is more subtle than one might naively expect. */
916 APFloat temp_rhs(rhs);
920 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
921 lost_fraction = lfExactlyZero;
922 } else if (bits > 0) {
923 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
924 shiftSignificandLeft(1);
927 lost_fraction = shiftSignificandRight(-bits - 1);
928 temp_rhs.shiftSignificandLeft(1);
933 carry = temp_rhs.subtractSignificand
934 (*this, lost_fraction != lfExactlyZero);
935 copySignificand(temp_rhs);
938 carry = subtractSignificand
939 (temp_rhs, lost_fraction != lfExactlyZero);
942 /* Invert the lost fraction - it was on the RHS and
944 if(lost_fraction == lfLessThanHalf)
945 lost_fraction = lfMoreThanHalf;
946 else if(lost_fraction == lfMoreThanHalf)
947 lost_fraction = lfLessThanHalf;
949 /* The code above is intended to ensure that no borrow is
954 APFloat temp_rhs(rhs);
956 lost_fraction = temp_rhs.shiftSignificandRight(bits);
957 carry = addSignificand(temp_rhs);
959 lost_fraction = shiftSignificandRight(-bits);
960 carry = addSignificand(rhs);
963 /* We have a guard bit; generating a carry cannot happen. */
967 return lost_fraction;
971 APFloat::multiplySpecials(const APFloat &rhs)
973 switch(convolve(category, rhs.category)) {
977 case convolve(fcQNaN, fcZero):
978 case convolve(fcQNaN, fcNormal):
979 case convolve(fcQNaN, fcInfinity):
980 case convolve(fcQNaN, fcQNaN):
981 case convolve(fcZero, fcQNaN):
982 case convolve(fcNormal, fcQNaN):
983 case convolve(fcInfinity, fcQNaN):
987 case convolve(fcNormal, fcInfinity):
988 case convolve(fcInfinity, fcNormal):
989 case convolve(fcInfinity, fcInfinity):
990 category = fcInfinity;
993 case convolve(fcZero, fcNormal):
994 case convolve(fcNormal, fcZero):
995 case convolve(fcZero, fcZero):
999 case convolve(fcZero, fcInfinity):
1000 case convolve(fcInfinity, fcZero):
1004 case convolve(fcNormal, fcNormal):
1010 APFloat::divideSpecials(const APFloat &rhs)
1012 switch(convolve(category, rhs.category)) {
1016 case convolve(fcQNaN, fcZero):
1017 case convolve(fcQNaN, fcNormal):
1018 case convolve(fcQNaN, fcInfinity):
1019 case convolve(fcQNaN, fcQNaN):
1020 case convolve(fcInfinity, fcZero):
1021 case convolve(fcInfinity, fcNormal):
1022 case convolve(fcZero, fcInfinity):
1023 case convolve(fcZero, fcNormal):
1026 case convolve(fcZero, fcQNaN):
1027 case convolve(fcNormal, fcQNaN):
1028 case convolve(fcInfinity, fcQNaN):
1032 case convolve(fcNormal, fcInfinity):
1036 case convolve(fcNormal, fcZero):
1037 category = fcInfinity;
1040 case convolve(fcInfinity, fcInfinity):
1041 case convolve(fcZero, fcZero):
1045 case convolve(fcNormal, fcNormal):
1052 APFloat::changeSign()
1054 /* Look mummy, this one's easy. */
1058 /* Normalized addition or subtraction. */
1060 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1065 fs = addOrSubtractSpecials(rhs, subtract);
1067 /* This return code means it was not a simple case. */
1068 if(fs == opDivByZero) {
1069 lostFraction lost_fraction;
1071 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1072 fs = normalize(rounding_mode, lost_fraction);
1074 /* Can only be zero if we lost no fraction. */
1075 assert(category != fcZero || lost_fraction == lfExactlyZero);
1078 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1079 positive zero unless rounding to minus infinity, except that
1080 adding two like-signed zeroes gives that zero. */
1081 if(category == fcZero) {
1082 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1083 sign = (rounding_mode == rmTowardNegative);
1089 /* Normalized addition. */
1091 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1093 return addOrSubtract(rhs, rounding_mode, false);
1096 /* Normalized subtraction. */
1098 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1100 return addOrSubtract(rhs, rounding_mode, true);
1103 /* Normalized multiply. */
1105 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1110 fs = multiplySpecials(rhs);
1112 if(category == fcNormal) {
1113 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1114 fs = normalize(rounding_mode, lost_fraction);
1115 if(lost_fraction != lfExactlyZero)
1116 fs = (opStatus) (fs | opInexact);
1122 /* Normalized divide. */
1124 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1129 fs = divideSpecials(rhs);
1131 if(category == fcNormal) {
1132 lostFraction lost_fraction = divideSignificand(rhs);
1133 fs = normalize(rounding_mode, lost_fraction);
1134 if(lost_fraction != lfExactlyZero)
1135 fs = (opStatus) (fs | opInexact);
1141 /* Normalized fused-multiply-add. */
1143 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1144 const APFloat &addend,
1145 roundingMode rounding_mode)
1149 /* Post-multiplication sign, before addition. */
1150 sign ^= multiplicand.sign;
1152 /* If and only if all arguments are normal do we need to do an
1153 extended-precision calculation. */
1154 if(category == fcNormal
1155 && multiplicand.category == fcNormal
1156 && addend.category == fcNormal) {
1157 lostFraction lost_fraction;
1159 lost_fraction = multiplySignificand(multiplicand, &addend);
1160 fs = normalize(rounding_mode, lost_fraction);
1161 if(lost_fraction != lfExactlyZero)
1162 fs = (opStatus) (fs | opInexact);
1164 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1165 positive zero unless rounding to minus infinity, except that
1166 adding two like-signed zeroes gives that zero. */
1167 if(category == fcZero && sign != addend.sign)
1168 sign = (rounding_mode == rmTowardNegative);
1170 fs = multiplySpecials(multiplicand);
1172 /* FS can only be opOK or opInvalidOp. There is no more work
1173 to do in the latter case. The IEEE-754R standard says it is
1174 implementation-defined in this case whether, if ADDEND is a
1175 quiet QNaN, we raise invalid op; this implementation does so.
1177 If we need to do the addition we can do so with normal
1180 fs = addOrSubtract(addend, rounding_mode, false);
1186 /* Comparison requires normalized numbers. */
1188 APFloat::compare(const APFloat &rhs) const
1192 assert(semantics == rhs.semantics);
1194 switch(convolve(category, rhs.category)) {
1198 case convolve(fcQNaN, fcZero):
1199 case convolve(fcQNaN, fcNormal):
1200 case convolve(fcQNaN, fcInfinity):
1201 case convolve(fcQNaN, fcQNaN):
1202 case convolve(fcZero, fcQNaN):
1203 case convolve(fcNormal, fcQNaN):
1204 case convolve(fcInfinity, fcQNaN):
1205 return cmpUnordered;
1207 case convolve(fcInfinity, fcNormal):
1208 case convolve(fcInfinity, fcZero):
1209 case convolve(fcNormal, fcZero):
1213 return cmpGreaterThan;
1215 case convolve(fcNormal, fcInfinity):
1216 case convolve(fcZero, fcInfinity):
1217 case convolve(fcZero, fcNormal):
1219 return cmpGreaterThan;
1223 case convolve(fcInfinity, fcInfinity):
1224 if(sign == rhs.sign)
1229 return cmpGreaterThan;
1231 case convolve(fcZero, fcZero):
1234 case convolve(fcNormal, fcNormal):
1238 /* Two normal numbers. Do they have the same sign? */
1239 if(sign != rhs.sign) {
1241 result = cmpLessThan;
1243 result = cmpGreaterThan;
1245 /* Compare absolute values; invert result if negative. */
1246 result = compareAbsoluteValue(rhs);
1249 if(result == cmpLessThan)
1250 result = cmpGreaterThan;
1251 else if(result == cmpGreaterThan)
1252 result = cmpLessThan;
1260 APFloat::convert(const fltSemantics &toSemantics,
1261 roundingMode rounding_mode)
1263 unsigned int newPartCount;
1266 newPartCount = partCountForBits(toSemantics.precision + 1);
1268 /* If our new form is wider, re-allocate our bit pattern into wider
1270 if(newPartCount > partCount()) {
1271 integerPart *newParts;
1273 newParts = new integerPart[newPartCount];
1274 APInt::tcSet(newParts, 0, newPartCount);
1275 APInt::tcAssign(newParts, significandParts(), partCount());
1277 significand.parts = newParts;
1280 if(category == fcNormal) {
1281 /* Re-interpret our bit-pattern. */
1282 exponent += toSemantics.precision - semantics->precision;
1283 semantics = &toSemantics;
1284 fs = normalize(rounding_mode, lfExactlyZero);
1286 semantics = &toSemantics;
1293 /* Convert a floating point number to an integer according to the
1294 rounding mode. If the rounded integer value is out of range this
1295 returns an invalid operation exception. If the rounded value is in
1296 range but the floating point number is not the exact integer, the C
1297 standard doesn't require an inexact exception to be raised. IEEE
1298 854 does require it so we do that.
1300 Note that for conversions to integer type the C standard requires
1301 round-to-zero to always be used. */
1303 APFloat::convertToInteger(integerPart *parts, unsigned int width,
1305 roundingMode rounding_mode) const
1307 lostFraction lost_fraction;
1308 unsigned int msb, partsCount;
1311 /* Handle the three special cases first. */
1312 if(category == fcInfinity || category == fcQNaN)
1315 partsCount = partCountForBits(width);
1317 if(category == fcZero) {
1318 APInt::tcSet(parts, 0, partsCount);
1322 /* Shift the bit pattern so the fraction is lost. */
1325 bits = (int) semantics->precision - 1 - exponent;
1328 lost_fraction = tmp.shiftSignificandRight(bits);
1330 tmp.shiftSignificandLeft(-bits);
1331 lost_fraction = lfExactlyZero;
1334 if(lost_fraction != lfExactlyZero
1335 && tmp.roundAwayFromZero(rounding_mode, lost_fraction))
1336 tmp.incrementSignificand();
1338 msb = tmp.significandMSB();
1340 /* Negative numbers cannot be represented as unsigned. */
1341 if(!isSigned && tmp.sign && msb != -1U)
1344 /* It takes exponent + 1 bits to represent the truncated floating
1345 point number without its sign. We lose a bit for the sign, but
1346 the maximally negative integer is a special case. */
1347 if(msb + 1 > width) /* !! Not same as msb >= width !! */
1350 if(isSigned && msb + 1 == width
1351 && (!tmp.sign || tmp.significandLSB() != msb))
1354 APInt::tcAssign(parts, tmp.significandParts(), partsCount);
1357 APInt::tcNegate(parts, partsCount);
1359 if(lost_fraction == lfExactlyZero)
1366 APFloat::convertFromUnsignedInteger(integerPart *parts,
1367 unsigned int partCount,
1368 roundingMode rounding_mode)
1370 unsigned int msb, precision;
1371 lostFraction lost_fraction;
1373 msb = APInt::tcMSB(parts, partCount) + 1;
1374 precision = semantics->precision;
1376 category = fcNormal;
1377 exponent = precision - 1;
1379 if(msb > precision) {
1380 exponent += (msb - precision);
1381 lost_fraction = shiftRight(parts, partCount, msb - precision);
1384 lost_fraction = lfExactlyZero;
1386 /* Copy the bit image. */
1388 APInt::tcAssign(significandParts(), parts, partCountForBits(msb));
1390 return normalize(rounding_mode, lost_fraction);
1394 APFloat::convertFromInteger(const integerPart *parts,
1395 unsigned int partCount, bool isSigned,
1396 roundingMode rounding_mode)
1402 copy = new integerPart[partCount];
1403 APInt::tcAssign(copy, parts, partCount);
1405 width = partCount * integerPartWidth;
1408 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
1410 APInt::tcNegate(copy, partCount);
1413 status = convertFromUnsignedInteger(copy, partCount, rounding_mode);
1420 APFloat::convertFromHexadecimalString(const char *p,
1421 roundingMode rounding_mode)
1423 lostFraction lost_fraction;
1424 integerPart *significand;
1425 unsigned int bitPos, partsCount;
1426 const char *dot, *firstSignificantDigit;
1430 category = fcNormal;
1432 significand = significandParts();
1433 partsCount = partCount();
1434 bitPos = partsCount * integerPartWidth;
1436 /* Skip leading zeroes and any(hexa)decimal point. */
1437 p = skipLeadingZeroesAndAnyDot(p, &dot);
1438 firstSignificantDigit = p;
1441 integerPart hex_value;
1448 hex_value = hexDigitValue(*p);
1449 if(hex_value == -1U) {
1450 lost_fraction = lfExactlyZero;
1456 /* Store the number whilst 4-bit nibbles remain. */
1459 hex_value <<= bitPos % integerPartWidth;
1460 significand[bitPos / integerPartWidth] |= hex_value;
1462 lost_fraction = trailingHexadecimalFraction(p, hex_value);
1463 while(hexDigitValue(*p) != -1U)
1469 /* Hex floats require an exponent but not a hexadecimal point. */
1470 assert(*p == 'p' || *p == 'P');
1472 /* Ignore the exponent if we are zero. */
1473 if(p != firstSignificantDigit) {
1476 /* Implicit hexadecimal point? */
1480 /* Calculate the exponent adjustment implicit in the number of
1481 significant digits. */
1482 expAdjustment = dot - firstSignificantDigit;
1483 if(expAdjustment < 0)
1485 expAdjustment = expAdjustment * 4 - 1;
1487 /* Adjust for writing the significand starting at the most
1488 significant nibble. */
1489 expAdjustment += semantics->precision;
1490 expAdjustment -= partsCount * integerPartWidth;
1492 /* Adjust for the given exponent. */
1493 exponent = totalExponent(p, expAdjustment);
1496 return normalize(rounding_mode, lost_fraction);
1500 APFloat::convertFromString(const char *p, roundingMode rounding_mode) {
1501 /* Handle a leading minus sign. */
1507 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
1508 return convertFromHexadecimalString(p + 2, rounding_mode);
1510 assert(0 && "Decimal to binary conversions not yet implemented");
1514 // For good performance it is desirable for different APFloats
1515 // to produce different integers.
1517 APFloat::getHashValue() const {
1518 if (category==fcZero) return sign<<8 | semantics->precision ;
1519 else if (category==fcInfinity) return sign<<9 | semantics->precision;
1520 else if (category==fcQNaN) return 1<<10 | semantics->precision;
1522 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
1523 const integerPart* p = significandParts();
1524 for (int i=partCount(); i>0; i--, p++)
1525 hash ^= ((uint32_t)*p) ^ (*p)>>32;
1530 // Conversion from APFloat to/from host float/double. It may eventually be
1531 // possible to eliminate these and have everybody deal with APFloats, but that
1532 // will take a while. This approach will not easily extend to long double.
1533 // Current implementation requires partCount()==1, which is correct at the
1534 // moment but could be made more general.
1537 APFloat::convertToDouble() const {
1538 assert(semantics == (const llvm::fltSemantics* const)&IEEEdouble);
1539 assert (partCount()==1);
1541 uint64_t myexponent, mysign, mysignificand;
1543 if (category==fcNormal) {
1545 mysignificand = *significandParts();
1546 myexponent = exponent+1023; //bias
1547 } else if (category==fcZero) {
1551 } else if (category==fcInfinity) {
1555 } else if (category==fcQNaN) {
1558 mysignificand = 0xfffffffffffffLL;
1562 return BitsToDouble(((mysign & 1) << 63) | ((myexponent & 0x7ff) << 52) |
1563 (mysignificand & 0xfffffffffffffLL));
1567 APFloat::convertToFloat() const {
1568 assert(semantics == (const llvm::fltSemantics* const)&IEEEsingle);
1569 assert (partCount()==1);
1571 uint32_t mysign, myexponent, mysignificand;
1573 if (category==fcNormal) {
1575 myexponent = exponent+127; //bias
1576 mysignificand = *significandParts();
1577 } else if (category==fcZero) {
1581 } else if (category==fcInfinity) {
1585 } else if (category==fcQNaN) {
1588 mysignificand = 0x7fffff;
1592 return BitsToFloat(((mysign&1) << 31) | ((myexponent&0xff) << 23) |
1593 (mysignificand & 0x7fffff));
1596 APFloat::APFloat(double d) {
1597 uint64_t i = DoubleToBits(d);
1598 uint64_t mysign = i >> 63;
1599 uint64_t myexponent = (i >> 52) & 0x7ff;
1600 uint64_t mysignificand = i & 0xfffffffffffffLL;
1602 initialize(&APFloat::IEEEdouble);
1603 assert(partCount()==1);
1605 if (myexponent==0 && mysignificand==0) {
1606 // exponent, significand meaningless
1609 } else if (myexponent==0x7ff && mysignificand==0) {
1610 // exponent, significand meaningless
1611 category = fcInfinity;
1613 } else if (myexponent==0x7ff && (mysignificand & 0x8000000000000LL)) {
1614 // sign, exponent, significand meaningless
1618 category = fcNormal;
1619 exponent = myexponent - 1023;
1620 *significandParts() = mysignificand | 0x100000000000000LL;
1624 APFloat::APFloat(float f) {
1625 uint32_t i = FloatToBits(f);
1626 uint32_t mysign = i >> 31;
1627 uint32_t myexponent = (i >> 23) & 0xff;
1628 uint32_t mysignificand = i & 0x7fffff;
1630 initialize(&APFloat::IEEEsingle);
1631 assert(partCount()==1);
1633 if (myexponent==0 && mysignificand==0) {
1634 // exponent, significand meaningless
1637 } else if (myexponent==0xff && mysignificand==0) {
1638 // exponent, significand meaningless
1639 category = fcInfinity;
1641 } else if (myexponent==0xff && (mysignificand & 0x400000)) {
1642 // sign, exponent, significand meaningless
1645 category = fcNormal;
1647 exponent = myexponent - 127; //bias
1648 *significandParts() = mysignificand | 0x800000; // integer bit