lib/Support/APFloat.cpp

   1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file was developed by Neil Booth and is distributed under the
   6 // University of Illinois Open Source License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements a class to represent arbitrary precision floating
  11 // point values and provide a variety of arithmetic operations on them.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include <cassert>
  16 #include "llvm/ADT/APFloat.h"
  17
  18 using namespace llvm;
  19
  20 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
  21
  22 /* Assumed in hexadecimal significand parsing.  */
  23 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
  24
  25 namespace llvm {
  26
  27   /* Represents floating point arithmetic semantics.  */
  28   struct fltSemantics {
  29     /* The largest E such that 2^E is representable; this matches the
  30        definition of IEEE 754.  */
  31     exponent_t maxExponent;
  32
  33     /* The smallest E such that 2^E is a normalized number; this
  34        matches the definition of IEEE 754.  */
  35     exponent_t minExponent;
  36
  37     /* Number of bits in the significand.  This includes the integer
  38        bit.  */
  39     unsigned char precision;
  40
  41     /* If the target format has an implicit integer bit.  */
  42     bool implicitIntegerBit;
  43   };
  44
  45   const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
  46   const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
  47   const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
  48   const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, false };
  49 }
  50
  51 /* Put a bunch of private, handy routines in an anonymous namespace.  */
  52 namespace {
  53
  54   inline unsigned int
  55   partCountForBits(unsigned int bits)
  56   {
  57     return ((bits) + integerPartWidth - 1) / integerPartWidth;
  58   }
  59
  60   unsigned int
  61   digitValue(unsigned int c)
  62   {
  63     unsigned int r;
  64
  65     r = c - '0';
  66     if(r <= 9)
  67       return r;
  68
  69     return -1U;
  70   }
  71
  72   unsigned int
  73   hexDigitValue (unsigned int c)
  74   {
  75     unsigned int r;
  76
  77     r = c - '0';
  78     if(r <= 9)
  79       return r;
  80
  81     r = c - 'A';
  82     if(r <= 5)
  83       return r + 10;
  84
  85     r = c - 'a';
  86     if(r <= 5)
  87       return r + 10;
  88
  89     return -1U;
  90   }
  91
  92   /* This is ugly and needs cleaning up, but I don't immediately see
  93      how whilst remaining safe.  */
  94   static int
  95   totalExponent(const char *p, int exponentAdjustment)
  96   {
  97     integerPart unsignedExponent;
  98     bool negative, overflow;
  99     long exponent;
 100
 101     /* Move past the exponent letter and sign to the digits.  */
 102     p++;
 103     negative = *p == '-';
 104     if(*p == '-' || *p == '+')
 105       p++;
 106
 107     unsignedExponent = 0;
 108     overflow = false;
 109     for(;;) {
 110       unsigned int value;
 111
 112       value = digitValue(*p);
 113       if(value == -1U)
 114         break;
 115
 116       p++;
 117       unsignedExponent = unsignedExponent * 10 + value;
 118       if(unsignedExponent > 65535)
 119         overflow = true;
 120     }
 121
 122     if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
 123       overflow = true;
 124
 125     if(!overflow) {
 126       exponent = unsignedExponent;
 127       if(negative)
 128         exponent = -exponent;
 129       exponent += exponentAdjustment;
 130       if(exponent > 65535 || exponent < -65536)
 131         overflow = true;
 132     }
 133
 134     if(overflow)
 135       exponent = negative ? -65536: 65535;
 136
 137     return exponent;
 138   }
 139
 140   const char *
 141   skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
 142   {
 143     *dot = 0;
 144     while(*p == '0')
 145       p++;
 146
 147     if(*p == '.') {
 148       *dot = p++;
 149       while(*p == '0')
 150         p++;
 151     }
 152
 153     return p;
 154   }
 155
 156   /* Return the trailing fraction of a hexadecimal number.
 157      DIGITVALUE is the first hex digit of the fraction, P points to
 158      the next digit.  */
 159   lostFraction
 160   trailingHexadecimalFraction(const char *p, unsigned int digitValue)
 161   {
 162     unsigned int hexDigit;
 163
 164     /* If the first trailing digit isn't 0 or 8 we can work out the
 165        fraction immediately.  */
 166     if(digitValue > 8)
 167       return lfMoreThanHalf;
 168     else if(digitValue < 8 && digitValue > 0)
 169       return lfLessThanHalf;
 170
 171     /* Otherwise we need to find the first non-zero digit.  */
 172     while(*p == '0')
 173       p++;
 174
 175     hexDigit = hexDigitValue(*p);
 176
 177     /* If we ran off the end it is exactly zero or one-half, otherwise
 178        a little more.  */
 179     if(hexDigit == -1U)
 180       return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
 181     else
 182       return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
 183   }
 184
 185   /* Return the fraction lost were a bignum truncated.  */
 186   lostFraction
 187   lostFractionThroughTruncation(integerPart *parts,
 188                                 unsigned int partCount,
 189                                 unsigned int bits)
 190   {
 191     unsigned int lsb;
 192
 193     lsb = APInt::tcLSB(parts, partCount);
 194
 195     /* Note this is guaranteed true if bits == 0, or LSB == -1U.  */
 196     if(bits <= lsb)
 197       return lfExactlyZero;
 198     if(bits == lsb + 1)
 199       return lfExactlyHalf;
 200     if(bits <= partCount * integerPartWidth
 201        && APInt::tcExtractBit(parts, bits - 1))
 202       return lfMoreThanHalf;
 203
 204     return lfLessThanHalf;
 205   }
 206
 207   /* Shift DST right BITS bits noting lost fraction.  */
 208   lostFraction
 209   shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
 210   {
 211     lostFraction lost_fraction;
 212
 213     lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
 214
 215     APInt::tcShiftRight(dst, parts, bits);
 216
 217     return lost_fraction;
 218   }
 219 }
 220
 221 /* Constructors.  */
 222 void
 223 APFloat::initialize(const fltSemantics *ourSemantics)
 224 {
 225   unsigned int count;
 226
 227   semantics = ourSemantics;
 228   count = partCount();
 229   if(count > 1)
 230     significand.parts = new integerPart[count];
 231 }
 232
 233 void
 234 APFloat::freeSignificand()
 235 {
 236   if(partCount() > 1)
 237     delete [] significand.parts;
 238 }
 239
 240 void
 241 APFloat::assign(const APFloat &rhs)
 242 {
 243   assert(semantics == rhs.semantics);
 244
 245   sign = rhs.sign;
 246   category = rhs.category;
 247   exponent = rhs.exponent;
 248   if(category == fcNormal)
 249     copySignificand(rhs);
 250 }
 251
 252 void
 253 APFloat::copySignificand(const APFloat &rhs)
 254 {
 255   assert(category == fcNormal);
 256   assert(rhs.partCount() >= partCount());
 257
 258   APInt::tcAssign(significandParts(), rhs.significandParts(),
 259                   partCount());
 260 }
 261
 262 APFloat &
 263 APFloat::operator=(const APFloat &rhs)
 264 {
 265   if(this != &rhs) {
 266     if(semantics != rhs.semantics) {
 267       freeSignificand();
 268       initialize(rhs.semantics);
 269     }
 270     assign(rhs);
 271   }
 272
 273   return *this;
 274 }
 275
 276 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
 277 {
 278   initialize(&ourSemantics);
 279   sign = 0;
 280   zeroSignificand();
 281   exponent = ourSemantics.precision - 1;
 282   significandParts()[0] = value;
 283   normalize(rmNearestTiesToEven, lfExactlyZero);
 284 }
 285
 286 APFloat::APFloat(const fltSemantics &ourSemantics,
 287                  fltCategory ourCategory, bool negative)
 288 {
 289   initialize(&ourSemantics);
 290   category = ourCategory;
 291   sign = negative;
 292   if(category == fcNormal)
 293     category = fcZero;
 294 }
 295
 296 APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
 297 {
 298   initialize(&ourSemantics);
 299   convertFromString(text, rmNearestTiesToEven);
 300 }
 301
 302 APFloat::APFloat(const APFloat &rhs)
 303 {
 304   initialize(rhs.semantics);
 305   assign(rhs);
 306 }
 307
 308 APFloat::~APFloat()
 309 {
 310   freeSignificand();
 311 }
 312
 313 unsigned int
 314 APFloat::partCount() const
 315 {
 316   return partCountForBits(semantics->precision + 1);
 317 }
 318
 319 unsigned int
 320 APFloat::semanticsPrecision(const fltSemantics &semantics)
 321 {
 322   return semantics.precision;
 323 }
 324
 325 const integerPart *
 326 APFloat::significandParts() const
 327 {
 328   return const_cast<APFloat *>(this)->significandParts();
 329 }
 330
 331 integerPart *
 332 APFloat::significandParts()
 333 {
 334   assert(category == fcNormal);
 335
 336   if(partCount() > 1)
 337     return significand.parts;
 338   else
 339     return &significand.part;
 340 }
 341
 342 /* Combine the effect of two lost fractions.  */
 343 lostFraction
 344 APFloat::combineLostFractions(lostFraction moreSignificant,
 345                               lostFraction lessSignificant)
 346 {
 347   if(lessSignificant != lfExactlyZero) {
 348     if(moreSignificant == lfExactlyZero)
 349       moreSignificant = lfLessThanHalf;
 350     else if(moreSignificant == lfExactlyHalf)
 351       moreSignificant = lfMoreThanHalf;
 352   }
 353
 354   return moreSignificant;
 355 }
 356
 357 void
 358 APFloat::zeroSignificand()
 359 {
 360   category = fcNormal;
 361   APInt::tcSet(significandParts(), 0, partCount());
 362 }
 363
 364 /* Increment an fcNormal floating point number's significand.  */
 365 void
 366 APFloat::incrementSignificand()
 367 {
 368   integerPart carry;
 369
 370   carry = APInt::tcIncrement(significandParts(), partCount());
 371
 372   /* Our callers should never cause us to overflow.  */
 373   assert(carry == 0);
 374 }
 375
 376 /* Add the significand of the RHS.  Returns the carry flag.  */
 377 integerPart
 378 APFloat::addSignificand(const APFloat &rhs)
 379 {
 380   integerPart *parts;
 381
 382   parts = significandParts();
 383
 384   assert(semantics == rhs.semantics);
 385   assert(exponent == rhs.exponent);
 386
 387   return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
 388 }
 389
 390 /* Subtract the significand of the RHS with a borrow flag.  Returns
 391    the borrow flag.  */
 392 integerPart
 393 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
 394 {
 395   integerPart *parts;
 396
 397   parts = significandParts();
 398
 399   assert(semantics == rhs.semantics);
 400   assert(exponent == rhs.exponent);
 401
 402   return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
 403                            partCount());
 404 }
 405
 406 /* Multiply the significand of the RHS.  If ADDEND is non-NULL, add it
 407    on to the full-precision result of the multiplication.  Returns the
 408    lost fraction.  */
 409 lostFraction
 410 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
 411 {
 412   unsigned int omsb;    // One, not zero, based MSB.
 413   unsigned int partsCount, newPartsCount, precision;
 414   integerPart *lhsSignificand;
 415   integerPart scratch[4];
 416   integerPart *fullSignificand;
 417   lostFraction lost_fraction;
 418
 419   assert(semantics == rhs.semantics);
 420
 421   precision = semantics->precision;
 422   newPartsCount = partCountForBits(precision * 2);
 423
 424   if(newPartsCount > 4)
 425     fullSignificand = new integerPart[newPartsCount];
 426   else
 427     fullSignificand = scratch;
 428
 429   lhsSignificand = significandParts();
 430   partsCount = partCount();
 431
 432   APInt::tcFullMultiply(fullSignificand, lhsSignificand,
 433                         rhs.significandParts(), partsCount);
 434
 435   lost_fraction = lfExactlyZero;
 436   omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
 437   exponent += rhs.exponent;
 438
 439   if(addend) {
 440     Significand savedSignificand = significand;
 441     const fltSemantics *savedSemantics = semantics;
 442     fltSemantics extendedSemantics;
 443     opStatus status;
 444     unsigned int extendedPrecision;
 445
 446     /* Normalize our MSB.  */
 447     extendedPrecision = precision + precision - 1;
 448     if(omsb != extendedPrecision)
 449       {
 450         APInt::tcShiftLeft(fullSignificand, newPartsCount,
 451                            extendedPrecision - omsb);
 452         exponent -= extendedPrecision - omsb;
 453       }
 454
 455     /* Create new semantics.  */
 456     extendedSemantics = *semantics;
 457     extendedSemantics.precision = extendedPrecision;
 458
 459     if(newPartsCount == 1)
 460       significand.part = fullSignificand[0];
 461     else
 462       significand.parts = fullSignificand;
 463     semantics = &extendedSemantics;
 464
 465     APFloat extendedAddend(*addend);
 466     status = extendedAddend.convert(extendedSemantics, rmTowardZero);
 467     assert(status == opOK);
 468     lost_fraction = addOrSubtractSignificand(extendedAddend, false);
 469
 470     /* Restore our state.  */
 471     if(newPartsCount == 1)
 472       fullSignificand[0] = significand.part;
 473     significand = savedSignificand;
 474     semantics = savedSemantics;
 475
 476     omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
 477   }
 478
 479   exponent -= (precision - 1);
 480
 481   if(omsb > precision) {
 482     unsigned int bits, significantParts;
 483     lostFraction lf;
 484
 485     bits = omsb - precision;
 486     significantParts = partCountForBits(omsb);
 487     lf = shiftRight(fullSignificand, significantParts, bits);
 488     lost_fraction = combineLostFractions(lf, lost_fraction);
 489     exponent += bits;
 490   }
 491
 492   APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
 493
 494   if(newPartsCount > 4)
 495     delete [] fullSignificand;
 496
 497   return lost_fraction;
 498 }
 499
 500 /* Multiply the significands of LHS and RHS to DST.  */
 501 lostFraction
 502 APFloat::divideSignificand(const APFloat &rhs)
 503 {
 504   unsigned int bit, i, partsCount;
 505   const integerPart *rhsSignificand;
 506   integerPart *lhsSignificand, *dividend, *divisor;
 507   integerPart scratch[4];
 508   lostFraction lost_fraction;
 509
 510   assert(semantics == rhs.semantics);
 511
 512   lhsSignificand = significandParts();
 513   rhsSignificand = rhs.significandParts();
 514   partsCount = partCount();
 515
 516   if(partsCount > 2)
 517     dividend = new integerPart[partsCount * 2];
 518   else
 519     dividend = scratch;
 520
 521   divisor = dividend + partsCount;
 522
 523   /* Copy the dividend and divisor as they will be modified in-place.  */
 524   for(i = 0; i < partsCount; i++) {
 525     dividend[i] = lhsSignificand[i];
 526     divisor[i] = rhsSignificand[i];
 527     lhsSignificand[i] = 0;
 528   }
 529
 530   exponent -= rhs.exponent;
 531
 532   unsigned int precision = semantics->precision;
 533
 534   /* Normalize the divisor.  */
 535   bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
 536   if(bit) {
 537     exponent += bit;
 538     APInt::tcShiftLeft(divisor, partsCount, bit);
 539   }
 540
 541   /* Normalize the dividend.  */
 542   bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
 543   if(bit) {
 544     exponent -= bit;
 545     APInt::tcShiftLeft(dividend, partsCount, bit);
 546   }
 547
 548   if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
 549     exponent--;
 550     APInt::tcShiftLeft(dividend, partsCount, 1);
 551     assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
 552   }
 553
 554   /* Long division.  */
 555   for(bit = precision; bit; bit -= 1) {
 556     if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
 557       APInt::tcSubtract(dividend, divisor, 0, partsCount);
 558       APInt::tcSetBit(lhsSignificand, bit - 1);
 559     }
 560
 561     APInt::tcShiftLeft(dividend, partsCount, 1);
 562   }
 563
 564   /* Figure out the lost fraction.  */
 565   int cmp = APInt::tcCompare(dividend, divisor, partsCount);
 566
 567   if(cmp > 0)
 568     lost_fraction = lfMoreThanHalf;
 569   else if(cmp == 0)
 570     lost_fraction = lfExactlyHalf;
 571   else if(APInt::tcIsZero(dividend, partsCount))
 572     lost_fraction = lfExactlyZero;
 573   else
 574     lost_fraction = lfLessThanHalf;
 575
 576   if(partsCount > 2)
 577     delete [] dividend;
 578
 579   return lost_fraction;
 580 }
 581
 582 unsigned int
 583 APFloat::significandMSB() const
 584 {
 585   return APInt::tcMSB(significandParts(), partCount());
 586 }
 587
 588 unsigned int
 589 APFloat::significandLSB() const
 590 {
 591   return APInt::tcLSB(significandParts(), partCount());
 592 }
 593
 594 /* Note that a zero result is NOT normalized to fcZero.  */
 595 lostFraction
 596 APFloat::shiftSignificandRight(unsigned int bits)
 597 {
 598   /* Our exponent should not overflow.  */
 599   assert((exponent_t) (exponent + bits) >= exponent);
 600
 601   exponent += bits;
 602
 603   return shiftRight(significandParts(), partCount(), bits);
 604 }
 605
 606 /* Shift the significand left BITS bits, subtract BITS from its exponent.  */
 607 void
 608 APFloat::shiftSignificandLeft(unsigned int bits)
 609 {
 610   assert(bits < semantics->precision);
 611
 612   if(bits) {
 613     unsigned int partsCount = partCount();
 614
 615     APInt::tcShiftLeft(significandParts(), partsCount, bits);
 616     exponent -= bits;
 617
 618     assert(!APInt::tcIsZero(significandParts(), partsCount));
 619   }
 620 }
 621
 622 APFloat::cmpResult
 623 APFloat::compareAbsoluteValue(const APFloat &rhs) const
 624 {
 625   int compare;
 626
 627   assert(semantics == rhs.semantics);
 628   assert(category == fcNormal);
 629   assert(rhs.category == fcNormal);
 630
 631   compare = exponent - rhs.exponent;
 632
 633   /* If exponents are equal, do an unsigned bignum comparison of the
 634      significands.  */
 635   if(compare == 0)
 636     compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
 637                                partCount());
 638
 639   if(compare > 0)
 640     return cmpGreaterThan;
 641   else if(compare < 0)
 642     return cmpLessThan;
 643   else
 644     return cmpEqual;
 645 }
 646
 647 /* Handle overflow.  Sign is preserved.  We either become infinity or
 648    the largest finite number.  */
 649 APFloat::opStatus
 650 APFloat::handleOverflow(roundingMode rounding_mode)
 651 {
 652   /* Infinity?  */
 653   if(rounding_mode == rmNearestTiesToEven
 654      || rounding_mode == rmNearestTiesToAway
 655      || (rounding_mode == rmTowardPositive && !sign)
 656      || (rounding_mode == rmTowardNegative && sign))
 657     {
 658       category = fcInfinity;
 659       return (opStatus) (opOverflow | opInexact);
 660     }
 661
 662   /* Otherwise we become the largest finite number.  */
 663   category = fcNormal;
 664   exponent = semantics->maxExponent;
 665   APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
 666                                    semantics->precision);
 667
 668   return opInexact;
 669 }
 670
 671 /* This routine must work for fcZero of both signs, and fcNormal
 672    numbers.  */
 673 bool
 674 APFloat::roundAwayFromZero(roundingMode rounding_mode,
 675                            lostFraction lost_fraction)
 676 {
 677   /* QNaNs and infinities should not have lost fractions.  */
 678   assert(category == fcNormal || category == fcZero);
 679
 680   /* Our caller has already handled this case.  */
 681   assert(lost_fraction != lfExactlyZero);
 682
 683   switch(rounding_mode) {
 684   default:
 685     assert(0);
 686
 687   case rmNearestTiesToAway:
 688     return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
 689
 690   case rmNearestTiesToEven:
 691     if(lost_fraction == lfMoreThanHalf)
 692       return true;
 693
 694     /* Our zeroes don't have a significand to test.  */
 695     if(lost_fraction == lfExactlyHalf && category != fcZero)
 696       return significandParts()[0] & 1;
 697
 698     return false;
 699
 700   case rmTowardZero:
 701     return false;
 702
 703   case rmTowardPositive:
 704     return sign == false;
 705
 706   case rmTowardNegative:
 707     return sign == true;
 708   }
 709 }
 710
 711 APFloat::opStatus
 712 APFloat::normalize(roundingMode rounding_mode,
 713                    lostFraction lost_fraction)
 714 {
 715   unsigned int omsb;            /* One, not zero, based MSB.  */
 716   int exponentChange;
 717
 718   if(category != fcNormal)
 719     return opOK;
 720
 721   /* Before rounding normalize the exponent of fcNormal numbers.  */
 722   omsb = significandMSB() + 1;
 723
 724   if(omsb) {
 725     /* OMSB is numbered from 1.  We want to place it in the integer
 726        bit numbered PRECISON if possible, with a compensating change in
 727        the exponent.  */
 728     exponentChange = omsb - semantics->precision;
 729
 730     /* If the resulting exponent is too high, overflow according to
 731        the rounding mode.  */
 732     if(exponent + exponentChange > semantics->maxExponent)
 733       return handleOverflow(rounding_mode);
 734
 735     /* Subnormal numbers have exponent minExponent, and their MSB
 736        is forced based on that.  */
 737     if(exponent + exponentChange < semantics->minExponent)
 738       exponentChange = semantics->minExponent - exponent;
 739
 740     /* Shifting left is easy as we don't lose precision.  */
 741     if(exponentChange < 0) {
 742       assert(lost_fraction == lfExactlyZero);
 743
 744       shiftSignificandLeft(-exponentChange);
 745
 746       return opOK;
 747     }
 748
 749     if(exponentChange > 0) {
 750       lostFraction lf;
 751
 752       /* Shift right and capture any new lost fraction.  */
 753       lf = shiftSignificandRight(exponentChange);
 754
 755       lost_fraction = combineLostFractions(lf, lost_fraction);
 756
 757       /* Keep OMSB up-to-date.  */
 758       if(omsb > (unsigned) exponentChange)
 759         omsb -= (unsigned) exponentChange;
 760       else
 761         omsb = 0;
 762     }
 763   }
 764
 765   /* Now round the number according to rounding_mode given the lost
 766      fraction.  */
 767
 768   /* As specified in IEEE 754, since we do not trap we do not report
 769      underflow for exact results.  */
 770   if(lost_fraction == lfExactlyZero) {
 771     /* Canonicalize zeroes.  */
 772     if(omsb == 0)
 773       category = fcZero;
 774
 775     return opOK;
 776   }
 777
 778   /* Increment the significand if we're rounding away from zero.  */
 779   if(roundAwayFromZero(rounding_mode, lost_fraction)) {
 780     if(omsb == 0)
 781       exponent = semantics->minExponent;
 782
 783     incrementSignificand();
 784     omsb = significandMSB() + 1;
 785
 786     /* Did the significand increment overflow?  */
 787     if(omsb == (unsigned) semantics->precision + 1) {
 788       /* Renormalize by incrementing the exponent and shifting our
 789          significand right one.  However if we already have the
 790          maximum exponent we overflow to infinity.  */
 791       if(exponent == semantics->maxExponent) {
 792         category = fcInfinity;
 793
 794         return (opStatus) (opOverflow | opInexact);
 795       }
 796
 797       shiftSignificandRight(1);
 798
 799       return opInexact;
 800     }
 801   }
 802
 803   /* The normal case - we were and are not denormal, and any
 804      significand increment above didn't overflow.  */
 805   if(omsb == semantics->precision)
 806     return opInexact;
 807
 808   /* We have a non-zero denormal.  */
 809   assert(omsb < semantics->precision);
 810   assert(exponent == semantics->minExponent);
 811
 812   /* Canonicalize zeroes.  */
 813   if(omsb == 0)
 814     category = fcZero;
 815
 816   /* The fcZero case is a denormal that underflowed to zero.  */
 817   return (opStatus) (opUnderflow | opInexact);
 818 }
 819
 820 APFloat::opStatus
 821 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
 822 {
 823   switch(convolve(category, rhs.category)) {
 824   default:
 825     assert(0);
 826
 827   case convolve(fcQNaN, fcZero):
 828   case convolve(fcQNaN, fcNormal):
 829   case convolve(fcQNaN, fcInfinity):
 830   case convolve(fcQNaN, fcQNaN):
 831   case convolve(fcNormal, fcZero):
 832   case convolve(fcInfinity, fcNormal):
 833   case convolve(fcInfinity, fcZero):
 834     return opOK;
 835
 836   case convolve(fcZero, fcQNaN):
 837   case convolve(fcNormal, fcQNaN):
 838   case convolve(fcInfinity, fcQNaN):
 839     category = fcQNaN;
 840     return opOK;
 841
 842   case convolve(fcNormal, fcInfinity):
 843   case convolve(fcZero, fcInfinity):
 844     category = fcInfinity;
 845     sign = rhs.sign ^ subtract;
 846     return opOK;
 847
 848   case convolve(fcZero, fcNormal):
 849     assign(rhs);
 850     sign = rhs.sign ^ subtract;
 851     return opOK;
 852
 853   case convolve(fcZero, fcZero):
 854     /* Sign depends on rounding mode; handled by caller.  */
 855     return opOK;
 856
 857   case convolve(fcInfinity, fcInfinity):
 858     /* Differently signed infinities can only be validly
 859        subtracted.  */
 860     if(sign ^ rhs.sign != subtract) {
 861       category = fcQNaN;
 862       return opInvalidOp;
 863     }
 864
 865     return opOK;
 866
 867   case convolve(fcNormal, fcNormal):
 868     return opDivByZero;
 869   }
 870 }
 871
 872 /* Add or subtract two normal numbers.  */
 873 lostFraction
 874 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
 875 {
 876   integerPart carry;
 877   lostFraction lost_fraction;
 878   int bits;
 879
 880   /* Determine if the operation on the absolute values is effectively
 881      an addition or subtraction.  */
 882   subtract ^= (sign ^ rhs.sign);
 883
 884   /* Are we bigger exponent-wise than the RHS?  */
 885   bits = exponent - rhs.exponent;
 886
 887   /* Subtraction is more subtle than one might naively expect.  */
 888   if(subtract) {
 889     APFloat temp_rhs(rhs);
 890     bool reverse;
 891
 892     if(bits == 0) {
 893       reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
 894       lost_fraction = lfExactlyZero;
 895     } else if(bits > 0) {
 896       lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
 897       shiftSignificandLeft(1);
 898       reverse = false;
 899     } else if(bits < 0) {
 900       lost_fraction = shiftSignificandRight(-bits - 1);
 901       temp_rhs.shiftSignificandLeft(1);
 902       reverse = true;
 903     }
 904
 905     if(reverse) {
 906       carry = temp_rhs.subtractSignificand
 907         (*this, lost_fraction != lfExactlyZero);
 908       copySignificand(temp_rhs);
 909       sign = !sign;
 910     } else {
 911       carry = subtractSignificand
 912         (temp_rhs, lost_fraction != lfExactlyZero);
 913     }
 914
 915     /* Invert the lost fraction - it was on the RHS and
 916        subtracted.  */
 917     if(lost_fraction == lfLessThanHalf)
 918       lost_fraction = lfMoreThanHalf;
 919     else if(lost_fraction == lfMoreThanHalf)
 920       lost_fraction = lfLessThanHalf;
 921
 922     /* The code above is intended to ensure that no borrow is
 923        necessary.  */
 924     assert(!carry);
 925   } else {
 926     if(bits > 0) {
 927       APFloat temp_rhs(rhs);
 928
 929       lost_fraction = temp_rhs.shiftSignificandRight(bits);
 930       carry = addSignificand(temp_rhs);
 931     } else {
 932       lost_fraction = shiftSignificandRight(-bits);
 933       carry = addSignificand(rhs);
 934     }
 935
 936     /* We have a guard bit; generating a carry cannot happen.  */
 937     assert(!carry);
 938   }
 939
 940   return lost_fraction;
 941 }
 942
 943 APFloat::opStatus
 944 APFloat::multiplySpecials(const APFloat &rhs)
 945 {
 946   switch(convolve(category, rhs.category)) {
 947   default:
 948     assert(0);
 949
 950   case convolve(fcQNaN, fcZero):
 951   case convolve(fcQNaN, fcNormal):
 952   case convolve(fcQNaN, fcInfinity):
 953   case convolve(fcQNaN, fcQNaN):
 954   case convolve(fcZero, fcQNaN):
 955   case convolve(fcNormal, fcQNaN):
 956   case convolve(fcInfinity, fcQNaN):
 957     category = fcQNaN;
 958     return opOK;
 959
 960   case convolve(fcNormal, fcInfinity):
 961   case convolve(fcInfinity, fcNormal):
 962   case convolve(fcInfinity, fcInfinity):
 963     category = fcInfinity;
 964     return opOK;
 965
 966   case convolve(fcZero, fcNormal):
 967   case convolve(fcNormal, fcZero):
 968   case convolve(fcZero, fcZero):
 969     category = fcZero;
 970     return opOK;
 971
 972   case convolve(fcZero, fcInfinity):
 973   case convolve(fcInfinity, fcZero):
 974     category = fcQNaN;
 975     return opInvalidOp;
 976
 977   case convolve(fcNormal, fcNormal):
 978     return opOK;
 979   }
 980 }
 981
 982 APFloat::opStatus
 983 APFloat::divideSpecials(const APFloat &rhs)
 984 {
 985   switch(convolve(category, rhs.category)) {
 986   default:
 987     assert(0);
 988
 989   case convolve(fcQNaN, fcZero):
 990   case convolve(fcQNaN, fcNormal):
 991   case convolve(fcQNaN, fcInfinity):
 992   case convolve(fcQNaN, fcQNaN):
 993   case convolve(fcInfinity, fcZero):
 994   case convolve(fcInfinity, fcNormal):
 995   case convolve(fcZero, fcInfinity):
 996   case convolve(fcZero, fcNormal):
 997     return opOK;
 998
 999   case convolve(fcZero, fcQNaN):
1000   case convolve(fcNormal, fcQNaN):
1001   case convolve(fcInfinity, fcQNaN):
1002     category = fcQNaN;
1003     return opOK;
1004
1005   case convolve(fcNormal, fcInfinity):
1006     category = fcZero;
1007     return opOK;
1008
1009   case convolve(fcNormal, fcZero):
1010     category = fcInfinity;
1011     return opDivByZero;
1012
1013   case convolve(fcInfinity, fcInfinity):
1014   case convolve(fcZero, fcZero):
1015     category = fcQNaN;
1016     return opInvalidOp;
1017
1018   case convolve(fcNormal, fcNormal):
1019     return opOK;
1020   }
1021 }
1022
1023 /* Change sign.  */
1024 void
1025 APFloat::changeSign()
1026 {
1027   /* Look mummy, this one's easy.  */
1028   sign = !sign;
1029 }
1030
1031 /* Normalized addition or subtraction.  */
1032 APFloat::opStatus
1033 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1034                        bool subtract)
1035 {
1036   opStatus fs;
1037
1038   fs = addOrSubtractSpecials(rhs, subtract);
1039
1040   /* This return code means it was not a simple case.  */
1041   if(fs == opDivByZero) {
1042     lostFraction lost_fraction;
1043
1044     lost_fraction = addOrSubtractSignificand(rhs, subtract);
1045     fs = normalize(rounding_mode, lost_fraction);
1046
1047     /* Can only be zero if we lost no fraction.  */
1048     assert(category != fcZero || lost_fraction == lfExactlyZero);
1049   }
1050
1051   /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1052      positive zero unless rounding to minus infinity, except that
1053      adding two like-signed zeroes gives that zero.  */
1054   if(category == fcZero) {
1055     if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1056       sign = (rounding_mode == rmTowardNegative);
1057   }
1058
1059   return fs;
1060 }
1061
1062 /* Normalized addition.  */
1063 APFloat::opStatus
1064 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1065 {
1066   return addOrSubtract(rhs, rounding_mode, false);
1067 }
1068
1069 /* Normalized subtraction.  */
1070 APFloat::opStatus
1071 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1072 {
1073   return addOrSubtract(rhs, rounding_mode, true);
1074 }
1075
1076 /* Normalized multiply.  */
1077 APFloat::opStatus
1078 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1079 {
1080   opStatus fs;
1081
1082   sign ^= rhs.sign;
1083   fs = multiplySpecials(rhs);
1084
1085   if(category == fcNormal) {
1086     lostFraction lost_fraction = multiplySignificand(rhs, 0);
1087     fs = normalize(rounding_mode, lost_fraction);
1088     if(lost_fraction != lfExactlyZero)
1089       fs = (opStatus) (fs | opInexact);
1090   }
1091
1092   return fs;
1093 }
1094
1095 /* Normalized divide.  */
1096 APFloat::opStatus
1097 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1098 {
1099   opStatus fs;
1100
1101   sign ^= rhs.sign;
1102   fs = divideSpecials(rhs);
1103
1104   if(category == fcNormal) {
1105     lostFraction lost_fraction = divideSignificand(rhs);
1106     fs = normalize(rounding_mode, lost_fraction);
1107     if(lost_fraction != lfExactlyZero)
1108       fs = (opStatus) (fs | opInexact);
1109   }
1110
1111   return fs;
1112 }
1113
1114 /* Normalized fused-multiply-add.  */
1115 APFloat::opStatus
1116 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1117                           const APFloat &addend,
1118                           roundingMode rounding_mode)
1119 {
1120   opStatus fs;
1121
1122   /* Post-multiplication sign, before addition.  */
1123   sign ^= multiplicand.sign;
1124
1125   /* If and only if all arguments are normal do we need to do an
1126      extended-precision calculation.  */
1127   if(category == fcNormal
1128      && multiplicand.category == fcNormal
1129      && addend.category == fcNormal) {
1130     lostFraction lost_fraction;
1131
1132     lost_fraction = multiplySignificand(multiplicand, &addend);
1133     fs = normalize(rounding_mode, lost_fraction);
1134     if(lost_fraction != lfExactlyZero)
1135       fs = (opStatus) (fs | opInexact);
1136
1137     /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1138        positive zero unless rounding to minus infinity, except that
1139        adding two like-signed zeroes gives that zero.  */
1140     if(category == fcZero && sign != addend.sign)
1141       sign = (rounding_mode == rmTowardNegative);
1142   } else {
1143     fs = multiplySpecials(multiplicand);
1144
1145     /* FS can only be opOK or opInvalidOp.  There is no more work
1146        to do in the latter case.  The IEEE-754R standard says it is
1147        implementation-defined in this case whether, if ADDEND is a
1148        quiet QNaN, we raise invalid op; this implementation does so.
1149
1150        If we need to do the addition we can do so with normal
1151        precision.  */
1152     if(fs == opOK)
1153       fs = addOrSubtract(addend, rounding_mode, false);
1154   }
1155
1156   return fs;
1157 }
1158
1159 /* Comparison requires normalized numbers.  */
1160 APFloat::cmpResult
1161 APFloat::compare(const APFloat &rhs) const
1162 {
1163   cmpResult result;
1164
1165   assert(semantics == rhs.semantics);
1166
1167   switch(convolve(category, rhs.category)) {
1168   default:
1169     assert(0);
1170
1171   case convolve(fcQNaN, fcZero):
1172   case convolve(fcQNaN, fcNormal):
1173   case convolve(fcQNaN, fcInfinity):
1174   case convolve(fcQNaN, fcQNaN):
1175   case convolve(fcZero, fcQNaN):
1176   case convolve(fcNormal, fcQNaN):
1177   case convolve(fcInfinity, fcQNaN):
1178     return cmpUnordered;
1179
1180   case convolve(fcInfinity, fcNormal):
1181   case convolve(fcInfinity, fcZero):
1182   case convolve(fcNormal, fcZero):
1183     if(sign)
1184       return cmpLessThan;
1185     else
1186       return cmpGreaterThan;
1187
1188   case convolve(fcNormal, fcInfinity):
1189   case convolve(fcZero, fcInfinity):
1190   case convolve(fcZero, fcNormal):
1191     if(rhs.sign)
1192       return cmpGreaterThan;
1193     else
1194       return cmpLessThan;
1195
1196   case convolve(fcInfinity, fcInfinity):
1197     if(sign == rhs.sign)
1198       return cmpEqual;
1199     else if(sign)
1200       return cmpLessThan;
1201     else
1202       return cmpGreaterThan;
1203
1204   case convolve(fcZero, fcZero):
1205     return cmpEqual;
1206
1207   case convolve(fcNormal, fcNormal):
1208     break;
1209   }
1210
1211   /* Two normal numbers.  Do they have the same sign?  */
1212   if(sign != rhs.sign) {
1213     if(sign)
1214       result = cmpLessThan;
1215     else
1216       result = cmpGreaterThan;
1217   } else {
1218     /* Compare absolute values; invert result if negative.  */
1219     result = compareAbsoluteValue(rhs);
1220
1221     if(sign) {
1222       if(result == cmpLessThan)
1223         result = cmpGreaterThan;
1224       else if(result == cmpGreaterThan)
1225         result = cmpLessThan;
1226     }
1227   }
1228
1229   return result;
1230 }
1231
1232 APFloat::opStatus
1233 APFloat::convert(const fltSemantics &toSemantics,
1234                  roundingMode rounding_mode)
1235 {
1236   unsigned int newPartCount;
1237   opStatus fs;
1238
1239   newPartCount = partCountForBits(toSemantics.precision + 1);
1240
1241   /* If our new form is wider, re-allocate our bit pattern into wider
1242      storage.  */
1243   if(newPartCount > partCount()) {
1244     integerPart *newParts;
1245
1246     newParts = new integerPart[newPartCount];
1247     APInt::tcSet(newParts, 0, newPartCount);
1248     APInt::tcAssign(newParts, significandParts(), partCount());
1249     freeSignificand();
1250     significand.parts = newParts;
1251   }
1252
1253   if(category == fcNormal) {
1254     /* Re-interpret our bit-pattern.  */
1255     exponent += toSemantics.precision - semantics->precision;
1256     semantics = &toSemantics;
1257     fs = normalize(rounding_mode, lfExactlyZero);
1258   } else {
1259     semantics = &toSemantics;
1260     fs = opOK;
1261   }
1262
1263   return fs;
1264 }
1265
1266 /* Convert a floating point number to an integer according to the
1267    rounding mode.  If the rounded integer value is out of range this
1268    returns an invalid operation exception.  If the rounded value is in
1269    range but the floating point number is not the exact integer, the C
1270    standard doesn't require an inexact exception to be raised.  IEEE
1271    854 does require it so we do that.
1272
1273    Note that for conversions to integer type the C standard requires
1274    round-to-zero to always be used.  */
1275 APFloat::opStatus
1276 APFloat::convertToInteger(integerPart *parts, unsigned int width,
1277                           bool isSigned,
1278                           roundingMode rounding_mode) const
1279 {
1280   lostFraction lost_fraction;
1281   unsigned int msb, partsCount;
1282   int bits;
1283
1284   /* Handle the three special cases first.  */
1285   if(category == fcInfinity || category == fcQNaN)
1286     return opInvalidOp;
1287
1288   partsCount = partCountForBits(width);
1289
1290   if(category == fcZero) {
1291     APInt::tcSet(parts, 0, partsCount);
1292     return opOK;
1293   }
1294
1295   /* Shift the bit pattern so the fraction is lost.  */
1296   APFloat tmp(*this);
1297
1298   bits = (int) semantics->precision - 1 - exponent;
1299
1300   if(bits > 0) {
1301     lost_fraction = tmp.shiftSignificandRight(bits);
1302   } else {
1303     tmp.shiftSignificandLeft(-bits);
1304     lost_fraction = lfExactlyZero;
1305   }
1306
1307   if(lost_fraction != lfExactlyZero
1308      && tmp.roundAwayFromZero(rounding_mode, lost_fraction))
1309     tmp.incrementSignificand();
1310
1311   msb = tmp.significandMSB();
1312
1313   /* Negative numbers cannot be represented as unsigned.  */
1314   if(!isSigned && tmp.sign && msb != -1U)
1315     return opInvalidOp;
1316
1317   /* It takes exponent + 1 bits to represent the truncated floating
1318      point number without its sign.  We lose a bit for the sign, but
1319      the maximally negative integer is a special case.  */
1320   if(msb + 1 > width)           /* !! Not same as msb >= width !! */
1321     return opInvalidOp;
1322
1323   if(isSigned && msb + 1 == width
1324      && (!tmp.sign || tmp.significandLSB() != msb))
1325     return opInvalidOp;
1326
1327   APInt::tcAssign(parts, tmp.significandParts(), partsCount);
1328
1329   if(tmp.sign)
1330     APInt::tcNegate(parts, partsCount);
1331
1332   if(lost_fraction == lfExactlyZero)
1333     return opOK;
1334   else
1335     return opInexact;
1336 }
1337
1338 APFloat::opStatus
1339 APFloat::convertFromUnsignedInteger(integerPart *parts,
1340                                     unsigned int partCount,
1341                                     roundingMode rounding_mode)
1342 {
1343   unsigned int msb, precision;
1344   lostFraction lost_fraction;
1345
1346   msb = APInt::tcMSB(parts, partCount) + 1;
1347   precision = semantics->precision;
1348
1349   category = fcNormal;
1350   exponent = precision - 1;
1351
1352   if(msb > precision) {
1353     exponent += (msb - precision);
1354     lost_fraction = shiftRight(parts, partCount, msb - precision);
1355     msb = precision;
1356   } else
1357     lost_fraction = lfExactlyZero;
1358
1359   /* Copy the bit image.  */
1360   zeroSignificand();
1361   APInt::tcAssign(significandParts(), parts, partCountForBits(msb));
1362
1363   return normalize(rounding_mode, lost_fraction);
1364 }
1365
1366 APFloat::opStatus
1367 APFloat::convertFromInteger(const integerPart *parts,
1368                             unsigned int partCount, bool isSigned,
1369                             roundingMode rounding_mode)
1370 {
1371   unsigned int width;
1372   opStatus status;
1373   integerPart *copy;
1374
1375   copy = new integerPart[partCount];
1376   APInt::tcAssign(copy, parts, partCount);
1377
1378   width = partCount * integerPartWidth;
1379
1380   sign = false;
1381   if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
1382     sign = true;
1383     APInt::tcNegate(copy, partCount);
1384   }
1385
1386   status = convertFromUnsignedInteger(copy, partCount, rounding_mode);
1387   delete [] copy;
1388
1389   return status;
1390 }
1391
1392 APFloat::opStatus
1393 APFloat::convertFromHexadecimalString(const char *p,
1394                                       roundingMode rounding_mode)
1395 {
1396   lostFraction lost_fraction;
1397   integerPart *significand;
1398   unsigned int bitPos, partsCount;
1399   const char *dot, *firstSignificantDigit;
1400
1401   zeroSignificand();
1402   exponent = 0;
1403   category = fcNormal;
1404
1405   significand = significandParts();
1406   partsCount = partCount();
1407   bitPos = partsCount * integerPartWidth;
1408
1409   /* Skip leading zeroes and any(hexa)decimal point.  */
1410   p = skipLeadingZeroesAndAnyDot(p, &dot);
1411   firstSignificantDigit = p;
1412
1413   for(;;) {
1414     integerPart hex_value;
1415
1416     if(*p == '.') {
1417       assert(dot == 0);
1418       dot = p++;
1419     }
1420
1421     hex_value = hexDigitValue(*p);
1422     if(hex_value == -1U) {
1423       lost_fraction = lfExactlyZero;
1424       break;
1425     }
1426
1427     p++;
1428
1429     /* Store the number whilst 4-bit nibbles remain.  */
1430     if(bitPos) {
1431       bitPos -= 4;
1432       hex_value <<= bitPos % integerPartWidth;
1433       significand[bitPos / integerPartWidth] |= hex_value;
1434     } else {
1435       lost_fraction = trailingHexadecimalFraction(p, hex_value);
1436       while(hexDigitValue(*p) != -1U)
1437         p++;
1438       break;
1439     }
1440   }
1441
1442   /* Hex floats require an exponent but not a hexadecimal point.  */
1443   assert(*p == 'p' || *p == 'P');
1444
1445   /* Ignore the exponent if we are zero.  */
1446   if(p != firstSignificantDigit) {
1447     int expAdjustment;
1448
1449     /* Implicit hexadecimal point?  */
1450     if(!dot)
1451       dot = p;
1452
1453     /* Calculate the exponent adjustment implicit in the number of
1454        significant digits.  */
1455     expAdjustment = dot - firstSignificantDigit;
1456     if(expAdjustment < 0)
1457       expAdjustment++;
1458     expAdjustment = expAdjustment * 4 - 1;
1459
1460     /* Adjust for writing the significand starting at the most
1461        significant nibble.  */
1462     expAdjustment += semantics->precision;
1463     expAdjustment -= partsCount * integerPartWidth;
1464
1465     /* Adjust for the given exponent.  */
1466     exponent = totalExponent(p, expAdjustment);
1467   }
1468
1469   return normalize(rounding_mode, lost_fraction);
1470 }
1471
1472 APFloat::opStatus
1473 APFloat::convertFromString(const char *p, roundingMode rounding_mode)
1474 {
1475   /* Handle a leading minus sign.  */
1476   if(*p == '-')
1477     sign = 1, p++;
1478   else
1479     sign = 0;
1480
1481   if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
1482     return convertFromHexadecimalString(p + 2, rounding_mode);
1483   else
1484     {
1485       assert(0 && "Decimal to binary conversions not yet imlemented");
1486       abort();
1487     }
1488 }