1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/MathExtras.h"
23 #include <cstring>
24 #include <limits.h>
25 
26 using namespace llvm;
27 
/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
///
/// The key is computed as _lhs * 4 + _rhs, so two different pairs only map to
/// different keys while every category value is below 4.
/// NOTE(review): presumably fcCategory has exactly four enumerators -- confirm
/// against APFloat.h before adding a new category.
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))

/* Assumed in hexadecimal significand parsing, and conversion to
   hexadecimal strings: a part must hold a whole number of 4-bit hex
   digits.  */
static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
39 
namespace llvm {

  /* Represents floating point arithmetic semantics.  The constants
     below brace-initialize this struct, so the member order here
     (maxExponent, minExponent, precision, sizeInBits) must not
     change.  */
  struct fltSemantics {
    /* The largest E such that 2^E is representable; this matches the
       definition of IEEE 754.  */
    APFloat::ExponentType maxExponent;

    /* The smallest E such that 2^E is a normalized number; this
       matches the definition of IEEE 754.  */
    APFloat::ExponentType minExponent;

    /* Number of bits in the significand.  This includes the integer
       bit.  */
    unsigned int precision;

    /* Number of bits actually used in the semantics. */
    unsigned int sizeInBits;
  };

  /* Each initializer is { maxExponent, minExponent, precision,
     sizeInBits }.  Bogus has every field zero; the move assignment
     operator in this file points a moved-from APFloat at it.  */
  const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, 16 };
  const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, 32 };
  const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, 64 };
  const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, 128 };
  const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, 80 };
  const fltSemantics APFloat::Bogus = { 0, 0, 0, 0 };

  /* The PowerPC format consists of two doubles.  It does not map cleanly
     onto the usual format above.  It is approximated using twice the
     mantissa bits.  Note that for exponents near the double minimum,
     we no longer can represent the full 106 mantissa bits, so those
     will be treated as denormal numbers.

     FIXME: While this approximation is equivalent to what GCC uses for
     compile-time arithmetic on PPC double-double numbers, it is not able
     to represent all possible values held by a PPC double-double number,
     for example: (long double) 1.0 + (long double) 0x1p-106
     Should this be replaced by a full emulation of PPC double-double?  */
  const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53, 128 };

  /* A tight upper bound on number of parts required to hold the value
     pow(5, power) is

       power * 815 / (351 * integerPartWidth) + 1

     However, whilst the result may require only this many parts,
     because we are multiplying two values to get it, the
     multiplication may require an extra part with the excess part
     being zero (consider the trivial case of 1 * 1, tcFullMultiply
     requires two parts to hold the single-part result).  So we add an
     extra one to guarantee enough space whilst multiplying.

     maxExponent and maxPrecision equal the largest maxExponent (16383)
     and precision (113) among the semantics defined above; they must be
     raised if a wider format is added.  */
  const unsigned int maxExponent = 16383;
  const unsigned int maxPrecision = 113;
  const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
  const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
                                                / (351 * integerPartWidth));
}
97 
98 /* A bunch of private, handy routines.  */
99 
100 static inline unsigned int
partCountForBits(unsigned int bits)101 partCountForBits(unsigned int bits)
102 {
103   return ((bits) + integerPartWidth - 1) / integerPartWidth;
104 }
105 
/* Map a character code to its decimal digit value.  Digits yield
   0U-9U; any other input yields a value >= 10U because the unsigned
   subtraction wraps around.  */
static inline unsigned int
decDigitValue(unsigned int c)
{
  const unsigned int zero = '0';
  return c - zero;
}
112 
113 /* Return the value of a decimal exponent of the form
114    [+-]ddddddd.
115 
116    If the exponent overflows, returns a large exponent with the
117    appropriate sign.  */
118 static int
readExponent(StringRef::iterator begin,StringRef::iterator end,APFloat::opStatus & fp)119 readExponent(StringRef::iterator begin, StringRef::iterator end, APFloat::opStatus &fp)
120 {
121   bool isNegative;
122   unsigned int absExponent;
123   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
124   StringRef::iterator p = begin;
125 
126   fp = APFloat::opOK;
127 
128   //assert(p != end && "Exponent has no digits"); // qq
129   if (p == end) {
130       fp = APFloat::opInvalidOp;
131       return 0;
132   }
133 
134   isNegative = (*p == '-');
135   if (*p == '-' || *p == '+') {
136     p++;
137     //assert(p != end && "Exponent has no digits");
138     if (p == end) {
139       fp = APFloat::opInvalidOp;
140       return 0;
141     }
142   }
143 
144   absExponent = decDigitValue(*p++);
145   //assert(absExponent < 10U && "Invalid character in exponent");
146   if (absExponent >= 10U) {
147       fp = APFloat::opInvalidOp;
148       return 0;
149   }
150 
151   for (; p != end; ++p) {
152     unsigned int value;
153 
154     value = decDigitValue(*p);
155     //assert(value < 10U && "Invalid character in exponent");
156     if (value >= 10U) {
157         fp = APFloat::opInvalidOp;
158         return 0;
159     }
160 
161     value += absExponent * 10;
162     if (absExponent >= overlargeExponent) {
163       absExponent = overlargeExponent;
164       p = end;  /* outwit assert below */
165       break;
166     }
167     absExponent = value;
168   }
169 
170   //assert(p == end && "Invalid exponent in exponent");
171   if (p != end) {
172       fp = APFloat::opInvalidOp;
173       return 0;
174   }
175 
176   if (isNegative)
177     return -(int) absExponent;
178   else
179     return (int) absExponent;
180 }
181 
182 /* This is ugly and needs cleaning up, but I don't immediately see
183    how whilst remaining safe.  */
184 static int
totalExponent(StringRef::iterator p,StringRef::iterator end,int exponentAdjustment)185 totalExponent(StringRef::iterator p, StringRef::iterator end,
186               int exponentAdjustment)
187 {
188   int unsignedExponent;
189   bool negative, overflow;
190   int exponent = 0;
191 
192   assert(p != end && "Exponent has no digits");
193 
194   negative = *p == '-';
195   if (*p == '-' || *p == '+') {
196     p++;
197     assert(p != end && "Exponent has no digits");
198   }
199 
200   unsignedExponent = 0;
201   overflow = false;
202   for (; p != end; ++p) {
203     unsigned int value;
204 
205     value = decDigitValue(*p);
206     assert(value < 10U && "Invalid character in exponent");
207 
208     unsignedExponent = unsignedExponent * 10 + value;
209     if (unsignedExponent > 32767) {
210       overflow = true;
211       break;
212     }
213   }
214 
215   if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
216     overflow = true;
217 
218   if (!overflow) {
219     exponent = unsignedExponent;
220     if (negative)
221       exponent = -exponent;
222     exponent += exponentAdjustment;
223     if (exponent > 32767 || exponent < -32768)
224       overflow = true;
225   }
226 
227   if (overflow)
228     exponent = negative ? -32768: 32767;
229 
230   return exponent;
231 }
232 
233 static StringRef::iterator
skipLeadingZeroesAndAnyDot(StringRef::iterator begin,StringRef::iterator end,StringRef::iterator * dot)234 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
235                            StringRef::iterator *dot)
236 {
237   StringRef::iterator p = begin;
238   *dot = end;
239   while (p != end && *p == '0')
240     p++;
241 
242   if (p != end && *p == '.') {
243     *dot = p++;
244 
245     assert(end - begin != 1 && "Significand has no digits");
246 
247     while (p != end && *p == '0')
248       p++;
249   }
250 
251   return p;
252 }
253 
/* Given a normal decimal floating point number of the form

     dddd.dddd[eE][+-]ddd

   where the decimal point and exponent are optional, fill out the
   structure D.  Exponent is appropriate if the significand is
   treated as an integer, and normalizedExponent if the significand
   is taken to have the decimal point after a single leading
   non-zero digit.

   If the value is zero, D->firstSigDigit points to a non-digit, and
   the return exponent is zero.
*/
struct decimalInfo {
  /* Position reached after skipping leading zeroes and any dot.  */
  const char *firstSigDigit;
  /* Position left after insignificant trailing zeroes are dropped.  */
  const char *lastSigDigit;
  /* Decimal exponent with the significand read as an integer.  */
  int exponent;
  /* Decimal exponent with the point after the first significant digit.  */
  int normalizedExponent;
};
273 
274 APFloat::opStatus
interpretDecimal(StringRef::iterator begin,StringRef::iterator end,decimalInfo * D)275 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
276                  decimalInfo *D)
277 {
278   StringRef::iterator dot = end;
279   StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
280   APFloat::opStatus fp;
281 
282   D->firstSigDigit = p;
283   D->exponent = 0;
284   D->normalizedExponent = 0;
285 
286   for (; p != end; ++p) {
287     if (*p == '.') {
288       //assert(dot == end && "String contains multiple dots");
289       if (dot != end)
290           return APFloat::opInvalidOp;
291       dot = p++;
292       if (p == end)
293         break;
294     }
295     if (decDigitValue(*p) >= 10U)
296       break;
297   }
298 
299   if (p != end) {
300     //assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
301     if (*p != 'e' && *p != 'E')
302         return APFloat::opInvalidOp;
303     //assert(p != begin && "Significand has no digits");
304     if (p == begin)
305         return APFloat::opInvalidOp;
306     //assert((dot == end || p - begin != 1) && "Significand has no digits");
307     if (dot != end && p - begin == 1)
308         return APFloat::opInvalidOp;
309 
310     /* p points to the first non-digit in the string */
311     D->exponent = readExponent(p + 1, end, fp); // qq
312     if (fp)
313         return fp;
314 
315     /* Implied decimal point?  */
316     if (dot == end)
317       dot = p;
318   }
319 
320   /* If number is all zeroes accept any exponent.  */
321   if (p != D->firstSigDigit) {
322     /* Drop insignificant trailing zeroes.  */
323     if (p != begin) {
324       do
325         do
326           p--;
327         while (p != begin && *p == '0');
328       while (p != begin && *p == '.');
329     }
330 
331     /* Adjust the exponents for any decimal point.  */
332     D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
333     D->normalizedExponent = (D->exponent +
334               static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
335                                       - (dot > D->firstSigDigit && dot < p)));
336   }
337 
338   D->lastSigDigit = p;
339 
340   return APFloat::opOK;
341 }
342 
343 /* Return the trailing fraction of a hexadecimal number.
344    DIGITVALUE is the first hex digit of the fraction, P points to
345    the next digit.  */
346 static lostFraction
trailingHexadecimalFraction(StringRef::iterator p,StringRef::iterator end,unsigned int digitValue)347 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
348                             unsigned int digitValue)
349 {
350   unsigned int hexDigit;
351 
352   /* If the first trailing digit isn't 0 or 8 we can work out the
353      fraction immediately.  */
354   if (digitValue > 8)
355     return lfMoreThanHalf;
356   else if (digitValue < 8 && digitValue > 0)
357     return lfLessThanHalf;
358 
359   // Otherwise we need to find the first non-zero digit.
360   while (p != end && (*p == '0' || *p == '.'))
361     p++;
362 
363   assert(p != end && "Invalid trailing hexadecimal fraction!");
364 
365   hexDigit = hexDigitValue(*p);
366 
367   /* If we ran off the end it is exactly zero or one-half, otherwise
368      a little more.  */
369   if (hexDigit == -1U)
370     return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
371   else
372     return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
373 }
374 
375 /* Return the fraction lost were a bignum truncated losing the least
376    significant BITS bits.  */
377 static lostFraction
lostFractionThroughTruncation(const integerPart * parts,unsigned int partCount,unsigned int bits)378 lostFractionThroughTruncation(const integerPart *parts,
379                               unsigned int partCount,
380                               unsigned int bits)
381 {
382   unsigned int lsb;
383 
384   lsb = APInt::tcLSB(parts, partCount);
385 
386   /* Note this is guaranteed true if bits == 0, or LSB == -1U.  */
387   if (bits <= lsb)
388     return lfExactlyZero;
389   if (bits == lsb + 1)
390     return lfExactlyHalf;
391   if (bits <= partCount * integerPartWidth &&
392       APInt::tcExtractBit(parts, bits - 1))
393     return lfMoreThanHalf;
394 
395   return lfLessThanHalf;
396 }
397 
398 /* Shift DST right BITS bits noting lost fraction.  */
399 static lostFraction
shiftRight(integerPart * dst,unsigned int parts,unsigned int bits)400 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
401 {
402   lostFraction lost_fraction;
403 
404   lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
405 
406   APInt::tcShiftRight(dst, parts, bits);
407 
408   return lost_fraction;
409 }
410 
411 /* Combine the effect of two lost fractions.  */
412 static lostFraction
combineLostFractions(lostFraction moreSignificant,lostFraction lessSignificant)413 combineLostFractions(lostFraction moreSignificant,
414                      lostFraction lessSignificant)
415 {
416   if (lessSignificant != lfExactlyZero) {
417     if (moreSignificant == lfExactlyZero)
418       moreSignificant = lfLessThanHalf;
419     else if (moreSignificant == lfExactlyHalf)
420       moreSignificant = lfMoreThanHalf;
421   }
422 
423   return moreSignificant;
424 }
425 
/* Bound, in half-ulps, on the error of a product of two floating
   point numbers that are themselves off from the values they
   approximate by at most HUERR1 and HUERR2 half-ulps.  The true error
   is strictly less than the returned value.  INEXACTMULTIPLY says
   whether the multiplication itself was inexact.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int inputError = HUerr1 + HUerr2;

  if (inputError != 0)
    return inexactMultiply + 2 * inputError;

  /* Exact inputs: <= inexactMultiply half-ulps.  */
  return inexactMultiply * 2;
}
443 
444 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
445    when the least significant BITS are truncated.  BITS cannot be
446    zero.  */
447 static integerPart
ulpsFromBoundary(const integerPart * parts,unsigned int bits,bool isNearest)448 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
449 {
450   unsigned int count, partBits;
451   integerPart part, boundary;
452 
453   assert(bits != 0);
454 
455   bits--;
456   count = bits / integerPartWidth;
457   partBits = bits % integerPartWidth + 1;
458 
459   part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
460 
461   if (isNearest)
462     boundary = (integerPart) 1 << (partBits - 1);
463   else
464     boundary = 0;
465 
466   if (count == 0) {
467     if (part - boundary <= boundary - part)
468       return part - boundary;
469     else
470       return boundary - part;
471   }
472 
473   if (part == boundary) {
474     while (--count)
475       if (parts[count])
476         return ~(integerPart) 0; /* A lot.  */
477 
478     return parts[0];
479   } else if (part == boundary - 1) {
480     while (--count)
481       if (~parts[count])
482         return ~(integerPart) 0; /* A lot.  */
483 
484     return -parts[0];
485   }
486 
487   return ~(integerPart) 0; /* A lot.  */
488 }
489 
/* Place pow(5, power) in DST, and return the number of parts used.
   DST must be at least one part larger than size of the answer.

   The low three bits of POWER are handled by a table lookup; the
   remaining bits are handled by square-and-multiply over the values
   5^8, 5^16, 5^32, ..., which are built up on demand in POW5S.  */
static unsigned int
powerOf5(integerPart *dst, unsigned int power)
{
  static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
                                                  15625, 78125 };
  /* Successive squarings of 5^8 stored back to back; entry 0 is 5^8.  */
  integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
  pow5s[0] = 78125 * 5;

  /* partsCount[n] is the size in parts of 5^(2^(n+3)), or 0 if it has
     not been computed yet.  This array is local, so the squarings are
     redone on every call.  */
  unsigned int partsCount[16] = { 1 };
  integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
  unsigned int result;
  assert(power <= maxExponent);

  /* p1 holds the running product, p2 is the buffer the next product
     is written into; they swap roles after every multiplication.  */
  p1 = dst;
  p2 = scratch;

  *p1 = firstEightPowers[power & 7];
  power >>= 3;

  result = 1;
  pow5 = pow5s;

  for (unsigned int n = 0; power; power >>= 1, n++) {
    unsigned int pc;

    pc = partsCount[n];

    /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
    if (pc == 0) {
      /* Square the previous entry, which sits immediately before pow5.  */
      pc = partsCount[n - 1];
      APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
      pc *= 2;
      /* Drop the top part if the product did not need it.  */
      if (pow5[pc - 1] == 0)
        pc--;
      partsCount[n] = pc;
    }

    if (power & 1) {
      integerPart *tmp;

      APInt::tcFullMultiply(p2, p1, pow5, result, pc);
      result += pc;
      if (p2[result - 1] == 0)
        result--;

      /* Now result is in p1 with partsCount parts and p2 is scratch
         space.  */
      tmp = p1, p1 = p2, p2 = tmp;
    }

    /* Step to where the next squaring will be stored.  */
    pow5 += pc;
  }

  /* The running product may have ended up in the scratch buffer.  */
  if (p1 != dst)
    APInt::tcAssign(dst, p1, result);

  return result;
}
550 
/* Hex digit lookup tables, indexed by nibble value.  Zero at the end
   to avoid modular arithmetic when adding one; used when rounding up
   during hexadecimal output.  */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
/* Lower- and upper-case spellings of infinity and NaN.  */
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";
559 
560 /* Write out an integerPart in hexadecimal, starting with the most
561    significant nibble.  Write out exactly COUNT hexdigits, return
562    COUNT.  */
563 static unsigned int
partAsHex(char * dst,integerPart part,unsigned int count,const char * hexDigitChars)564 partAsHex (char *dst, integerPart part, unsigned int count,
565            const char *hexDigitChars)
566 {
567   unsigned int result = count;
568 
569   assert(count != 0 && count <= integerPartWidth / 4);
570 
571   part >>= (integerPartWidth - 4 * count);
572   while (count--) {
573     dst[count] = hexDigitChars[part & 0xf];
574     part >>= 4;
575   }
576 
577   return result;
578 }
579 
/* Write N to DST as an unsigned decimal integer and return a pointer
   just past the last character written.  No terminator is added.  */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char reversed[40];
  unsigned int length = 0;

  /* Produce the digits least significant first; zero yields "0".  */
  do {
    reversed[length++] = '0' + n % 10;
    n /= 10;
  } while (n != 0);

  /* Emit them in reading order.  */
  while (length != 0)
    *dst++ = reversed[--length];

  return dst;
}
597 
598 /* Write out a signed decimal integer.  */
599 static char *
writeSignedDecimal(char * dst,int value)600 writeSignedDecimal (char *dst, int value)
601 {
602   if (value < 0) {
603     *dst++ = '-';
604     dst = writeUnsignedDecimal(dst, -(unsigned) value);
605   } else
606     dst = writeUnsignedDecimal(dst, value);
607 
608   return dst;
609 }
610 
611 /* Constructors.  */
612 void
initialize(const fltSemantics * ourSemantics)613 APFloat::initialize(const fltSemantics *ourSemantics)
614 {
615   unsigned int count;
616 
617   semantics = ourSemantics;
618   count = partCount();
619   if (count > 1)
620     significand.parts = new integerPart[count];
621 }
622 
623 void
freeSignificand()624 APFloat::freeSignificand()
625 {
626   if (needsCleanup())
627     delete [] significand.parts;
628 }
629 
630 void
assign(const APFloat & rhs)631 APFloat::assign(const APFloat &rhs)
632 {
633   assert(semantics == rhs.semantics);
634 
635   sign = rhs.sign;
636   category = rhs.category;
637   exponent = rhs.exponent;
638   if (isFiniteNonZero() || category == fcNaN)
639     copySignificand(rhs);
640 }
641 
642 void
copySignificand(const APFloat & rhs)643 APFloat::copySignificand(const APFloat &rhs)
644 {
645   assert(isFiniteNonZero() || category == fcNaN);
646   assert(rhs.partCount() >= partCount());
647 
648   APInt::tcAssign(significandParts(), rhs.significandParts(),
649                   partCount());
650 }
651 
652 /* Make this number a NaN, with an arbitrary but deterministic value
653    for the significand.  If double or longer, this is a signalling NaN,
654    which may not be ideal.  If float, this is QNaN(0).  */
makeNaN(bool SNaN,bool Negative,const APInt * fill)655 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
656 {
657   category = fcNaN;
658   sign = Negative;
659 
660   integerPart *significand = significandParts();
661   unsigned numParts = partCount();
662 
663   // Set the significand bits to the fill.
664   if (!fill || fill->getNumWords() < numParts)
665     APInt::tcSet(significand, 0, numParts);
666   if (fill) {
667     APInt::tcAssign(significand, fill->getRawData(),
668                     std::min(fill->getNumWords(), numParts));
669 
670     // Zero out the excess bits of the significand.
671     unsigned bitsToPreserve = semantics->precision - 1;
672     unsigned part = bitsToPreserve / 64;
673     bitsToPreserve %= 64;
674     significand[part] &= ((1ULL << bitsToPreserve) - 1);
675     for (part++; part != numParts; ++part)
676       significand[part] = 0;
677   }
678 
679   unsigned QNaNBit = semantics->precision - 2;
680 
681   if (SNaN) {
682     // We always have to clear the QNaN bit to make it an SNaN.
683     APInt::tcClearBit(significand, QNaNBit);
684 
685     // If there are no bits set in the payload, we have to set
686     // *something* to make it a NaN instead of an infinity;
687     // conventionally, this is the next bit down from the QNaN bit.
688     if (APInt::tcIsZero(significand, numParts))
689       APInt::tcSetBit(significand, QNaNBit - 1);
690   } else {
691     // We always have to set the QNaN bit to make it a QNaN.
692     APInt::tcSetBit(significand, QNaNBit);
693   }
694 
695   // For x87 extended precision, we want to make a NaN, not a
696   // pseudo-NaN.  Maybe we should expose the ability to make
697   // pseudo-NaNs?
698   if (semantics == &APFloat::x87DoubleExtended)
699     APInt::tcSetBit(significand, QNaNBit + 1);
700 }
701 
makeNaN(const fltSemantics & Sem,bool SNaN,bool Negative,const APInt * fill)702 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
703                          const APInt *fill) {
704   APFloat value(Sem, uninitialized);
705   value.makeNaN(SNaN, Negative, fill);
706   return value;
707 }
708 
709 APFloat &
operator =(const APFloat & rhs)710 APFloat::operator=(const APFloat &rhs)
711 {
712   if (this != &rhs) {
713     if (semantics != rhs.semantics) {
714       freeSignificand();
715       initialize(rhs.semantics);
716     }
717     assign(rhs);
718   }
719 
720   return *this;
721 }
722 
723 APFloat &
operator =(APFloat && rhs)724 APFloat::operator=(APFloat &&rhs) {
725   freeSignificand();
726 
727   semantics = rhs.semantics;
728   significand = rhs.significand;
729   exponent = rhs.exponent;
730   category = rhs.category;
731   sign = rhs.sign;
732 
733   rhs.semantics = &Bogus;
734   return *this;
735 }
736 
737 bool
isDenormal() const738 APFloat::isDenormal() const {
739   return isFiniteNonZero() && (exponent == semantics->minExponent) &&
740          (APInt::tcExtractBit(significandParts(),
741                               semantics->precision - 1) == 0);
742 }
743 
744 bool
isSmallest() const745 APFloat::isSmallest() const {
746   // The smallest number by magnitude in our format will be the smallest
747   // denormal, i.e. the floating point number with exponent being minimum
748   // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
749   return isFiniteNonZero() && exponent == semantics->minExponent &&
750     significandMSB() == 0;
751 }
752 
isSignificandAllOnes() const753 bool APFloat::isSignificandAllOnes() const {
754   // Test if the significand excluding the integral bit is all ones. This allows
755   // us to test for binade boundaries.
756   const integerPart *Parts = significandParts();
757   const unsigned PartCount = partCount();
758   for (unsigned i = 0; i < PartCount - 1; i++)
759     if (~Parts[i])
760       return false;
761 
762   // Set the unused high bits to all ones when we compare.
763   const unsigned NumHighBits =
764     PartCount*integerPartWidth - semantics->precision + 1;
765   assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
766          "fill than integerPartWidth");
767   const integerPart HighBitFill =
768     ~integerPart(0) << (integerPartWidth - NumHighBits);
769   if (~(Parts[PartCount - 1] | HighBitFill))
770     return false;
771 
772   return true;
773 }
774 
isSignificandAllZeros() const775 bool APFloat::isSignificandAllZeros() const {
776   // Test if the significand excluding the integral bit is all zeros. This
777   // allows us to test for binade boundaries.
778   const integerPart *Parts = significandParts();
779   const unsigned PartCount = partCount();
780 
781   for (unsigned i = 0; i < PartCount - 1; i++)
782     if (Parts[i])
783       return false;
784 
785   const unsigned NumHighBits =
786     PartCount*integerPartWidth - semantics->precision + 1;
787   assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
788          "clear than integerPartWidth");
789   const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
790 
791   if (Parts[PartCount - 1] & HighBitMask)
792     return false;
793 
794   return true;
795 }
796 
797 bool
isLargest() const798 APFloat::isLargest() const {
799   // The largest number by magnitude in our format will be the floating point
800   // number with maximum exponent and with significand that is all ones.
801   return isFiniteNonZero() && exponent == semantics->maxExponent
802     && isSignificandAllOnes();
803 }
804 
805 bool
isInteger() const806 APFloat::isInteger() const {
807   // This could be made more efficient; I'm going for obviously correct.
808   if (!isFinite()) return false;
809   APFloat truncated = *this;
810   truncated.roundToIntegral(rmTowardZero);
811   return compare(truncated) == cmpEqual;
812 }
813 
814 bool
bitwiseIsEqual(const APFloat & rhs) const815 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
816   if (this == &rhs)
817     return true;
818   if (semantics != rhs.semantics ||
819       category != rhs.category ||
820       sign != rhs.sign)
821     return false;
822   if (category==fcZero || category==fcInfinity)
823     return true;
824 
825   if (isFiniteNonZero() && exponent != rhs.exponent)
826     return false;
827 
828   return std::equal(significandParts(), significandParts() + partCount(),
829                     rhs.significandParts());
830 }
831 
APFloat(const fltSemantics & ourSemantics,integerPart value)832 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
833   initialize(&ourSemantics);
834   sign = 0;
835   category = fcNormal;
836   zeroSignificand();
837   exponent = ourSemantics.precision - 1;
838   significandParts()[0] = value;
839   normalize(rmNearestTiesToEven, lfExactlyZero);
840 }
841 
APFloat(const fltSemantics & ourSemantics)842 APFloat::APFloat(const fltSemantics &ourSemantics) {
843   initialize(&ourSemantics);
844   category = fcZero;
845   sign = false;
846 }
847 
/* Construct an APFloat whose value is left unset: only the semantics
   and significand storage are set up.  TAG is unused and only selects
   this overload.  */
APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
  // Allocates storage if necessary but does not initialize it.
  initialize(&ourSemantics);
}
852 
/* Construct from the textual representation TEXT, rounding to
   nearest with ties to even.  The status returned by
   convertFromString is discarded here.  */
APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
  initialize(&ourSemantics);
  convertFromString(text, rmNearestTiesToEven);
}
857 
/* Copy constructor: set up storage for RHS's semantics, then copy
   its value.  */
APFloat::APFloat(const APFloat &rhs) {
  initialize(rhs.semantics);
  assign(rhs);
}
862 
/* Move constructor.  Starts from the Bogus semantics before
   delegating to move assignment, which calls freeSignificand() on
   this object first.  */
APFloat::APFloat(APFloat &&rhs) : semantics(&Bogus) {
  *this = std::move(rhs);
}
866 
/* Destructor: releases the significand storage, if any.  */
APFloat::~APFloat()
{
  freeSignificand();
}
871 
872 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
Profile(FoldingSetNodeID & ID) const873 void APFloat::Profile(FoldingSetNodeID& ID) const {
874   ID.Add(bitcastToAPInt());
875 }
876 
877 unsigned int
partCount() const878 APFloat::partCount() const
879 {
880   return partCountForBits(semantics->precision + 1);
881 }
882 
/* Accessor for the precision field of SEMANTICS (significand bits,
   including the integer bit).  */
unsigned int
APFloat::semanticsPrecision(const fltSemantics &semantics)
{
  return semantics.precision;
}
/* Accessor for the maxExponent field of SEMANTICS.  */
APFloat::ExponentType
APFloat::semanticsMaxExponent(const fltSemantics &semantics)
{
  return semantics.maxExponent;
}
/* Accessor for the minExponent field of SEMANTICS.  */
APFloat::ExponentType
APFloat::semanticsMinExponent(const fltSemantics &semantics)
{
  return semantics.minExponent;
}
/* Accessor for the sizeInBits field of SEMANTICS.  */
unsigned int
APFloat::semanticsSizeInBits(const fltSemantics &semantics)
{
  return semantics.sizeInBits;
}
903 
904 const integerPart *
significandParts() const905 APFloat::significandParts() const
906 {
907   return const_cast<APFloat *>(this)->significandParts();
908 }
909 
910 integerPart *
significandParts()911 APFloat::significandParts()
912 {
913   if (partCount() > 1)
914     return significand.parts;
915   else
916     return &significand.part;
917 }
918 
919 void
zeroSignificand()920 APFloat::zeroSignificand()
921 {
922   APInt::tcSet(significandParts(), 0, partCount());
923 }
924 
925 /* Increment an fcNormal floating point number's significand.  */
926 void
incrementSignificand()927 APFloat::incrementSignificand()
928 {
929   integerPart carry;
930 
931   carry = APInt::tcIncrement(significandParts(), partCount());
932 
933   /* Our callers should never cause us to overflow.  */
934   assert(carry == 0);
935   (void)carry;
936 }
937 
938 /* Add the significand of the RHS.  Returns the carry flag.  */
939 integerPart
addSignificand(const APFloat & rhs)940 APFloat::addSignificand(const APFloat &rhs)
941 {
942   integerPart *parts;
943 
944   parts = significandParts();
945 
946   assert(semantics == rhs.semantics);
947   assert(exponent == rhs.exponent);
948 
949   return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
950 }
951 
952 /* Subtract the significand of the RHS with a borrow flag.  Returns
953    the borrow flag.  */
954 integerPart
subtractSignificand(const APFloat & rhs,integerPart borrow)955 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
956 {
957   integerPart *parts;
958 
959   parts = significandParts();
960 
961   assert(semantics == rhs.semantics);
962   assert(exponent == rhs.exponent);
963 
964   return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
965                            partCount());
966 }
967 
968 /* Multiply the significand of the RHS.  If ADDEND is non-NULL, add it
969    on to the full-precision result of the multiplication.  Returns the
970    lost fraction.  */
971 lostFraction
multiplySignificand(const APFloat & rhs,const APFloat * addend)972 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
973 {
974   unsigned int omsb;        // One, not zero, based MSB.
975   unsigned int partsCount, newPartsCount, precision;
976   integerPart *lhsSignificand;
977   integerPart scratch[4];
978   integerPart *fullSignificand;
979   lostFraction lost_fraction;
980   bool ignored;
981 
982   assert(semantics == rhs.semantics);
983 
984   precision = semantics->precision;
985 
986   // Allocate space for twice as many bits as the original significand, plus one
987   // extra bit for the addition to overflow into.
988   newPartsCount = partCountForBits(precision * 2 + 1);
989 
990   if (newPartsCount > 4)
991     fullSignificand = new integerPart[newPartsCount];
992   else
993     fullSignificand = scratch;
994 
995   lhsSignificand = significandParts();
996   partsCount = partCount();
997 
998   APInt::tcFullMultiply(fullSignificand, lhsSignificand,
999                         rhs.significandParts(), partsCount, partsCount);
1000 
1001   lost_fraction = lfExactlyZero;
1002   omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1003   exponent += rhs.exponent;
1004 
1005   // Assume the operands involved in the multiplication are single-precision
1006   // FP, and the two multiplicants are:
1007   //   *this = a23 . a22 ... a0 * 2^e1
1008   //     rhs = b23 . b22 ... b0 * 2^e2
1009   // the result of multiplication is:
1010   //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1011   // Note that there are three significant bits at the left-hand side of the
1012   // radix point: two for the multiplication, and an overflow bit for the
1013   // addition (that will always be zero at this point). Move the radix point
1014   // toward left by two bits, and adjust exponent accordingly.
1015   exponent += 2;
1016 
1017   if (addend && addend->isNonZero()) {
1018     // The intermediate result of the multiplication has "2 * precision"
1019     // signicant bit; adjust the addend to be consistent with mul result.
1020     //
1021     Significand savedSignificand = significand;
1022     const fltSemantics *savedSemantics = semantics;
1023     fltSemantics extendedSemantics;
1024     opStatus status;
1025     unsigned int extendedPrecision;
1026 
1027     // Normalize our MSB to one below the top bit to allow for overflow.
1028     extendedPrecision = 2 * precision + 1;
1029     if (omsb != extendedPrecision - 1) {
1030       assert(extendedPrecision > omsb);
1031       APInt::tcShiftLeft(fullSignificand, newPartsCount,
1032                          (extendedPrecision - 1) - omsb);
1033       exponent -= (extendedPrecision - 1) - omsb;
1034     }
1035 
1036     /* Create new semantics.  */
1037     extendedSemantics = *semantics;
1038     extendedSemantics.precision = extendedPrecision;
1039 
1040     if (newPartsCount == 1)
1041       significand.part = fullSignificand[0];
1042     else
1043       significand.parts = fullSignificand;
1044     semantics = &extendedSemantics;
1045 
1046     APFloat extendedAddend(*addend);
1047     status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1048     assert(status == opOK);
1049     (void)status;
1050 
1051     // Shift the significand of the addend right by one bit. This guarantees
1052     // that the high bit of the significand is zero (same as fullSignificand),
1053     // so the addition will overflow (if it does overflow at all) into the top bit.
1054     lost_fraction = extendedAddend.shiftSignificandRight(1);
1055     assert(lost_fraction == lfExactlyZero &&
1056            "Lost precision while shifting addend for fused-multiply-add.");
1057 
1058     lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1059 
1060     /* Restore our state.  */
1061     if (newPartsCount == 1)
1062       fullSignificand[0] = significand.part;
1063     significand = savedSignificand;
1064     semantics = savedSemantics;
1065 
1066     omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1067   }
1068 
1069   // Convert the result having "2 * precision" significant-bits back to the one
1070   // having "precision" significant-bits. First, move the radix point from
1071   // poision "2*precision - 1" to "precision - 1". The exponent need to be
1072   // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1073   exponent -= precision + 1;
1074 
1075   // In case MSB resides at the left-hand side of radix point, shift the
1076   // mantissa right by some amount to make sure the MSB reside right before
1077   // the radix point (i.e. "MSB . rest-significant-bits").
1078   //
1079   // Note that the result is not normalized when "omsb < precision". So, the
1080   // caller needs to call APFloat::normalize() if normalized value is expected.
1081   if (omsb > precision) {
1082     unsigned int bits, significantParts;
1083     lostFraction lf;
1084 
1085     bits = omsb - precision;
1086     significantParts = partCountForBits(omsb);
1087     lf = shiftRight(fullSignificand, significantParts, bits);
1088     lost_fraction = combineLostFractions(lf, lost_fraction);
1089     exponent += bits;
1090   }
1091 
1092   APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1093 
1094   if (newPartsCount > 4)
1095     delete [] fullSignificand;
1096 
1097   return lost_fraction;
1098 }
1099 
1100 /* Multiply the significands of LHS and RHS to DST.  */
1101 lostFraction
divideSignificand(const APFloat & rhs)1102 APFloat::divideSignificand(const APFloat &rhs)
1103 {
1104   unsigned int bit, i, partsCount;
1105   const integerPart *rhsSignificand;
1106   integerPart *lhsSignificand, *dividend, *divisor;
1107   integerPart scratch[4];
1108   lostFraction lost_fraction;
1109 
1110   assert(semantics == rhs.semantics);
1111 
1112   lhsSignificand = significandParts();
1113   rhsSignificand = rhs.significandParts();
1114   partsCount = partCount();
1115 
1116   if (partsCount > 2)
1117     dividend = new integerPart[partsCount * 2];
1118   else
1119     dividend = scratch;
1120 
1121   divisor = dividend + partsCount;
1122 
1123   /* Copy the dividend and divisor as they will be modified in-place.  */
1124   for (i = 0; i < partsCount; i++) {
1125     dividend[i] = lhsSignificand[i];
1126     divisor[i] = rhsSignificand[i];
1127     lhsSignificand[i] = 0;
1128   }
1129 
1130   exponent -= rhs.exponent;
1131 
1132   unsigned int precision = semantics->precision;
1133 
1134   /* Normalize the divisor.  */
1135   bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1136   if (bit) {
1137     exponent += bit;
1138     APInt::tcShiftLeft(divisor, partsCount, bit);
1139   }
1140 
1141   /* Normalize the dividend.  */
1142   bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1143   if (bit) {
1144     exponent -= bit;
1145     APInt::tcShiftLeft(dividend, partsCount, bit);
1146   }
1147 
1148   /* Ensure the dividend >= divisor initially for the loop below.
1149      Incidentally, this means that the division loop below is
1150      guaranteed to set the integer bit to one.  */
1151   if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1152     exponent--;
1153     APInt::tcShiftLeft(dividend, partsCount, 1);
1154     assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1155   }
1156 
1157   /* Long division.  */
1158   for (bit = precision; bit; bit -= 1) {
1159     if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1160       APInt::tcSubtract(dividend, divisor, 0, partsCount);
1161       APInt::tcSetBit(lhsSignificand, bit - 1);
1162     }
1163 
1164     APInt::tcShiftLeft(dividend, partsCount, 1);
1165   }
1166 
1167   /* Figure out the lost fraction.  */
1168   int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1169 
1170   if (cmp > 0)
1171     lost_fraction = lfMoreThanHalf;
1172   else if (cmp == 0)
1173     lost_fraction = lfExactlyHalf;
1174   else if (APInt::tcIsZero(dividend, partsCount))
1175     lost_fraction = lfExactlyZero;
1176   else
1177     lost_fraction = lfLessThanHalf;
1178 
1179   if (partsCount > 2)
1180     delete [] dividend;
1181 
1182   return lost_fraction;
1183 }
1184 
1185 unsigned int
significandMSB() const1186 APFloat::significandMSB() const
1187 {
1188   return APInt::tcMSB(significandParts(), partCount());
1189 }
1190 
1191 unsigned int
significandLSB() const1192 APFloat::significandLSB() const
1193 {
1194   return APInt::tcLSB(significandParts(), partCount());
1195 }
1196 
1197 /* Note that a zero result is NOT normalized to fcZero.  */
1198 lostFraction
shiftSignificandRight(unsigned int bits)1199 APFloat::shiftSignificandRight(unsigned int bits)
1200 {
1201   /* Our exponent should not overflow.  */
1202   assert((ExponentType) (exponent + bits) >= exponent);
1203 
1204   exponent += bits;
1205 
1206   return shiftRight(significandParts(), partCount(), bits);
1207 }
1208 
1209 /* Shift the significand left BITS bits, subtract BITS from its exponent.  */
1210 void
shiftSignificandLeft(unsigned int bits)1211 APFloat::shiftSignificandLeft(unsigned int bits)
1212 {
1213   assert(bits < semantics->precision);
1214 
1215   if (bits) {
1216     unsigned int partsCount = partCount();
1217 
1218     APInt::tcShiftLeft(significandParts(), partsCount, bits);
1219     exponent -= bits;
1220 
1221     assert(!APInt::tcIsZero(significandParts(), partsCount));
1222   }
1223 }
1224 
1225 APFloat::cmpResult
compareAbsoluteValue(const APFloat & rhs) const1226 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1227 {
1228   int compare;
1229 
1230   assert(semantics == rhs.semantics);
1231   assert(isFiniteNonZero());
1232   assert(rhs.isFiniteNonZero());
1233 
1234   compare = exponent - rhs.exponent;
1235 
1236   /* If exponents are equal, do an unsigned bignum comparison of the
1237      significands.  */
1238   if (compare == 0)
1239     compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1240                                partCount());
1241 
1242   if (compare > 0)
1243     return cmpGreaterThan;
1244   else if (compare < 0)
1245     return cmpLessThan;
1246   else
1247     return cmpEqual;
1248 }
1249 
1250 /* Handle overflow.  Sign is preserved.  We either become infinity or
1251    the largest finite number.  */
1252 APFloat::opStatus
handleOverflow(roundingMode rounding_mode)1253 APFloat::handleOverflow(roundingMode rounding_mode)
1254 {
1255   /* Infinity?  */
1256   if (rounding_mode == rmNearestTiesToEven ||
1257       rounding_mode == rmNearestTiesToAway ||
1258       (rounding_mode == rmTowardPositive && !sign) ||
1259       (rounding_mode == rmTowardNegative && sign)) {
1260     category = fcInfinity;
1261     return (opStatus) (opOverflow | opInexact);
1262   }
1263 
1264   /* Otherwise we become the largest finite number.  */
1265   category = fcNormal;
1266   exponent = semantics->maxExponent;
1267   APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1268                                    semantics->precision);
1269 
1270   return opInexact;
1271 }
1272 
1273 /* Returns TRUE if, when truncating the current number, with BIT the
1274    new LSB, with the given lost fraction and rounding mode, the result
1275    would need to be rounded away from zero (i.e., by increasing the
1276    signficand).  This routine must work for fcZero of both signs, and
1277    fcNormal numbers.  */
1278 bool
roundAwayFromZero(roundingMode rounding_mode,lostFraction lost_fraction,unsigned int bit) const1279 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1280                            lostFraction lost_fraction,
1281                            unsigned int bit) const
1282 {
1283   /* NaNs and infinities should not have lost fractions.  */
1284   assert(isFiniteNonZero() || category == fcZero);
1285 
1286   /* Current callers never pass this so we don't handle it.  */
1287   assert(lost_fraction != lfExactlyZero);
1288 
1289   switch (rounding_mode) {
1290   case rmNearestTiesToAway:
1291     return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1292 
1293   case rmNearestTiesToEven:
1294     if (lost_fraction == lfMoreThanHalf)
1295       return true;
1296 
1297     /* Our zeroes don't have a significand to test.  */
1298     if (lost_fraction == lfExactlyHalf && category != fcZero)
1299       return APInt::tcExtractBit(significandParts(), bit);
1300 
1301     return false;
1302 
1303   case rmTowardZero:
1304     return false;
1305 
1306   case rmTowardPositive:
1307     return !sign;
1308 
1309   case rmTowardNegative:
1310     return sign;
1311   }
1312   llvm_unreachable("Invalid rounding mode found");
1313 }
1314 
1315 APFloat::opStatus
normalize(roundingMode rounding_mode,lostFraction lost_fraction)1316 APFloat::normalize(roundingMode rounding_mode,
1317                    lostFraction lost_fraction)
1318 {
1319   unsigned int omsb;                /* One, not zero, based MSB.  */
1320   int exponentChange;
1321 
1322   if (!isFiniteNonZero())
1323     return opOK;
1324 
1325   /* Before rounding normalize the exponent of fcNormal numbers.  */
1326   omsb = significandMSB() + 1;
1327 
1328   if (omsb) {
1329     /* OMSB is numbered from 1.  We want to place it in the integer
1330        bit numbered PRECISION if possible, with a compensating change in
1331        the exponent.  */
1332     exponentChange = omsb - semantics->precision;
1333 
1334     /* If the resulting exponent is too high, overflow according to
1335        the rounding mode.  */
1336     if (exponent + exponentChange > semantics->maxExponent)
1337       return handleOverflow(rounding_mode);
1338 
1339     /* Subnormal numbers have exponent minExponent, and their MSB
1340        is forced based on that.  */
1341     if (exponent + exponentChange < semantics->minExponent)
1342       exponentChange = semantics->minExponent - exponent;
1343 
1344     /* Shifting left is easy as we don't lose precision.  */
1345     if (exponentChange < 0) {
1346       assert(lost_fraction == lfExactlyZero);
1347 
1348       shiftSignificandLeft(-exponentChange);
1349 
1350       return opOK;
1351     }
1352 
1353     if (exponentChange > 0) {
1354       lostFraction lf;
1355 
1356       /* Shift right and capture any new lost fraction.  */
1357       lf = shiftSignificandRight(exponentChange);
1358 
1359       lost_fraction = combineLostFractions(lf, lost_fraction);
1360 
1361       /* Keep OMSB up-to-date.  */
1362       if (omsb > (unsigned) exponentChange)
1363         omsb -= exponentChange;
1364       else
1365         omsb = 0;
1366     }
1367   }
1368 
1369   /* Now round the number according to rounding_mode given the lost
1370      fraction.  */
1371 
1372   /* As specified in IEEE 754, since we do not trap we do not report
1373      underflow for exact results.  */
1374   if (lost_fraction == lfExactlyZero) {
1375     /* Canonicalize zeroes.  */
1376     if (omsb == 0)
1377       category = fcZero;
1378 
1379     return opOK;
1380   }
1381 
1382   /* Increment the significand if we're rounding away from zero.  */
1383   if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1384     if (omsb == 0)
1385       exponent = semantics->minExponent;
1386 
1387     incrementSignificand();
1388     omsb = significandMSB() + 1;
1389 
1390     /* Did the significand increment overflow?  */
1391     if (omsb == (unsigned) semantics->precision + 1) {
1392       /* Renormalize by incrementing the exponent and shifting our
1393          significand right one.  However if we already have the
1394          maximum exponent we overflow to infinity.  */
1395       if (exponent == semantics->maxExponent) {
1396         category = fcInfinity;
1397 
1398         return (opStatus) (opOverflow | opInexact);
1399       }
1400 
1401       shiftSignificandRight(1);
1402 
1403       return opInexact;
1404     }
1405   }
1406 
1407   /* The normal case - we were and are not denormal, and any
1408      significand increment above didn't overflow.  */
1409   if (omsb == semantics->precision)
1410     return opInexact;
1411 
1412   /* We have a non-zero denormal.  */
1413   assert(omsb < semantics->precision);
1414 
1415   /* Canonicalize zeroes.  */
1416   if (omsb == 0)
1417     category = fcZero;
1418 
1419   /* The fcZero case is a denormal that underflowed to zero.  */
1420   return (opStatus) (opUnderflow | opInexact);
1421 }
1422 
1423 APFloat::opStatus
addOrSubtractSpecials(const APFloat & rhs,bool subtract)1424 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1425 {
1426   switch (PackCategoriesIntoKey(category, rhs.category)) {
1427   default:
1428     llvm_unreachable(nullptr);
1429 
1430   case PackCategoriesIntoKey(fcNaN, fcZero):
1431   case PackCategoriesIntoKey(fcNaN, fcNormal):
1432   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1433   case PackCategoriesIntoKey(fcNaN, fcNaN):
1434   case PackCategoriesIntoKey(fcNormal, fcZero):
1435   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1436   case PackCategoriesIntoKey(fcInfinity, fcZero):
1437     return opOK;
1438 
1439   case PackCategoriesIntoKey(fcZero, fcNaN):
1440   case PackCategoriesIntoKey(fcNormal, fcNaN):
1441   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1442     // We need to be sure to flip the sign here for subtraction because we
1443     // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1444     sign = rhs.sign ^ subtract;
1445     category = fcNaN;
1446     copySignificand(rhs);
1447     return opOK;
1448 
1449   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1450   case PackCategoriesIntoKey(fcZero, fcInfinity):
1451     category = fcInfinity;
1452     sign = rhs.sign ^ subtract;
1453     return opOK;
1454 
1455   case PackCategoriesIntoKey(fcZero, fcNormal):
1456     assign(rhs);
1457     sign = rhs.sign ^ subtract;
1458     return opOK;
1459 
1460   case PackCategoriesIntoKey(fcZero, fcZero):
1461     /* Sign depends on rounding mode; handled by caller.  */
1462     return opOK;
1463 
1464   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1465     /* Differently signed infinities can only be validly
1466        subtracted.  */
1467     if (((sign ^ rhs.sign)!=0) != subtract) {
1468       makeNaN();
1469       return APFloat::opInvalidOp;
1470     }
1471 
1472     return opOK;
1473 
1474   case PackCategoriesIntoKey(fcNormal, fcNormal):
1475     return opDivByZero;
1476   }
1477 }
1478 
1479 /* Add or subtract two normal numbers.  */
1480 lostFraction
addOrSubtractSignificand(const APFloat & rhs,bool subtract)1481 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1482 {
1483   integerPart carry;
1484   lostFraction lost_fraction;
1485   int bits;
1486 
1487   /* Determine if the operation on the absolute values is effectively
1488      an addition or subtraction.  */
1489   subtract ^= static_cast<bool>(sign ^ rhs.sign);
1490 
1491   /* Are we bigger exponent-wise than the RHS?  */
1492   bits = exponent - rhs.exponent;
1493 
1494   /* Subtraction is more subtle than one might naively expect.  */
1495   if (subtract) {
1496     APFloat temp_rhs(rhs);
1497     bool reverse;
1498 
1499     if (bits == 0) {
1500       reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1501       lost_fraction = lfExactlyZero;
1502     } else if (bits > 0) {
1503       lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1504       shiftSignificandLeft(1);
1505       reverse = false;
1506     } else {
1507       lost_fraction = shiftSignificandRight(-bits - 1);
1508       temp_rhs.shiftSignificandLeft(1);
1509       reverse = true;
1510     }
1511 
1512     if (reverse) {
1513       carry = temp_rhs.subtractSignificand
1514         (*this, lost_fraction != lfExactlyZero);
1515       copySignificand(temp_rhs);
1516       sign = !sign;
1517     } else {
1518       carry = subtractSignificand
1519         (temp_rhs, lost_fraction != lfExactlyZero);
1520     }
1521 
1522     /* Invert the lost fraction - it was on the RHS and
1523        subtracted.  */
1524     if (lost_fraction == lfLessThanHalf)
1525       lost_fraction = lfMoreThanHalf;
1526     else if (lost_fraction == lfMoreThanHalf)
1527       lost_fraction = lfLessThanHalf;
1528 
1529     /* The code above is intended to ensure that no borrow is
1530        necessary.  */
1531     assert(!carry);
1532     (void)carry;
1533   } else {
1534     if (bits > 0) {
1535       APFloat temp_rhs(rhs);
1536 
1537       lost_fraction = temp_rhs.shiftSignificandRight(bits);
1538       carry = addSignificand(temp_rhs);
1539     } else {
1540       lost_fraction = shiftSignificandRight(-bits);
1541       carry = addSignificand(rhs);
1542     }
1543 
1544     /* We have a guard bit; generating a carry cannot happen.  */
1545     assert(!carry);
1546     (void)carry;
1547   }
1548 
1549   return lost_fraction;
1550 }
1551 
1552 APFloat::opStatus
multiplySpecials(const APFloat & rhs)1553 APFloat::multiplySpecials(const APFloat &rhs)
1554 {
1555   switch (PackCategoriesIntoKey(category, rhs.category)) {
1556   default:
1557     llvm_unreachable(nullptr);
1558 
1559   case PackCategoriesIntoKey(fcNaN, fcZero):
1560   case PackCategoriesIntoKey(fcNaN, fcNormal):
1561   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1562   case PackCategoriesIntoKey(fcNaN, fcNaN):
1563     sign = false;
1564     return opOK;
1565 
1566   case PackCategoriesIntoKey(fcZero, fcNaN):
1567   case PackCategoriesIntoKey(fcNormal, fcNaN):
1568   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1569     sign = false;
1570     category = fcNaN;
1571     copySignificand(rhs);
1572     return opOK;
1573 
1574   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1575   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1576   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1577     category = fcInfinity;
1578     return opOK;
1579 
1580   case PackCategoriesIntoKey(fcZero, fcNormal):
1581   case PackCategoriesIntoKey(fcNormal, fcZero):
1582   case PackCategoriesIntoKey(fcZero, fcZero):
1583     category = fcZero;
1584     return opOK;
1585 
1586   case PackCategoriesIntoKey(fcZero, fcInfinity):
1587   case PackCategoriesIntoKey(fcInfinity, fcZero):
1588     makeNaN();
1589     return opInvalidOp;
1590 
1591   case PackCategoriesIntoKey(fcNormal, fcNormal):
1592     return opOK;
1593   }
1594 }
1595 
1596 APFloat::opStatus
divideSpecials(const APFloat & rhs)1597 APFloat::divideSpecials(const APFloat &rhs)
1598 {
1599   switch (PackCategoriesIntoKey(category, rhs.category)) {
1600   default:
1601     llvm_unreachable(nullptr);
1602 
1603   case PackCategoriesIntoKey(fcZero, fcNaN):
1604   case PackCategoriesIntoKey(fcNormal, fcNaN):
1605   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1606     category = fcNaN;
1607     copySignificand(rhs);
1608   case PackCategoriesIntoKey(fcNaN, fcZero):
1609   case PackCategoriesIntoKey(fcNaN, fcNormal):
1610   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1611   case PackCategoriesIntoKey(fcNaN, fcNaN):
1612     sign = false;
1613   case PackCategoriesIntoKey(fcInfinity, fcZero):
1614   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1615   case PackCategoriesIntoKey(fcZero, fcInfinity):
1616   case PackCategoriesIntoKey(fcZero, fcNormal):
1617     return opOK;
1618 
1619   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1620     category = fcZero;
1621     return opOK;
1622 
1623   case PackCategoriesIntoKey(fcNormal, fcZero):
1624     category = fcInfinity;
1625     return opDivByZero;
1626 
1627   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1628   case PackCategoriesIntoKey(fcZero, fcZero):
1629     makeNaN();
1630     return opInvalidOp;
1631 
1632   case PackCategoriesIntoKey(fcNormal, fcNormal):
1633     return opOK;
1634   }
1635 }
1636 
1637 APFloat::opStatus
modSpecials(const APFloat & rhs)1638 APFloat::modSpecials(const APFloat &rhs)
1639 {
1640   switch (PackCategoriesIntoKey(category, rhs.category)) {
1641   default:
1642     llvm_unreachable(nullptr);
1643 
1644   case PackCategoriesIntoKey(fcNaN, fcZero):
1645   case PackCategoriesIntoKey(fcNaN, fcNormal):
1646   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1647   case PackCategoriesIntoKey(fcNaN, fcNaN):
1648   case PackCategoriesIntoKey(fcZero, fcInfinity):
1649   case PackCategoriesIntoKey(fcZero, fcNormal):
1650   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1651     return opOK;
1652 
1653   case PackCategoriesIntoKey(fcZero, fcNaN):
1654   case PackCategoriesIntoKey(fcNormal, fcNaN):
1655   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1656     sign = false;
1657     category = fcNaN;
1658     copySignificand(rhs);
1659     return opOK;
1660 
1661   case PackCategoriesIntoKey(fcNormal, fcZero):
1662   case PackCategoriesIntoKey(fcInfinity, fcZero):
1663   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1664   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1665   case PackCategoriesIntoKey(fcZero, fcZero):
1666     makeNaN();
1667     return opInvalidOp;
1668 
1669   case PackCategoriesIntoKey(fcNormal, fcNormal):
1670     return opOK;
1671   }
1672 }
1673 
1674 /* Change sign.  */
1675 void
changeSign()1676 APFloat::changeSign()
1677 {
1678   /* Look mummy, this one's easy.  */
1679   sign = !sign;
1680 }
1681 
1682 void
clearSign()1683 APFloat::clearSign()
1684 {
1685   /* So is this one. */
1686   sign = 0;
1687 }
1688 
1689 void
copySign(const APFloat & rhs)1690 APFloat::copySign(const APFloat &rhs)
1691 {
1692   /* And this one. */
1693   sign = rhs.sign;
1694 }
1695 
1696 /* Normalized addition or subtraction.  */
1697 APFloat::opStatus
addOrSubtract(const APFloat & rhs,roundingMode rounding_mode,bool subtract)1698 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1699                        bool subtract)
1700 {
1701   opStatus fs;
1702 
1703   fs = addOrSubtractSpecials(rhs, subtract);
1704 
1705   /* This return code means it was not a simple case.  */
1706   if (fs == opDivByZero) {
1707     lostFraction lost_fraction;
1708 
1709     lost_fraction = addOrSubtractSignificand(rhs, subtract);
1710     fs = normalize(rounding_mode, lost_fraction);
1711 
1712     /* Can only be zero if we lost no fraction.  */
1713     assert(category != fcZero || lost_fraction == lfExactlyZero);
1714   }
1715 
1716   /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1717      positive zero unless rounding to minus infinity, except that
1718      adding two like-signed zeroes gives that zero.  */
1719   if (category == fcZero) {
1720     if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1721       sign = (rounding_mode == rmTowardNegative);
1722   }
1723 
1724   return fs;
1725 }
1726 
1727 /* Normalized addition.  */
1728 APFloat::opStatus
add(const APFloat & rhs,roundingMode rounding_mode)1729 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1730 {
1731   return addOrSubtract(rhs, rounding_mode, false);
1732 }
1733 
1734 /* Normalized subtraction.  */
1735 APFloat::opStatus
subtract(const APFloat & rhs,roundingMode rounding_mode)1736 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1737 {
1738   return addOrSubtract(rhs, rounding_mode, true);
1739 }
1740 
1741 /* Normalized multiply.  */
1742 APFloat::opStatus
multiply(const APFloat & rhs,roundingMode rounding_mode)1743 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1744 {
1745   opStatus fs;
1746 
1747   sign ^= rhs.sign;
1748   fs = multiplySpecials(rhs);
1749 
1750   if (isFiniteNonZero()) {
1751     lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
1752     fs = normalize(rounding_mode, lost_fraction);
1753     if (lost_fraction != lfExactlyZero)
1754       fs = (opStatus) (fs | opInexact);
1755   }
1756 
1757   return fs;
1758 }
1759 
1760 /* Normalized divide.  */
1761 APFloat::opStatus
divide(const APFloat & rhs,roundingMode rounding_mode)1762 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1763 {
1764   opStatus fs;
1765 
1766   sign ^= rhs.sign;
1767   fs = divideSpecials(rhs);
1768 
1769   if (isFiniteNonZero()) {
1770     lostFraction lost_fraction = divideSignificand(rhs);
1771     fs = normalize(rounding_mode, lost_fraction);
1772     if (lost_fraction != lfExactlyZero)
1773       fs = (opStatus) (fs | opInexact);
1774   }
1775 
1776   return fs;
1777 }
1778 
1779 /* Normalized remainder.  This is not currently correct in all cases.  */
1780 APFloat::opStatus
remainder(const APFloat & rhs)1781 APFloat::remainder(const APFloat &rhs)
1782 {
1783   opStatus fs;
1784   APFloat V = *this;
1785   unsigned int origSign = sign;
1786 
1787   fs = V.divide(rhs, rmNearestTiesToEven);
1788   if (fs == opDivByZero)
1789     return fs;
1790 
1791   int parts = partCount();
1792   integerPart *x = new integerPart[parts];
1793   bool ignored;
1794   fs = V.convertToInteger(x, parts * integerPartWidth, true,
1795                           rmNearestTiesToEven, &ignored);
1796   if (fs==opInvalidOp)
1797     return fs;
1798 
1799   fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1800                                         rmNearestTiesToEven);
1801   assert(fs==opOK);   // should always work
1802 
1803   fs = V.multiply(rhs, rmNearestTiesToEven);
1804   assert(fs==opOK || fs==opInexact);   // should not overflow or underflow
1805 
1806   fs = subtract(V, rmNearestTiesToEven);
1807   assert(fs==opOK || fs==opInexact);   // likewise
1808 
1809   if (isZero())
1810     sign = origSign;    // IEEE754 requires this
1811   delete[] x;
1812   return fs;
1813 }
1814 
1815 /* Normalized llvm frem (C fmod).
1816    This is not currently correct in all cases.  */
1817 APFloat::opStatus
mod(const APFloat & rhs)1818 APFloat::mod(const APFloat &rhs)
1819 {
1820   opStatus fs;
1821   fs = modSpecials(rhs);
1822 
1823   if (isFiniteNonZero() && rhs.isFiniteNonZero()) {
1824     APFloat V = *this;
1825     unsigned int origSign = sign;
1826 
1827     fs = V.divide(rhs, rmNearestTiesToEven);
1828     if (fs == opDivByZero)
1829       return fs;
1830 
1831     int parts = partCount();
1832     integerPart *x = new integerPart[parts];
1833     bool ignored;
1834     fs = V.convertToInteger(x, parts * integerPartWidth, true,
1835                             rmTowardZero, &ignored);
1836     if (fs==opInvalidOp)
1837       return fs;
1838 
1839     fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1840                                           rmNearestTiesToEven);
1841     assert(fs==opOK);   // should always work
1842 
1843     fs = V.multiply(rhs, rmNearestTiesToEven);
1844     assert(fs==opOK || fs==opInexact);   // should not overflow or underflow
1845 
1846     fs = subtract(V, rmNearestTiesToEven);
1847     assert(fs==opOK || fs==opInexact);   // likewise
1848 
1849     if (isZero())
1850       sign = origSign;    // IEEE754 requires this
1851     delete[] x;
1852   }
1853   return fs;
1854 }
1855 
1856 /* Normalized fused-multiply-add.  */
1857 APFloat::opStatus
fusedMultiplyAdd(const APFloat & multiplicand,const APFloat & addend,roundingMode rounding_mode)1858 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1859                           const APFloat &addend,
1860                           roundingMode rounding_mode)
1861 {
1862   opStatus fs;
1863 
1864   /* Post-multiplication sign, before addition.  */
1865   sign ^= multiplicand.sign;
1866 
1867   /* If and only if all arguments are normal do we need to do an
1868      extended-precision calculation.  */
1869   if (isFiniteNonZero() &&
1870       multiplicand.isFiniteNonZero() &&
1871       addend.isFinite()) {
1872     lostFraction lost_fraction;
1873 
1874     lost_fraction = multiplySignificand(multiplicand, &addend);
1875     fs = normalize(rounding_mode, lost_fraction);
1876     if (lost_fraction != lfExactlyZero)
1877       fs = (opStatus) (fs | opInexact);
1878 
1879     /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1880        positive zero unless rounding to minus infinity, except that
1881        adding two like-signed zeroes gives that zero.  */
1882     if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1883       sign = (rounding_mode == rmTowardNegative);
1884   } else {
1885     fs = multiplySpecials(multiplicand);
1886 
1887     /* FS can only be opOK or opInvalidOp.  There is no more work
1888        to do in the latter case.  The IEEE-754R standard says it is
1889        implementation-defined in this case whether, if ADDEND is a
1890        quiet NaN, we raise invalid op; this implementation does so.
1891 
1892        If we need to do the addition we can do so with normal
1893        precision.  */
1894     if (fs == opOK)
1895       fs = addOrSubtract(addend, rounding_mode, false);
1896   }
1897 
1898   return fs;
1899 }
1900 
1901 /* Rounding-mode corrrect round to integral value.  */
roundToIntegral(roundingMode rounding_mode)1902 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1903   opStatus fs;
1904 
1905   // If the exponent is large enough, we know that this value is already
1906   // integral, and the arithmetic below would potentially cause it to saturate
1907   // to +/-Inf.  Bail out early instead.
1908   if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1909     return opOK;
1910 
1911   // The algorithm here is quite simple: we add 2^(p-1), where p is the
1912   // precision of our format, and then subtract it back off again.  The choice
1913   // of rounding modes for the addition/subtraction determines the rounding mode
1914   // for our integral rounding as well.
1915   // NOTE: When the input value is negative, we do subtraction followed by
1916   // addition instead.
1917   APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1918   IntegerConstant <<= semanticsPrecision(*semantics)-1;
1919   APFloat MagicConstant(*semantics);
1920   fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1921                                       rmNearestTiesToEven);
1922   MagicConstant.copySign(*this);
1923 
1924   if (fs != opOK)
1925     return fs;
1926 
1927   // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1928   bool inputSign = isNegative();
1929 
1930   fs = add(MagicConstant, rounding_mode);
1931   if (fs != opOK && fs != opInexact)
1932     return fs;
1933 
1934   fs = subtract(MagicConstant, rounding_mode);
1935 
1936   // Restore the input sign.
1937   if (inputSign != isNegative())
1938     changeSign();
1939 
1940   return fs;
1941 }
1942 
1943 
1944 /* Comparison requires normalized numbers.  */
1945 APFloat::cmpResult
compare(const APFloat & rhs) const1946 APFloat::compare(const APFloat &rhs) const
1947 {
1948   cmpResult result;
1949 
1950   assert(semantics == rhs.semantics);
1951 
1952   switch (PackCategoriesIntoKey(category, rhs.category)) {
1953   default:
1954     llvm_unreachable(nullptr);
1955 
1956   case PackCategoriesIntoKey(fcNaN, fcZero):
1957   case PackCategoriesIntoKey(fcNaN, fcNormal):
1958   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1959   case PackCategoriesIntoKey(fcNaN, fcNaN):
1960   case PackCategoriesIntoKey(fcZero, fcNaN):
1961   case PackCategoriesIntoKey(fcNormal, fcNaN):
1962   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1963     return cmpUnordered;
1964 
1965   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1966   case PackCategoriesIntoKey(fcInfinity, fcZero):
1967   case PackCategoriesIntoKey(fcNormal, fcZero):
1968     if (sign)
1969       return cmpLessThan;
1970     else
1971       return cmpGreaterThan;
1972 
1973   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1974   case PackCategoriesIntoKey(fcZero, fcInfinity):
1975   case PackCategoriesIntoKey(fcZero, fcNormal):
1976     if (rhs.sign)
1977       return cmpGreaterThan;
1978     else
1979       return cmpLessThan;
1980 
1981   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1982     if (sign == rhs.sign)
1983       return cmpEqual;
1984     else if (sign)
1985       return cmpLessThan;
1986     else
1987       return cmpGreaterThan;
1988 
1989   case PackCategoriesIntoKey(fcZero, fcZero):
1990     return cmpEqual;
1991 
1992   case PackCategoriesIntoKey(fcNormal, fcNormal):
1993     break;
1994   }
1995 
1996   /* Two normal numbers.  Do they have the same sign?  */
1997   if (sign != rhs.sign) {
1998     if (sign)
1999       result = cmpLessThan;
2000     else
2001       result = cmpGreaterThan;
2002   } else {
2003     /* Compare absolute values; invert result if negative.  */
2004     result = compareAbsoluteValue(rhs);
2005 
2006     if (sign) {
2007       if (result == cmpLessThan)
2008         result = cmpGreaterThan;
2009       else if (result == cmpGreaterThan)
2010         result = cmpLessThan;
2011     }
2012   }
2013 
2014   return result;
2015 }
2016 
2017 /// APFloat::convert - convert a value of one floating point type to another.
2018 /// The return value corresponds to the IEEE754 exceptions.  *losesInfo
2019 /// records whether the transformation lost information, i.e. whether
2020 /// converting the result back to the original type will produce the
2021 /// original value (this is almost the same as return value==fsOK, but there
2022 /// are edge cases where this is not so).
2023 
2024 APFloat::opStatus
convert(const fltSemantics & toSemantics,roundingMode rounding_mode,bool * losesInfo)2025 APFloat::convert(const fltSemantics &toSemantics,
2026                  roundingMode rounding_mode, bool *losesInfo)
2027 {
2028   lostFraction lostFraction;
2029   unsigned int newPartCount, oldPartCount;
2030   opStatus fs;
2031   int shift;
2032   const fltSemantics &fromSemantics = *semantics;
2033 
2034   lostFraction = lfExactlyZero;
2035   newPartCount = partCountForBits(toSemantics.precision + 1);
2036   oldPartCount = partCount();
2037   shift = toSemantics.precision - fromSemantics.precision;
2038 
2039   bool X86SpecialNan = false;
2040   if (&fromSemantics == &APFloat::x87DoubleExtended &&
2041       &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
2042       (!(*significandParts() & 0x8000000000000000ULL) ||
2043        !(*significandParts() & 0x4000000000000000ULL))) {
2044     // x86 has some unusual NaNs which cannot be represented in any other
2045     // format; note them here.
2046     X86SpecialNan = true;
2047   }
2048 
2049   // If this is a truncation of a denormal number, and the target semantics
2050   // has larger exponent range than the source semantics (this can happen
2051   // when truncating from PowerPC double-double to double format), the
2052   // right shift could lose result mantissa bits.  Adjust exponent instead
2053   // of performing excessive shift.
2054   if (shift < 0 && isFiniteNonZero()) {
2055     int exponentChange = significandMSB() + 1 - fromSemantics.precision;
2056     if (exponent + exponentChange < toSemantics.minExponent)
2057       exponentChange = toSemantics.minExponent - exponent;
2058     if (exponentChange < shift)
2059       exponentChange = shift;
2060     if (exponentChange < 0) {
2061       shift -= exponentChange;
2062       exponent += exponentChange;
2063     }
2064   }
2065 
2066   // If this is a truncation, perform the shift before we narrow the storage.
2067   if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
2068     lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2069 
2070   // Fix the storage so it can hold to new value.
2071   if (newPartCount > oldPartCount) {
2072     // The new type requires more storage; make it available.
2073     integerPart *newParts;
2074     newParts = new integerPart[newPartCount];
2075     APInt::tcSet(newParts, 0, newPartCount);
2076     if (isFiniteNonZero() || category==fcNaN)
2077       APInt::tcAssign(newParts, significandParts(), oldPartCount);
2078     freeSignificand();
2079     significand.parts = newParts;
2080   } else if (newPartCount == 1 && oldPartCount != 1) {
2081     // Switch to built-in storage for a single part.
2082     integerPart newPart = 0;
2083     if (isFiniteNonZero() || category==fcNaN)
2084       newPart = significandParts()[0];
2085     freeSignificand();
2086     significand.part = newPart;
2087   }
2088 
2089   // Now that we have the right storage, switch the semantics.
2090   semantics = &toSemantics;
2091 
2092   // If this is an extension, perform the shift now that the storage is
2093   // available.
2094   if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2095     APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2096 
2097   if (isFiniteNonZero()) {
2098     fs = normalize(rounding_mode, lostFraction);
2099     *losesInfo = (fs != opOK);
2100   } else if (category == fcNaN) {
2101     *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2102 
2103     // For x87 extended precision, we want to make a NaN, not a special NaN if
2104     // the input wasn't special either.
2105     if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended)
2106       APInt::tcSetBit(significandParts(), semantics->precision - 1);
2107 
2108     // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2109     // does not give you back the same bits.  This is dubious, and we
2110     // don't currently do it.  You're really supposed to get
2111     // an invalid operation signal at runtime, but nobody does that.
2112     fs = opOK;
2113   } else {
2114     *losesInfo = false;
2115     fs = opOK;
2116   }
2117 
2118   return fs;
2119 }
2120 
2121 /* Convert a floating point number to an integer according to the
2122    rounding mode.  If the rounded integer value is out of range this
2123    returns an invalid operation exception and the contents of the
2124    destination parts are unspecified.  If the rounded value is in
2125    range but the floating point number is not the exact integer, the C
2126    standard doesn't require an inexact exception to be raised.  IEEE
2127    854 does require it so we do that.
2128 
2129    Note that for conversions to integer type the C standard requires
2130    round-to-zero to always be used.  */
2131 APFloat::opStatus
convertToSignExtendedInteger(integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2132 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
2133                                       bool isSigned,
2134                                       roundingMode rounding_mode,
2135                                       bool *isExact) const
2136 {
2137   lostFraction lost_fraction;
2138   const integerPart *src;
2139   unsigned int dstPartsCount, truncatedBits;
2140 
2141   *isExact = false;
2142 
2143   /* Handle the three special cases first.  */
2144   if (category == fcInfinity || category == fcNaN)
2145     return opInvalidOp;
2146 
2147   dstPartsCount = partCountForBits(width);
2148 
2149   if (category == fcZero) {
2150     APInt::tcSet(parts, 0, dstPartsCount);
2151     // Negative zero can't be represented as an int.
2152     *isExact = !sign;
2153     return opOK;
2154   }
2155 
2156   src = significandParts();
2157 
2158   /* Step 1: place our absolute value, with any fraction truncated, in
2159      the destination.  */
2160   if (exponent < 0) {
2161     /* Our absolute value is less than one; truncate everything.  */
2162     APInt::tcSet(parts, 0, dstPartsCount);
2163     /* For exponent -1 the integer bit represents .5, look at that.
2164        For smaller exponents leftmost truncated bit is 0. */
2165     truncatedBits = semantics->precision -1U - exponent;
2166   } else {
2167     /* We want the most significant (exponent + 1) bits; the rest are
2168        truncated.  */
2169     unsigned int bits = exponent + 1U;
2170 
2171     /* Hopelessly large in magnitude?  */
2172     if (bits > width)
2173       return opInvalidOp;
2174 
2175     if (bits < semantics->precision) {
2176       /* We truncate (semantics->precision - bits) bits.  */
2177       truncatedBits = semantics->precision - bits;
2178       APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
2179     } else {
2180       /* We want at least as many bits as are available.  */
2181       APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2182       APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2183       truncatedBits = 0;
2184     }
2185   }
2186 
2187   /* Step 2: work out any lost fraction, and increment the absolute
2188      value if we would round away from zero.  */
2189   if (truncatedBits) {
2190     lost_fraction = lostFractionThroughTruncation(src, partCount(),
2191                                                   truncatedBits);
2192     if (lost_fraction != lfExactlyZero &&
2193         roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2194       if (APInt::tcIncrement(parts, dstPartsCount))
2195         return opInvalidOp;     /* Overflow.  */
2196     }
2197   } else {
2198     lost_fraction = lfExactlyZero;
2199   }
2200 
2201   /* Step 3: check if we fit in the destination.  */
2202   unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2203 
2204   if (sign) {
2205     if (!isSigned) {
2206       /* Negative numbers cannot be represented as unsigned.  */
2207       if (omsb != 0)
2208         return opInvalidOp;
2209     } else {
2210       /* It takes omsb bits to represent the unsigned integer value.
2211          We lose a bit for the sign, but care is needed as the
2212          maximally negative integer is a special case.  */
2213       if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2214         return opInvalidOp;
2215 
2216       /* This case can happen because of rounding.  */
2217       if (omsb > width)
2218         return opInvalidOp;
2219     }
2220 
2221     APInt::tcNegate (parts, dstPartsCount);
2222   } else {
2223     if (omsb >= width + !isSigned)
2224       return opInvalidOp;
2225   }
2226 
2227   if (lost_fraction == lfExactlyZero) {
2228     *isExact = true;
2229     return opOK;
2230   } else
2231     return opInexact;
2232 }
2233 
2234 /* Same as convertToSignExtendedInteger, except we provide
2235    deterministic values in case of an invalid operation exception,
2236    namely zero for NaNs and the minimal or maximal value respectively
2237    for underflow or overflow.
2238    The *isExact output tells whether the result is exact, in the sense
2239    that converting it back to the original floating point type produces
2240    the original value.  This is almost equivalent to result==opOK,
2241    except for negative zeroes.
2242 */
2243 APFloat::opStatus
convertToInteger(integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2244 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2245                           bool isSigned,
2246                           roundingMode rounding_mode, bool *isExact) const
2247 {
2248   opStatus fs;
2249 
2250   fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2251                                     isExact);
2252 
2253   if (fs == opInvalidOp) {
2254     unsigned int bits, dstPartsCount;
2255 
2256     dstPartsCount = partCountForBits(width);
2257 
2258     if (category == fcNaN)
2259       bits = 0;
2260     else if (sign)
2261       bits = isSigned;
2262     else
2263       bits = width - isSigned;
2264 
2265     APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2266     if (sign && isSigned)
2267       APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2268   }
2269 
2270   return fs;
2271 }
2272 
2273 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2274    an APSInt, whose initial bit-width and signed-ness are used to determine the
2275    precision of the conversion.
2276  */
2277 APFloat::opStatus
convertToInteger(APSInt & result,roundingMode rounding_mode,bool * isExact) const2278 APFloat::convertToInteger(APSInt &result,
2279                           roundingMode rounding_mode, bool *isExact) const
2280 {
2281   unsigned bitWidth = result.getBitWidth();
2282   SmallVector<uint64_t, 4> parts(result.getNumWords());
2283   opStatus status = convertToInteger(
2284     parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2285   // Keeps the original signed-ness.
2286   result = APInt(bitWidth, parts);
2287   return status;
2288 }
2289 
2290 /* Convert an unsigned integer SRC to a floating point number,
2291    rounding according to ROUNDING_MODE.  The sign of the floating
2292    point number is not modified.  */
2293 APFloat::opStatus
convertFromUnsignedParts(const integerPart * src,unsigned int srcCount,roundingMode rounding_mode)2294 APFloat::convertFromUnsignedParts(const integerPart *src,
2295                                   unsigned int srcCount,
2296                                   roundingMode rounding_mode)
2297 {
2298   unsigned int omsb, precision, dstCount;
2299   integerPart *dst;
2300   lostFraction lost_fraction;
2301 
2302   category = fcNormal;
2303   omsb = APInt::tcMSB(src, srcCount) + 1;
2304   dst = significandParts();
2305   dstCount = partCount();
2306   precision = semantics->precision;
2307 
2308   /* We want the most significant PRECISION bits of SRC.  There may not
2309      be that many; extract what we can.  */
2310   if (precision <= omsb) {
2311     exponent = omsb - 1;
2312     lost_fraction = lostFractionThroughTruncation(src, srcCount,
2313                                                   omsb - precision);
2314     APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2315   } else {
2316     exponent = precision - 1;
2317     lost_fraction = lfExactlyZero;
2318     APInt::tcExtract(dst, dstCount, src, omsb, 0);
2319   }
2320 
2321   return normalize(rounding_mode, lost_fraction);
2322 }
2323 
2324 APFloat::opStatus
convertFromAPInt(const APInt & Val,bool isSigned,roundingMode rounding_mode)2325 APFloat::convertFromAPInt(const APInt &Val,
2326                           bool isSigned,
2327                           roundingMode rounding_mode)
2328 {
2329   unsigned int partCount = Val.getNumWords();
2330   APInt api = Val;
2331 
2332   sign = false;
2333   if (isSigned && api.isNegative()) {
2334     sign = true;
2335     api = -api;
2336   }
2337 
2338   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2339 }
2340 
2341 /* Convert a two's complement integer SRC to a floating point number,
2342    rounding according to ROUNDING_MODE.  ISSIGNED is true if the
2343    integer is signed, in which case it must be sign-extended.  */
2344 APFloat::opStatus
convertFromSignExtendedInteger(const integerPart * src,unsigned int srcCount,bool isSigned,roundingMode rounding_mode)2345 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2346                                         unsigned int srcCount,
2347                                         bool isSigned,
2348                                         roundingMode rounding_mode)
2349 {
2350   opStatus status;
2351 
2352   if (isSigned &&
2353       APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2354     integerPart *copy;
2355 
2356     /* If we're signed and negative negate a copy.  */
2357     sign = true;
2358     copy = new integerPart[srcCount];
2359     APInt::tcAssign(copy, src, srcCount);
2360     APInt::tcNegate(copy, srcCount);
2361     status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2362     delete [] copy;
2363   } else {
2364     sign = false;
2365     status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2366   }
2367 
2368   return status;
2369 }
2370 
2371 /* FIXME: should this just take a const APInt reference?  */
2372 APFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode)2373 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2374                                         unsigned int width, bool isSigned,
2375                                         roundingMode rounding_mode)
2376 {
2377   unsigned int partCount = partCountForBits(width);
2378   APInt api = APInt(width, makeArrayRef(parts, partCount));
2379 
2380   sign = false;
2381   if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2382     sign = true;
2383     api = -api;
2384   }
2385 
2386   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2387 }
2388 
2389 APFloat::opStatus
convertFromHexadecimalString(StringRef s,roundingMode rounding_mode)2390 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2391 {
2392   lostFraction lost_fraction = lfExactlyZero;
2393 
2394   category = fcNormal;
2395   zeroSignificand();
2396   exponent = 0;
2397 
2398   integerPart *significand = significandParts();
2399   unsigned partsCount = partCount();
2400   unsigned bitPos = partsCount * integerPartWidth;
2401   bool computedTrailingFraction = false;
2402 
2403   // Skip leading zeroes and any (hexa)decimal point.
2404   StringRef::iterator begin = s.begin();
2405   StringRef::iterator end = s.end();
2406   StringRef::iterator dot;
2407   StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2408   StringRef::iterator firstSignificantDigit = p;
2409 
2410   while (p != end) {
2411     integerPart hex_value;
2412 
2413     if (*p == '.') {
2414       assert(dot == end && "String contains multiple dots");
2415       dot = p++;
2416       continue;
2417     }
2418 
2419     hex_value = hexDigitValue(*p);
2420     if (hex_value == -1U)
2421       break;
2422 
2423     p++;
2424 
2425     // Store the number while we have space.
2426     if (bitPos) {
2427       bitPos -= 4;
2428       hex_value <<= bitPos % integerPartWidth;
2429       significand[bitPos / integerPartWidth] |= hex_value;
2430     } else if (!computedTrailingFraction) {
2431       lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2432       computedTrailingFraction = true;
2433     }
2434   }
2435 
2436   /* Hex floats require an exponent but not a hexadecimal point.  */
2437   assert(p != end && "Hex strings require an exponent");
2438   assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2439   assert(p != begin && "Significand has no digits");
2440   assert((dot == end || p - begin != 1) && "Significand has no digits");
2441 
2442   /* Ignore the exponent if we are zero.  */
2443   if (p != firstSignificantDigit) {
2444     int expAdjustment;
2445 
2446     /* Implicit hexadecimal point?  */
2447     if (dot == end)
2448       dot = p;
2449 
2450     /* Calculate the exponent adjustment implicit in the number of
2451        significant digits.  */
2452     expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2453     if (expAdjustment < 0)
2454       expAdjustment++;
2455     expAdjustment = expAdjustment * 4 - 1;
2456 
2457     /* Adjust for writing the significand starting at the most
2458        significant nibble.  */
2459     expAdjustment += semantics->precision;
2460     expAdjustment -= partsCount * integerPartWidth;
2461 
2462     /* Adjust for the given exponent.  */
2463     exponent = totalExponent(p + 1, end, expAdjustment);
2464   }
2465 
2466   return normalize(rounding_mode, lost_fraction);
2467 }
2468 
2469 APFloat::opStatus
roundSignificandWithExponent(const integerPart * decSigParts,unsigned sigPartCount,int exp,roundingMode rounding_mode)2470 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2471                                       unsigned sigPartCount, int exp,
2472                                       roundingMode rounding_mode)
2473 {
2474   unsigned int parts, pow5PartCount;
2475   fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2476   integerPart pow5Parts[maxPowerOfFiveParts];
2477   bool isNearest;
2478 
2479   isNearest = (rounding_mode == rmNearestTiesToEven ||
2480                rounding_mode == rmNearestTiesToAway);
2481 
2482   parts = partCountForBits(semantics->precision + 11);
2483 
2484   /* Calculate pow(5, abs(exp)).  */
2485   pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2486 
2487   for (;; parts *= 2) {
2488     opStatus sigStatus, powStatus;
2489     unsigned int excessPrecision, truncatedBits;
2490 
2491     calcSemantics.precision = parts * integerPartWidth - 1;
2492     excessPrecision = calcSemantics.precision - semantics->precision;
2493     truncatedBits = excessPrecision;
2494 
2495     APFloat decSig = APFloat::getZero(calcSemantics, sign);
2496     APFloat pow5(calcSemantics);
2497 
2498     sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2499                                                 rmNearestTiesToEven);
2500     powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2501                                               rmNearestTiesToEven);
2502     /* Add exp, as 10^n = 5^n * 2^n.  */
2503     decSig.exponent += exp;
2504 
2505     lostFraction calcLostFraction;
2506     integerPart HUerr, HUdistance;
2507     unsigned int powHUerr;
2508 
2509     if (exp >= 0) {
2510       /* multiplySignificand leaves the precision-th bit set to 1.  */
2511       calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
2512       powHUerr = powStatus != opOK;
2513     } else {
2514       calcLostFraction = decSig.divideSignificand(pow5);
2515       /* Denormal numbers have less precision.  */
2516       if (decSig.exponent < semantics->minExponent) {
2517         excessPrecision += (semantics->minExponent - decSig.exponent);
2518         truncatedBits = excessPrecision;
2519         if (excessPrecision > calcSemantics.precision)
2520           excessPrecision = calcSemantics.precision;
2521       }
2522       /* Extra half-ulp lost in reciprocal of exponent.  */
2523       powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2524     }
2525 
2526     /* Both multiplySignificand and divideSignificand return the
2527        result with the integer bit set.  */
2528     assert(APInt::tcExtractBit
2529            (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2530 
2531     HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2532                        powHUerr);
2533     HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2534                                       excessPrecision, isNearest);
2535 
2536     /* Are we guaranteed to round correctly if we truncate?  */
2537     if (HUdistance >= HUerr) {
2538       APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2539                        calcSemantics.precision - excessPrecision,
2540                        excessPrecision);
2541       /* Take the exponent of decSig.  If we tcExtract-ed less bits
2542          above we must adjust our exponent to compensate for the
2543          implicit right shift.  */
2544       exponent = (decSig.exponent + semantics->precision
2545                   - (calcSemantics.precision - excessPrecision));
2546       calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2547                                                        decSig.partCount(),
2548                                                        truncatedBits);
2549       return normalize(rounding_mode, calcLostFraction);
2550     }
2551   }
2552 }
2553 
2554 APFloat::opStatus
convertFromDecimalString(StringRef str,roundingMode rounding_mode)2555 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)    // qq
2556 {
2557   decimalInfo D;
2558   opStatus fs;
2559 
2560   /* Scan the text.  */
2561   StringRef::iterator p = str.begin();
2562   fs = interpretDecimal(p, str.end(), &D);
2563   if (fs != opOK)
2564       return fs;
2565 
2566   /* Handle the quick cases.  First the case of no significant digits,
2567      i.e. zero, and then exponents that are obviously too large or too
2568      small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
2569      definitely overflows if
2570 
2571            (exp - 1) * L >= maxExponent
2572 
2573      and definitely underflows to zero where
2574 
2575            (exp + 1) * L <= minExponent - precision
2576 
2577      With integer arithmetic the tightest bounds for L are
2578 
2579            93/28 < L < 196/59            [ numerator <= 256 ]
2580            42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
2581   */
2582 
2583   // Test if we have a zero number allowing for strings with no null terminators
2584   // and zero decimals with non-zero exponents.
2585   //
2586   // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2587   // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2588   // be at most one dot. On the other hand, if we have a zero with a non-zero
2589   // exponent, then we know that D.firstSigDigit will be non-numeric.
2590   if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2591     category = fcZero;
2592     fs = opOK;
2593 
2594   /* Check whether the normalized exponent is high enough to overflow
2595      max during the log-rebasing in the max-exponent check below. */
2596   } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2597     fs = handleOverflow(rounding_mode);
2598 
2599   /* If it wasn't, then it also wasn't high enough to overflow max
2600      during the log-rebasing in the min-exponent check.  Check that it
2601      won't overflow min in either check, then perform the min-exponent
2602      check. */
2603   } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2604              (D.normalizedExponent + 1) * 28738 <=
2605                8651 * (semantics->minExponent - (int) semantics->precision)) {
2606     /* Underflow to zero and round.  */
2607     category = fcNormal;
2608     zeroSignificand();
2609     fs = normalize(rounding_mode, lfLessThanHalf);
2610 
2611   /* We can finally safely perform the max-exponent check. */
2612   } else if ((D.normalizedExponent - 1) * 42039
2613              >= 12655 * semantics->maxExponent) {
2614     /* Overflow and round.  */
2615     fs = handleOverflow(rounding_mode);
2616   } else {
2617     integerPart *decSignificand;
2618     unsigned int partCount;
2619 
2620     /* A tight upper bound on number of bits required to hold an
2621        N-digit decimal integer is N * 196 / 59.  Allocate enough space
2622        to hold the full significand, and an extra part required by
2623        tcMultiplyPart.  */
2624     partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2625     partCount = partCountForBits(1 + 196 * partCount / 59);
2626     decSignificand = new integerPart[partCount + 1];
2627     partCount = 0;
2628 
2629     /* Convert to binary efficiently - we do almost all multiplication
2630        in an integerPart.  When this would overflow do we do a single
2631        bignum multiplication, and then revert again to multiplication
2632        in an integerPart.  */
2633     do {
2634       integerPart decValue, val, multiplier;
2635 
2636       val = 0;
2637       multiplier = 1;
2638 
2639       do {
2640         if (*p == '.') {
2641           p++;
2642           if (p == str.end()) {
2643             break;
2644           }
2645         }
2646         decValue = decDigitValue(*p++);
2647         assert(decValue < 10U && "Invalid character in significand");
2648         multiplier *= 10;
2649         val = val * 10 + decValue;
2650         /* The maximum number that can be multiplied by ten with any
2651            digit added without overflowing an integerPart.  */
2652       } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2653 
2654       /* Multiply out the current part.  */
2655       APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2656                             partCount, partCount + 1, false);
2657 
2658       /* If we used another part (likely but not guaranteed), increase
2659          the count.  */
2660       if (decSignificand[partCount])
2661         partCount++;
2662     } while (p <= D.lastSigDigit);
2663 
2664     category = fcNormal;
2665     fs = roundSignificandWithExponent(decSignificand, partCount,
2666                                       D.exponent, rounding_mode);
2667 
2668     delete [] decSignificand;
2669   }
2670 
2671   return fs;
2672 }
2673 
2674 bool
convertFromStringSpecials(StringRef str)2675 APFloat::convertFromStringSpecials(StringRef str) {
2676   if (str.equals("inf") || str.equals("INFINITY")) {
2677     makeInf(false);
2678     return true;
2679   }
2680 
2681   if (str.equals("-inf") || str.equals("-INFINITY")) {
2682     makeInf(true);
2683     return true;
2684   }
2685 
2686   if (str.equals("nan") || str.equals("NaN")) {
2687     makeNaN(false, false);
2688     return true;
2689   }
2690 
2691   if (str.equals("-nan") || str.equals("-NaN")) {
2692     makeNaN(false, true);
2693     return true;
2694   }
2695 
2696   return false;
2697 }
2698 
2699 APFloat::opStatus
convertFromString(StringRef str,roundingMode rounding_mode)2700 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2701 {
2702   assert(!str.empty() && "Invalid string length");
2703 
2704   // Handle special cases.
2705   if (convertFromStringSpecials(str))
2706     return opOK;
2707 
2708   /* Handle a leading minus sign.  */
2709   StringRef::iterator p = str.begin();
2710   size_t slen = str.size();
2711   sign = *p == '-' ? 1 : 0;
2712   if (*p == '-' || *p == '+') {
2713     p++;
2714     slen--;
2715     assert(slen && "String has no digits");
2716   }
2717 
2718   if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2719     assert(slen - 2 && "Invalid string");
2720     return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2721                                         rounding_mode);
2722   }
2723 
2724   return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2725 }
2726 
2727 /* Write out a hexadecimal representation of the floating point value
2728    to DST, which must be of sufficient size, in the C99 form
2729    [-]0xh.hhhhp[+-]d.  Return the number of characters written,
2730    excluding the terminating NUL.
2731 
2732    If UPPERCASE, the output is in upper case, otherwise in lower case.
2733 
2734    HEXDIGITS digits appear altogether, rounding the value if
2735    necessary.  If HEXDIGITS is 0, the minimal precision to display the
2736    number precisely is used instead.  If nothing would appear after
2737    the decimal point it is suppressed.
2738 
2739    The decimal exponent is always printed and has at least one digit.
2740    Zero values display an exponent of zero.  Infinities and NaNs
2741    appear as "infinity" or "nan" respectively.
2742 
2743    The above rules are as specified by C99.  There is ambiguity about
2744    what the leading hexadecimal digit should be.  This implementation
2745    uses whatever is necessary so that the exponent is displayed as
2746    stored.  This implies the exponent will fall within the IEEE format
2747    range, and the leading hexadecimal digit will be 0 (for denormals),
2748    1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2749    any other digits zero).
2750 */
2751 unsigned int
convertToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2752 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2753                             bool upperCase, roundingMode rounding_mode) const
2754 {
2755   char *p;
2756 
2757   p = dst;
2758   if (sign)
2759     *dst++ = '-';
2760 
2761   switch (category) {
2762   case fcInfinity:
2763     memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2764     dst += sizeof infinityL - 1;
2765     break;
2766 
2767   case fcNaN:
2768     memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2769     dst += sizeof NaNU - 1;
2770     break;
2771 
2772   case fcZero:
2773     *dst++ = '0';
2774     *dst++ = upperCase ? 'X': 'x';
2775     *dst++ = '0';
2776     if (hexDigits > 1) {
2777       *dst++ = '.';
2778       memset (dst, '0', hexDigits - 1);
2779       dst += hexDigits - 1;
2780     }
2781     *dst++ = upperCase ? 'P': 'p';
2782     *dst++ = '0';
2783     break;
2784 
2785   case fcNormal:
2786     dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2787     break;
2788   }
2789 
2790   *dst = 0;
2791 
2792   return static_cast<unsigned int>(dst - p);
2793 }
2794 
2795 /* Does the hard work of outputting the correctly rounded hexadecimal
2796    form of a normal floating point number with the specified number of
2797    hexadecimal digits.  If HEXDIGITS is zero the minimum number of
2798    digits necessary to print the value precisely is output.  */
2799 char *
convertNormalToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2800 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2801                                   bool upperCase,
2802                                   roundingMode rounding_mode) const
2803 {
2804   unsigned int count, valueBits, shift, partsCount, outputDigits;
2805   const char *hexDigitChars;
2806   const integerPart *significand;
2807   char *p;
2808   bool roundUp;
2809 
2810   *dst++ = '0';
2811   *dst++ = upperCase ? 'X': 'x';
2812 
2813   roundUp = false;
2814   hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2815 
2816   significand = significandParts();
2817   partsCount = partCount();
2818 
2819   /* +3 because the first digit only uses the single integer bit, so
2820      we have 3 virtual zero most-significant-bits.  */
2821   valueBits = semantics->precision + 3;
2822   shift = integerPartWidth - valueBits % integerPartWidth;
2823 
2824   /* The natural number of digits required ignoring trailing
2825      insignificant zeroes.  */
2826   outputDigits = (valueBits - significandLSB () + 3) / 4;
2827 
2828   /* hexDigits of zero means use the required number for the
2829      precision.  Otherwise, see if we are truncating.  If we are,
2830      find out if we need to round away from zero.  */
2831   if (hexDigits) {
2832     if (hexDigits < outputDigits) {
2833       /* We are dropping non-zero bits, so need to check how to round.
2834          "bits" is the number of dropped bits.  */
2835       unsigned int bits;
2836       lostFraction fraction;
2837 
2838       bits = valueBits - hexDigits * 4;
2839       fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2840       roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2841     }
2842     outputDigits = hexDigits;
2843   }
2844 
2845   /* Write the digits consecutively, and start writing in the location
2846      of the hexadecimal point.  We move the most significant digit
2847      left and add the hexadecimal point later.  */
2848   p = ++dst;
2849 
2850   count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2851 
2852   while (outputDigits && count) {
2853     integerPart part;
2854 
2855     /* Put the most significant integerPartWidth bits in "part".  */
2856     if (--count == partsCount)
2857       part = 0;  /* An imaginary higher zero part.  */
2858     else
2859       part = significand[count] << shift;
2860 
2861     if (count && shift)
2862       part |= significand[count - 1] >> (integerPartWidth - shift);
2863 
2864     /* Convert as much of "part" to hexdigits as we can.  */
2865     unsigned int curDigits = integerPartWidth / 4;
2866 
2867     if (curDigits > outputDigits)
2868       curDigits = outputDigits;
2869     dst += partAsHex (dst, part, curDigits, hexDigitChars);
2870     outputDigits -= curDigits;
2871   }
2872 
2873   if (roundUp) {
2874     char *q = dst;
2875 
2876     /* Note that hexDigitChars has a trailing '0'.  */
2877     do {
2878       q--;
2879       *q = hexDigitChars[hexDigitValue (*q) + 1];
2880     } while (*q == '0');
2881     assert(q >= p);
2882   } else {
2883     /* Add trailing zeroes.  */
2884     memset (dst, '0', outputDigits);
2885     dst += outputDigits;
2886   }
2887 
2888   /* Move the most significant digit to before the point, and if there
2889      is something after the decimal point add it.  This must come
2890      after rounding above.  */
2891   p[-1] = p[0];
2892   if (dst -1 == p)
2893     dst--;
2894   else
2895     p[0] = '.';
2896 
2897   /* Finally output the exponent.  */
2898   *dst++ = upperCase ? 'P': 'p';
2899 
2900   return writeSignedDecimal (dst, exponent);
2901 }
2902 
hash_value(const APFloat & Arg)2903 hash_code llvm::hash_value(const APFloat &Arg) {
2904   if (!Arg.isFiniteNonZero())
2905     return hash_combine((uint8_t)Arg.category,
2906                         // NaN has no sign, fix it at zero.
2907                         Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2908                         Arg.semantics->precision);
2909 
2910   // Normal floats need their exponent and significand hashed.
2911   return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2912                       Arg.semantics->precision, Arg.exponent,
2913                       hash_combine_range(
2914                         Arg.significandParts(),
2915                         Arg.significandParts() + Arg.partCount()));
2916 }
2917 
2918 // Conversion from APFloat to/from host float/double.  It may eventually be
2919 // possible to eliminate these and have everybody deal with APFloats, but that
2920 // will take a while.  This approach will not easily extend to long double.
2921 // Current implementation requires integerPartWidth==64, which is correct at
2922 // the moment but could be made more general.
2923 
// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
2926 
2927 APInt
convertF80LongDoubleAPFloatToAPInt() const2928 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2929 {
2930   assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2931   assert(partCount()==2);
2932 
2933   uint64_t myexponent, mysignificand;
2934 
2935   if (isFiniteNonZero()) {
2936     myexponent = exponent+16383; //bias
2937     mysignificand = significandParts()[0];
2938     if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2939       myexponent = 0;   // denormal
2940   } else if (category==fcZero) {
2941     myexponent = 0;
2942     mysignificand = 0;
2943   } else if (category==fcInfinity) {
2944     myexponent = 0x7fff;
2945     mysignificand = 0x8000000000000000ULL;
2946   } else {
2947     assert(category == fcNaN && "Unknown category");
2948     myexponent = 0x7fff;
2949     mysignificand = significandParts()[0];
2950   }
2951 
2952   uint64_t words[2];
2953   words[0] = mysignificand;
2954   words[1] =  ((uint64_t)(sign & 1) << 15) |
2955               (myexponent & 0x7fffLL);
2956   return APInt(80, words);
2957 }
2958 
2959 APInt
convertPPCDoubleDoubleAPFloatToAPInt() const2960 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2961 {
2962   assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2963   assert(partCount()==2);
2964 
2965   uint64_t words[2];
2966   opStatus fs;
2967   bool losesInfo;
2968 
2969   // Convert number to double.  To avoid spurious underflows, we re-
2970   // normalize against the "double" minExponent first, and only *then*
2971   // truncate the mantissa.  The result of that second conversion
2972   // may be inexact, but should never underflow.
2973   // Declare fltSemantics before APFloat that uses it (and
2974   // saves pointer to it) to ensure correct destruction order.
2975   fltSemantics extendedSemantics = *semantics;
2976   extendedSemantics.minExponent = IEEEdouble.minExponent;
2977   APFloat extended(*this);
2978   fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2979   assert(fs == opOK && !losesInfo);
2980   (void)fs;
2981 
2982   APFloat u(extended);
2983   fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2984   assert(fs == opOK || fs == opInexact);
2985   (void)fs;
2986   words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2987 
2988   // If conversion was exact or resulted in a special case, we're done;
2989   // just set the second double to zero.  Otherwise, re-convert back to
2990   // the extended format and compute the difference.  This now should
2991   // convert exactly to double.
2992   if (u.isFiniteNonZero() && losesInfo) {
2993     fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2994     assert(fs == opOK && !losesInfo);
2995     (void)fs;
2996 
2997     APFloat v(extended);
2998     v.subtract(u, rmNearestTiesToEven);
2999     fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
3000     assert(fs == opOK && !losesInfo);
3001     (void)fs;
3002     words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3003   } else {
3004     words[1] = 0;
3005   }
3006 
3007   return APInt(128, words);
3008 }
3009 
3010 APInt
convertQuadrupleAPFloatToAPInt() const3011 APFloat::convertQuadrupleAPFloatToAPInt() const
3012 {
3013   assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
3014   assert(partCount()==2);
3015 
3016   uint64_t myexponent, mysignificand, mysignificand2;
3017 
3018   if (isFiniteNonZero()) {
3019     myexponent = exponent+16383; //bias
3020     mysignificand = significandParts()[0];
3021     mysignificand2 = significandParts()[1];
3022     if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
3023       myexponent = 0;   // denormal
3024   } else if (category==fcZero) {
3025     myexponent = 0;
3026     mysignificand = mysignificand2 = 0;
3027   } else if (category==fcInfinity) {
3028     myexponent = 0x7fff;
3029     mysignificand = mysignificand2 = 0;
3030   } else {
3031     assert(category == fcNaN && "Unknown category!");
3032     myexponent = 0x7fff;
3033     mysignificand = significandParts()[0];
3034     mysignificand2 = significandParts()[1];
3035   }
3036 
3037   uint64_t words[2];
3038   words[0] = mysignificand;
3039   words[1] = ((uint64_t)(sign & 1) << 63) |
3040              ((myexponent & 0x7fff) << 48) |
3041              (mysignificand2 & 0xffffffffffffLL);
3042 
3043   return APInt(128, words);
3044 }
3045 
3046 APInt
convertDoubleAPFloatToAPInt() const3047 APFloat::convertDoubleAPFloatToAPInt() const
3048 {
3049   assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
3050   assert(partCount()==1);
3051 
3052   uint64_t myexponent, mysignificand;
3053 
3054   if (isFiniteNonZero()) {
3055     myexponent = exponent+1023; //bias
3056     mysignificand = *significandParts();
3057     if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
3058       myexponent = 0;   // denormal
3059   } else if (category==fcZero) {
3060     myexponent = 0;
3061     mysignificand = 0;
3062   } else if (category==fcInfinity) {
3063     myexponent = 0x7ff;
3064     mysignificand = 0;
3065   } else {
3066     assert(category == fcNaN && "Unknown category!");
3067     myexponent = 0x7ff;
3068     mysignificand = *significandParts();
3069   }
3070 
3071   return APInt(64, ((((uint64_t)(sign & 1) << 63) |
3072                      ((myexponent & 0x7ff) <<  52) |
3073                      (mysignificand & 0xfffffffffffffLL))));
3074 }
3075 
3076 APInt
convertFloatAPFloatToAPInt() const3077 APFloat::convertFloatAPFloatToAPInt() const
3078 {
3079   assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
3080   assert(partCount()==1);
3081 
3082   uint32_t myexponent, mysignificand;
3083 
3084   if (isFiniteNonZero()) {
3085     myexponent = exponent+127; //bias
3086     mysignificand = (uint32_t)*significandParts();
3087     if (myexponent == 1 && !(mysignificand & 0x800000))
3088       myexponent = 0;   // denormal
3089   } else if (category==fcZero) {
3090     myexponent = 0;
3091     mysignificand = 0;
3092   } else if (category==fcInfinity) {
3093     myexponent = 0xff;
3094     mysignificand = 0;
3095   } else {
3096     assert(category == fcNaN && "Unknown category!");
3097     myexponent = 0xff;
3098     mysignificand = (uint32_t)*significandParts();
3099   }
3100 
3101   return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3102                     (mysignificand & 0x7fffff)));
3103 }
3104 
3105 APInt
convertHalfAPFloatToAPInt() const3106 APFloat::convertHalfAPFloatToAPInt() const
3107 {
3108   assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
3109   assert(partCount()==1);
3110 
3111   uint32_t myexponent, mysignificand;
3112 
3113   if (isFiniteNonZero()) {
3114     myexponent = exponent+15; //bias
3115     mysignificand = (uint32_t)*significandParts();
3116     if (myexponent == 1 && !(mysignificand & 0x400))
3117       myexponent = 0;   // denormal
3118   } else if (category==fcZero) {
3119     myexponent = 0;
3120     mysignificand = 0;
3121   } else if (category==fcInfinity) {
3122     myexponent = 0x1f;
3123     mysignificand = 0;
3124   } else {
3125     assert(category == fcNaN && "Unknown category!");
3126     myexponent = 0x1f;
3127     mysignificand = (uint32_t)*significandParts();
3128   }
3129 
3130   return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3131                     (mysignificand & 0x3ff)));
3132 }
3133 
3134 // This function creates an APInt that is just a bit map of the floating
3135 // point constant as it would appear in memory.  It is not a conversion,
3136 // and treating the result as a normal integer is unlikely to be useful.
3137 
3138 APInt
bitcastToAPInt() const3139 APFloat::bitcastToAPInt() const
3140 {
3141   if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
3142     return convertHalfAPFloatToAPInt();
3143 
3144   if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
3145     return convertFloatAPFloatToAPInt();
3146 
3147   if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
3148     return convertDoubleAPFloatToAPInt();
3149 
3150   if (semantics == (const llvm::fltSemantics*)&IEEEquad)
3151     return convertQuadrupleAPFloatToAPInt();
3152 
3153   if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
3154     return convertPPCDoubleDoubleAPFloatToAPInt();
3155 
3156   assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
3157          "unknown format!");
3158   return convertF80LongDoubleAPFloatToAPInt();
3159 }
3160 
3161 float
convertToFloat() const3162 APFloat::convertToFloat() const
3163 {
3164   assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
3165          "Float semantics are not IEEEsingle");
3166   APInt api = bitcastToAPInt();
3167   return api.bitsToFloat();
3168 }
3169 
3170 double
convertToDouble() const3171 APFloat::convertToDouble() const
3172 {
3173   assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
3174          "Float semantics are not IEEEdouble");
3175   APInt api = bitcastToAPInt();
3176   return api.bitsToDouble();
3177 }
3178 
3179 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
3180 /// does not support these bit patterns:
3181 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3182 ///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3183 ///  exponent = 0, integer bit 1 ("pseudodenormal")
3184 ///  exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3185 /// At the moment, the first two are treated as NaNs, the second two as Normal.
3186 void
initFromF80LongDoubleAPInt(const APInt & api)3187 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
3188 {
3189   assert(api.getBitWidth()==80);
3190   uint64_t i1 = api.getRawData()[0];
3191   uint64_t i2 = api.getRawData()[1];
3192   uint64_t myexponent = (i2 & 0x7fff);
3193   uint64_t mysignificand = i1;
3194 
3195   initialize(&APFloat::x87DoubleExtended);
3196   assert(partCount()==2);
3197 
3198   sign = static_cast<unsigned int>(i2>>15);
3199   if (myexponent==0 && mysignificand==0) {
3200     // exponent, significand meaningless
3201     category = fcZero;
3202   } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3203     // exponent, significand meaningless
3204     category = fcInfinity;
3205   } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3206     // exponent meaningless
3207     category = fcNaN;
3208     significandParts()[0] = mysignificand;
3209     significandParts()[1] = 0;
3210   } else {
3211     category = fcNormal;
3212     exponent = myexponent - 16383;
3213     significandParts()[0] = mysignificand;
3214     significandParts()[1] = 0;
3215     if (myexponent==0)          // denormal
3216       exponent = -16382;
3217   }
3218 }
3219 
3220 void
initFromPPCDoubleDoubleAPInt(const APInt & api)3221 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3222 {
3223   assert(api.getBitWidth()==128);
3224   uint64_t i1 = api.getRawData()[0];
3225   uint64_t i2 = api.getRawData()[1];
3226   opStatus fs;
3227   bool losesInfo;
3228 
3229   // Get the first double and convert to our format.
3230   initFromDoubleAPInt(APInt(64, i1));
3231   fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3232   assert(fs == opOK && !losesInfo);
3233   (void)fs;
3234 
3235   // Unless we have a special case, add in second double.
3236   if (isFiniteNonZero()) {
3237     APFloat v(IEEEdouble, APInt(64, i2));
3238     fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3239     assert(fs == opOK && !losesInfo);
3240     (void)fs;
3241 
3242     add(v, rmNearestTiesToEven);
3243   }
3244 }
3245 
3246 void
initFromQuadrupleAPInt(const APInt & api)3247 APFloat::initFromQuadrupleAPInt(const APInt &api)
3248 {
3249   assert(api.getBitWidth()==128);
3250   uint64_t i1 = api.getRawData()[0];
3251   uint64_t i2 = api.getRawData()[1];
3252   uint64_t myexponent = (i2 >> 48) & 0x7fff;
3253   uint64_t mysignificand  = i1;
3254   uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3255 
3256   initialize(&APFloat::IEEEquad);
3257   assert(partCount()==2);
3258 
3259   sign = static_cast<unsigned int>(i2>>63);
3260   if (myexponent==0 &&
3261       (mysignificand==0 && mysignificand2==0)) {
3262     // exponent, significand meaningless
3263     category = fcZero;
3264   } else if (myexponent==0x7fff &&
3265              (mysignificand==0 && mysignificand2==0)) {
3266     // exponent, significand meaningless
3267     category = fcInfinity;
3268   } else if (myexponent==0x7fff &&
3269              (mysignificand!=0 || mysignificand2 !=0)) {
3270     // exponent meaningless
3271     category = fcNaN;
3272     significandParts()[0] = mysignificand;
3273     significandParts()[1] = mysignificand2;
3274   } else {
3275     category = fcNormal;
3276     exponent = myexponent - 16383;
3277     significandParts()[0] = mysignificand;
3278     significandParts()[1] = mysignificand2;
3279     if (myexponent==0)          // denormal
3280       exponent = -16382;
3281     else
3282       significandParts()[1] |= 0x1000000000000LL;  // integer bit
3283   }
3284 }
3285 
3286 void
initFromDoubleAPInt(const APInt & api)3287 APFloat::initFromDoubleAPInt(const APInt &api)
3288 {
3289   assert(api.getBitWidth()==64);
3290   uint64_t i = *api.getRawData();
3291   uint64_t myexponent = (i >> 52) & 0x7ff;
3292   uint64_t mysignificand = i & 0xfffffffffffffLL;
3293 
3294   initialize(&APFloat::IEEEdouble);
3295   assert(partCount()==1);
3296 
3297   sign = static_cast<unsigned int>(i>>63);
3298   if (myexponent==0 && mysignificand==0) {
3299     // exponent, significand meaningless
3300     category = fcZero;
3301   } else if (myexponent==0x7ff && mysignificand==0) {
3302     // exponent, significand meaningless
3303     category = fcInfinity;
3304   } else if (myexponent==0x7ff && mysignificand!=0) {
3305     // exponent meaningless
3306     category = fcNaN;
3307     *significandParts() = mysignificand;
3308   } else {
3309     category = fcNormal;
3310     exponent = myexponent - 1023;
3311     *significandParts() = mysignificand;
3312     if (myexponent==0)          // denormal
3313       exponent = -1022;
3314     else
3315       *significandParts() |= 0x10000000000000LL;  // integer bit
3316   }
3317 }
3318 
3319 void
initFromFloatAPInt(const APInt & api)3320 APFloat::initFromFloatAPInt(const APInt & api)
3321 {
3322   assert(api.getBitWidth()==32);
3323   uint32_t i = (uint32_t)*api.getRawData();
3324   uint32_t myexponent = (i >> 23) & 0xff;
3325   uint32_t mysignificand = i & 0x7fffff;
3326 
3327   initialize(&APFloat::IEEEsingle);
3328   assert(partCount()==1);
3329 
3330   sign = i >> 31;
3331   if (myexponent==0 && mysignificand==0) {
3332     // exponent, significand meaningless
3333     category = fcZero;
3334   } else if (myexponent==0xff && mysignificand==0) {
3335     // exponent, significand meaningless
3336     category = fcInfinity;
3337   } else if (myexponent==0xff && mysignificand!=0) {
3338     // sign, exponent, significand meaningless
3339     category = fcNaN;
3340     *significandParts() = mysignificand;
3341   } else {
3342     category = fcNormal;
3343     exponent = myexponent - 127;  //bias
3344     *significandParts() = mysignificand;
3345     if (myexponent==0)    // denormal
3346       exponent = -126;
3347     else
3348       *significandParts() |= 0x800000; // integer bit
3349   }
3350 }
3351 
3352 void
initFromHalfAPInt(const APInt & api)3353 APFloat::initFromHalfAPInt(const APInt & api)
3354 {
3355   assert(api.getBitWidth()==16);
3356   uint32_t i = (uint32_t)*api.getRawData();
3357   uint32_t myexponent = (i >> 10) & 0x1f;
3358   uint32_t mysignificand = i & 0x3ff;
3359 
3360   initialize(&APFloat::IEEEhalf);
3361   assert(partCount()==1);
3362 
3363   sign = i >> 15;
3364   if (myexponent==0 && mysignificand==0) {
3365     // exponent, significand meaningless
3366     category = fcZero;
3367   } else if (myexponent==0x1f && mysignificand==0) {
3368     // exponent, significand meaningless
3369     category = fcInfinity;
3370   } else if (myexponent==0x1f && mysignificand!=0) {
3371     // sign, exponent, significand meaningless
3372     category = fcNaN;
3373     *significandParts() = mysignificand;
3374   } else {
3375     category = fcNormal;
3376     exponent = myexponent - 15;  //bias
3377     *significandParts() = mysignificand;
3378     if (myexponent==0)    // denormal
3379       exponent = -14;
3380     else
3381       *significandParts() |= 0x400; // integer bit
3382   }
3383 }
3384 
3385 /// Treat api as containing the bits of a floating point number.  Currently
3386 /// we infer the floating point type from the size of the APInt.  The
3387 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3388 /// when the size is anything else).
3389 void
initFromAPInt(const fltSemantics * Sem,const APInt & api)3390 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api)
3391 {
3392   if (Sem == &IEEEhalf)
3393     return initFromHalfAPInt(api);
3394   if (Sem == &IEEEsingle)
3395     return initFromFloatAPInt(api);
3396   if (Sem == &IEEEdouble)
3397     return initFromDoubleAPInt(api);
3398   if (Sem == &x87DoubleExtended)
3399     return initFromF80LongDoubleAPInt(api);
3400   if (Sem == &IEEEquad)
3401     return initFromQuadrupleAPInt(api);
3402   if (Sem == &PPCDoubleDouble)
3403     return initFromPPCDoubleDoubleAPInt(api);
3404 
3405   llvm_unreachable(nullptr);
3406 }
3407 
3408 APFloat
getAllOnesValue(unsigned BitWidth,bool isIEEE)3409 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3410 {
3411   switch (BitWidth) {
3412   case 16:
3413     return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth));
3414   case 32:
3415     return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth));
3416   case 64:
3417     return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth));
3418   case 80:
3419     return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth));
3420   case 128:
3421     if (isIEEE)
3422       return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth));
3423     return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
3424   default:
3425     llvm_unreachable("Unknown floating bit width");
3426   }
3427 }
3428 
getSizeInBits(const fltSemantics & Sem)3429 unsigned APFloat::getSizeInBits(const fltSemantics &Sem) {
3430   return Sem.sizeInBits;
3431 }
3432 
3433 /// Make this number the largest magnitude normal number in the given
3434 /// semantics.
makeLargest(bool Negative)3435 void APFloat::makeLargest(bool Negative) {
3436   // We want (in interchange format):
3437   //   sign = {Negative}
3438   //   exponent = 1..10
3439   //   significand = 1..1
3440   category = fcNormal;
3441   sign = Negative;
3442   exponent = semantics->maxExponent;
3443 
3444   // Use memset to set all but the highest integerPart to all ones.
3445   integerPart *significand = significandParts();
3446   unsigned PartCount = partCount();
3447   memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3448 
3449   // Set the high integerPart especially setting all unused top bits for
3450   // internal consistency.
3451   const unsigned NumUnusedHighBits =
3452     PartCount*integerPartWidth - semantics->precision;
3453   significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3454                                    ? (~integerPart(0) >> NumUnusedHighBits)
3455                                    : 0;
3456 }
3457 
3458 /// Make this number the smallest magnitude denormal number in the given
3459 /// semantics.
makeSmallest(bool Negative)3460 void APFloat::makeSmallest(bool Negative) {
3461   // We want (in interchange format):
3462   //   sign = {Negative}
3463   //   exponent = 0..0
3464   //   significand = 0..01
3465   category = fcNormal;
3466   sign = Negative;
3467   exponent = semantics->minExponent;
3468   APInt::tcSet(significandParts(), 1, partCount());
3469 }
3470 
3471 
getLargest(const fltSemantics & Sem,bool Negative)3472 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3473   // We want (in interchange format):
3474   //   sign = {Negative}
3475   //   exponent = 1..10
3476   //   significand = 1..1
3477   APFloat Val(Sem, uninitialized);
3478   Val.makeLargest(Negative);
3479   return Val;
3480 }
3481 
getSmallest(const fltSemantics & Sem,bool Negative)3482 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3483   // We want (in interchange format):
3484   //   sign = {Negative}
3485   //   exponent = 0..0
3486   //   significand = 0..01
3487   APFloat Val(Sem, uninitialized);
3488   Val.makeSmallest(Negative);
3489   return Val;
3490 }
3491 
getSmallestNormalized(const fltSemantics & Sem,bool Negative)3492 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3493   APFloat Val(Sem, uninitialized);
3494 
3495   // We want (in interchange format):
3496   //   sign = {Negative}
3497   //   exponent = 0..0
3498   //   significand = 10..0
3499 
3500   Val.category = fcNormal;
3501   Val.zeroSignificand();
3502   Val.sign = Negative;
3503   Val.exponent = Sem.minExponent;
3504   Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3505     (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3506 
3507   return Val;
3508 }
3509 
APFloat(const fltSemantics & Sem,const APInt & API)3510 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) {
3511   initFromAPInt(&Sem, API);
3512 }
3513 
APFloat(float f)3514 APFloat::APFloat(float f) {
3515   initFromAPInt(&IEEEsingle, APInt::floatToBits(f));
3516 }
3517 
APFloat(double d)3518 APFloat::APFloat(double d) {
3519   initFromAPInt(&IEEEdouble, APInt::doubleToBits(d));
3520 }
3521 
3522 namespace {
append(SmallVectorImpl<char> & Buffer,StringRef Str)3523   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3524     Buffer.append(Str.begin(), Str.end());
3525   }
3526 
3527   /// Removes data from the given significand until it is no more
3528   /// precise than is required for the desired precision.
AdjustToPrecision(APInt & significand,int & exp,unsigned FormatPrecision)3529   void AdjustToPrecision(APInt &significand,
3530                          int &exp, unsigned FormatPrecision) {
3531     unsigned bits = significand.getActiveBits();
3532 
3533     // 196/59 is a very slight overestimate of lg_2(10).
3534     unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3535 
3536     if (bits <= bitsRequired) return;
3537 
3538     unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3539     if (!tensRemovable) return;
3540 
3541     exp += tensRemovable;
3542 
3543     APInt divisor(significand.getBitWidth(), 1);
3544     APInt powten(significand.getBitWidth(), 10);
3545     while (true) {
3546       if (tensRemovable & 1)
3547         divisor *= powten;
3548       tensRemovable >>= 1;
3549       if (!tensRemovable) break;
3550       powten *= powten;
3551     }
3552 
3553     significand = significand.udiv(divisor);
3554 
3555     // Truncate the significand down to its active bit count.
3556     significand = significand.trunc(significand.getActiveBits());
3557   }
3558 
3559 
AdjustToPrecision(SmallVectorImpl<char> & buffer,int & exp,unsigned FormatPrecision)3560   void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3561                          int &exp, unsigned FormatPrecision) {
3562     unsigned N = buffer.size();
3563     if (N <= FormatPrecision) return;
3564 
3565     // The most significant figures are the last ones in the buffer.
3566     unsigned FirstSignificant = N - FormatPrecision;
3567 
3568     // Round.
3569     // FIXME: this probably shouldn't use 'round half up'.
3570 
3571     // Rounding down is just a truncation, except we also want to drop
3572     // trailing zeros from the new result.
3573     if (buffer[FirstSignificant - 1] < '5') {
3574       while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3575         FirstSignificant++;
3576 
3577       exp += FirstSignificant;
3578       buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3579       return;
3580     }
3581 
3582     // Rounding up requires a decimal add-with-carry.  If we continue
3583     // the carry, the newly-introduced zeros will just be truncated.
3584     for (unsigned I = FirstSignificant; I != N; ++I) {
3585       if (buffer[I] == '9') {
3586         FirstSignificant++;
3587       } else {
3588         buffer[I]++;
3589         break;
3590       }
3591     }
3592 
3593     // If we carried through, we have exactly one digit of precision.
3594     if (FirstSignificant == N) {
3595       exp += FirstSignificant;
3596       buffer.clear();
3597       buffer.push_back('1');
3598       return;
3599     }
3600 
3601     exp += FirstSignificant;
3602     buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3603   }
3604 }
3605 
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding) const3606 void APFloat::toString(SmallVectorImpl<char> &Str,
3607                        unsigned FormatPrecision,
3608                        unsigned FormatMaxPadding) const {
3609   switch (category) {
3610   case fcInfinity:
3611     if (isNegative())
3612       return append(Str, "-Inf");
3613     else
3614       return append(Str, "+Inf");
3615 
3616   case fcNaN: return append(Str, "NaN");
3617 
3618   case fcZero:
3619     if (isNegative())
3620       Str.push_back('-');
3621 
3622     if (!FormatMaxPadding)
3623       append(Str, "0.0E+0");
3624     else
3625       Str.push_back('0');
3626     return;
3627 
3628   case fcNormal:
3629     break;
3630   }
3631 
3632   if (isNegative())
3633     Str.push_back('-');
3634 
3635   // Decompose the number into an APInt and an exponent.
3636   int exp = exponent - ((int) semantics->precision - 1);
3637   APInt significand(semantics->precision,
3638                     makeArrayRef(significandParts(),
3639                                  partCountForBits(semantics->precision)));
3640 
3641   // Set FormatPrecision if zero.  We want to do this before we
3642   // truncate trailing zeros, as those are part of the precision.
3643   if (!FormatPrecision) {
3644     // We use enough digits so the number can be round-tripped back to an
3645     // APFloat. The formula comes from "How to Print Floating-Point Numbers
3646     // Accurately" by Steele and White.
3647     // FIXME: Using a formula based purely on the precision is conservative;
3648     // we can print fewer digits depending on the actual value being printed.
3649 
3650     // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3651     FormatPrecision = 2 + semantics->precision * 59 / 196;
3652   }
3653 
3654   // Ignore trailing binary zeros.
3655   int trailingZeros = significand.countTrailingZeros();
3656   exp += trailingZeros;
3657   significand = significand.lshr(trailingZeros);
3658 
3659   // Change the exponent from 2^e to 10^e.
3660   if (exp == 0) {
3661     // Nothing to do.
3662   } else if (exp > 0) {
3663     // Just shift left.
3664     significand = significand.zext(semantics->precision + exp);
3665     significand <<= exp;
3666     exp = 0;
3667   } else { /* exp < 0 */
3668     int texp = -exp;
3669 
3670     // We transform this using the identity:
3671     //   (N)(2^-e) == (N)(5^e)(10^-e)
3672     // This means we have to multiply N (the significand) by 5^e.
3673     // To avoid overflow, we have to operate on numbers large
3674     // enough to store N * 5^e:
3675     //   log2(N * 5^e) == log2(N) + e * log2(5)
3676     //                 <= semantics->precision + e * 137 / 59
3677     //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3678 
3679     unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3680 
3681     // Multiply significand by 5^e.
3682     //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3683     significand = significand.zext(precision);
3684     APInt five_to_the_i(precision, 5);
3685     while (true) {
3686       if (texp & 1) significand *= five_to_the_i;
3687 
3688       texp >>= 1;
3689       if (!texp) break;
3690       five_to_the_i *= five_to_the_i;
3691     }
3692   }
3693 
3694   AdjustToPrecision(significand, exp, FormatPrecision);
3695 
3696   SmallVector<char, 256> buffer;
3697 
3698   // Fill the buffer.
3699   unsigned precision = significand.getBitWidth();
3700   APInt ten(precision, 10);
3701   APInt digit(precision, 0);
3702 
3703   bool inTrail = true;
3704   while (significand != 0) {
3705     // digit <- significand % 10
3706     // significand <- significand / 10
3707     APInt::udivrem(significand, ten, significand, digit);
3708 
3709     unsigned d = digit.getZExtValue();
3710 
3711     // Drop trailing zeros.
3712     if (inTrail && !d) exp++;
3713     else {
3714       buffer.push_back((char) ('0' + d));
3715       inTrail = false;
3716     }
3717   }
3718 
3719   assert(!buffer.empty() && "no characters in buffer!");
3720 
3721   // Drop down to FormatPrecision.
3722   // TODO: don't do more precise calculations above than are required.
3723   AdjustToPrecision(buffer, exp, FormatPrecision);
3724 
3725   unsigned NDigits = buffer.size();
3726 
3727   // Check whether we should use scientific notation.
3728   bool FormatScientific;
3729   if (!FormatMaxPadding)
3730     FormatScientific = true;
3731   else {
3732     if (exp >= 0) {
3733       // 765e3 --> 765000
3734       //              ^^^
3735       // But we shouldn't make the number look more precise than it is.
3736       FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3737                           NDigits + (unsigned) exp > FormatPrecision);
3738     } else {
3739       // Power of the most significant digit.
3740       int MSD = exp + (int) (NDigits - 1);
3741       if (MSD >= 0) {
3742         // 765e-2 == 7.65
3743         FormatScientific = false;
3744       } else {
3745         // 765e-5 == 0.00765
3746         //           ^ ^^
3747         FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3748       }
3749     }
3750   }
3751 
3752   // Scientific formatting is pretty straightforward.
3753   if (FormatScientific) {
3754     exp += (NDigits - 1);
3755 
3756     Str.push_back(buffer[NDigits-1]);
3757     Str.push_back('.');
3758     if (NDigits == 1)
3759       Str.push_back('0');
3760     else
3761       for (unsigned I = 1; I != NDigits; ++I)
3762         Str.push_back(buffer[NDigits-1-I]);
3763     Str.push_back('E');
3764 
3765     Str.push_back(exp >= 0 ? '+' : '-');
3766     if (exp < 0) exp = -exp;
3767     SmallVector<char, 6> expbuf;
3768     do {
3769       expbuf.push_back((char) ('0' + (exp % 10)));
3770       exp /= 10;
3771     } while (exp);
3772     for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3773       Str.push_back(expbuf[E-1-I]);
3774     return;
3775   }
3776 
3777   // Non-scientific, positive exponents.
3778   if (exp >= 0) {
3779     for (unsigned I = 0; I != NDigits; ++I)
3780       Str.push_back(buffer[NDigits-1-I]);
3781     for (unsigned I = 0; I != (unsigned) exp; ++I)
3782       Str.push_back('0');
3783     return;
3784   }
3785 
3786   // Non-scientific, negative exponents.
3787 
3788   // The number of digits to the left of the decimal point.
3789   int NWholeDigits = exp + (int) NDigits;
3790 
3791   unsigned I = 0;
3792   if (NWholeDigits > 0) {
3793     for (; I != (unsigned) NWholeDigits; ++I)
3794       Str.push_back(buffer[NDigits-I-1]);
3795     Str.push_back('.');
3796   } else {
3797     unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3798 
3799     Str.push_back('0');
3800     Str.push_back('.');
3801     for (unsigned Z = 1; Z != NZeros; ++Z)
3802       Str.push_back('0');
3803   }
3804 
3805   for (; I != NDigits; ++I)
3806     Str.push_back(buffer[NDigits-I-1]);
3807 }
3808 
getExactInverse(APFloat * inv) const3809 bool APFloat::getExactInverse(APFloat *inv) const {
3810   // Special floats and denormals have no exact inverse.
3811   if (!isFiniteNonZero())
3812     return false;
3813 
3814   // Check that the number is a power of two by making sure that only the
3815   // integer bit is set in the significand.
3816   if (significandLSB() != semantics->precision - 1)
3817     return false;
3818 
3819   // Get the inverse.
3820   APFloat reciprocal(*semantics, 1ULL);
3821   if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3822     return false;
3823 
3824   // Avoid multiplication with a denormal, it is not safe on all platforms and
3825   // may be slower than a normal division.
3826   if (reciprocal.isDenormal())
3827     return false;
3828 
3829   assert(reciprocal.isFiniteNonZero() &&
3830          reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3831 
3832   if (inv)
3833     *inv = reciprocal;
3834 
3835   return true;
3836 }
3837 
isSignaling() const3838 bool APFloat::isSignaling() const {
3839   if (!isNaN())
3840     return false;
3841 
3842   // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3843   // first bit of the trailing significand being 0.
3844   return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3845 }
3846 
3847 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3848 ///
3849 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3850 /// appropriate sign switching before/after the computation.
next(bool nextDown)3851 APFloat::opStatus APFloat::next(bool nextDown) {
3852   // If we are performing nextDown, swap sign so we have -x.
3853   if (nextDown)
3854     changeSign();
3855 
3856   // Compute nextUp(x)
3857   opStatus result = opOK;
3858 
3859   // Handle each float category separately.
3860   switch (category) {
3861   case fcInfinity:
3862     // nextUp(+inf) = +inf
3863     if (!isNegative())
3864       break;
3865     // nextUp(-inf) = -getLargest()
3866     makeLargest(true);
3867     break;
3868   case fcNaN:
3869     // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3870     // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3871     //                     change the payload.
3872     if (isSignaling()) {
3873       result = opInvalidOp;
3874       // For consistency, propagate the sign of the sNaN to the qNaN.
3875       makeNaN(false, isNegative(), nullptr);
3876     }
3877     break;
3878   case fcZero:
3879     // nextUp(pm 0) = +getSmallest()
3880     makeSmallest(false);
3881     break;
3882   case fcNormal:
3883     // nextUp(-getSmallest()) = -0
3884     if (isSmallest() && isNegative()) {
3885       APInt::tcSet(significandParts(), 0, partCount());
3886       category = fcZero;
3887       exponent = 0;
3888       break;
3889     }
3890 
3891     // nextUp(getLargest()) == INFINITY
3892     if (isLargest() && !isNegative()) {
3893       APInt::tcSet(significandParts(), 0, partCount());
3894       category = fcInfinity;
3895       exponent = semantics->maxExponent + 1;
3896       break;
3897     }
3898 
3899     // nextUp(normal) == normal + inc.
3900     if (isNegative()) {
3901       // If we are negative, we need to decrement the significand.
3902 
3903       // We only cross a binade boundary that requires adjusting the exponent
3904       // if:
3905       //   1. exponent != semantics->minExponent. This implies we are not in the
3906       //   smallest binade or are dealing with denormals.
3907       //   2. Our significand excluding the integral bit is all zeros.
3908       bool WillCrossBinadeBoundary =
3909         exponent != semantics->minExponent && isSignificandAllZeros();
3910 
3911       // Decrement the significand.
3912       //
3913       // We always do this since:
3914       //   1. If we are dealing with a non-binade decrement, by definition we
3915       //   just decrement the significand.
3916       //   2. If we are dealing with a normal -> normal binade decrement, since
3917       //   we have an explicit integral bit the fact that all bits but the
3918       //   integral bit are zero implies that subtracting one will yield a
3919       //   significand with 0 integral bit and 1 in all other spots. Thus we
3920       //   must just adjust the exponent and set the integral bit to 1.
3921       //   3. If we are dealing with a normal -> denormal binade decrement,
3922       //   since we set the integral bit to 0 when we represent denormals, we
3923       //   just decrement the significand.
3924       integerPart *Parts = significandParts();
3925       APInt::tcDecrement(Parts, partCount());
3926 
3927       if (WillCrossBinadeBoundary) {
3928         // Our result is a normal number. Do the following:
3929         // 1. Set the integral bit to 1.
3930         // 2. Decrement the exponent.
3931         APInt::tcSetBit(Parts, semantics->precision - 1);
3932         exponent--;
3933       }
3934     } else {
3935       // If we are positive, we need to increment the significand.
3936 
3937       // We only cross a binade boundary that requires adjusting the exponent if
3938       // the input is not a denormal and all of said input's significand bits
3939       // are set. If all of said conditions are true: clear the significand, set
3940       // the integral bit to 1, and increment the exponent. If we have a
3941       // denormal always increment since moving denormals and the numbers in the
3942       // smallest normal binade have the same exponent in our representation.
3943       bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3944 
3945       if (WillCrossBinadeBoundary) {
3946         integerPart *Parts = significandParts();
3947         APInt::tcSet(Parts, 0, partCount());
3948         APInt::tcSetBit(Parts, semantics->precision - 1);
3949         assert(exponent != semantics->maxExponent &&
3950                "We can not increment an exponent beyond the maxExponent allowed"
3951                " by the given floating point semantics.");
3952         exponent++;
3953       } else {
3954         incrementSignificand();
3955       }
3956     }
3957     break;
3958   }
3959 
3960   // If we are performing nextDown, swap sign so we have -nextUp(-x)
3961   if (nextDown)
3962     changeSign();
3963 
3964   return result;
3965 }
3966 
3967 void
makeInf(bool Negative)3968 APFloat::makeInf(bool Negative) {
3969   category = fcInfinity;
3970   sign = Negative;
3971   exponent = semantics->maxExponent + 1;
3972   APInt::tcSet(significandParts(), 0, partCount());
3973 }
3974 
3975 void
makeZero(bool Negative)3976 APFloat::makeZero(bool Negative) {
3977   category = fcZero;
3978   sign = Negative;
3979   exponent = semantics->minExponent-1;
3980   APInt::tcSet(significandParts(), 0, partCount());
3981 }
3982 
scalbn(APFloat X,int Exp)3983 APFloat llvm::scalbn(APFloat X, int Exp) {
3984   if (X.isInfinity() || X.isZero() || X.isNaN())
3985     return X;
3986 
3987   auto MaxExp = X.getSemantics().maxExponent;
3988   auto MinExp = X.getSemantics().minExponent;
3989   if (Exp > (MaxExp - X.exponent))
3990     // Overflow saturates to infinity.
3991     return APFloat::getInf(X.getSemantics(), X.isNegative());
3992   if (Exp < (MinExp - X.exponent))
3993     // Underflow saturates to zero.
3994     return APFloat::getZero(X.getSemantics(), X.isNegative());
3995 
3996   X.exponent += Exp;
3997   return X;
3998 }
3999