1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/llvm-config.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <cstring>
27 #include <limits.h>
28 
29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
30   do {                                                                         \
31     if (usesLayout<IEEEFloat>(getSemantics()))                                 \
32       return U.IEEE.METHOD_CALL;                                               \
33     if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
34       return U.Double.METHOD_CALL;                                             \
35     llvm_unreachable("Unexpected semantics");                                  \
36   } while (false)
37 
38 using namespace llvm;
39 
40 /// A macro used to combine two fcCategory enums into one key which can be used
41 /// in a switch statement to classify how the interaction of two APFloat's
42 /// categories affects an operation.
43 ///
44 /// TODO: If clang source code is ever allowed to use constexpr in its own
45 /// codebase, change this into a static inline function.
46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
47 
48 /* Assumed in hexadecimal significand parsing, and conversion to
49    hexadecimal strings.  */
50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
51 
52 namespace llvm {
53   /* Represents floating point arithmetic semantics.  */
54   struct fltSemantics {
55     /* The largest E such that 2^E is representable; this matches the
56        definition of IEEE 754.  */
57     APFloatBase::ExponentType maxExponent;
58 
59     /* The smallest E such that 2^E is a normalized number; this
60        matches the definition of IEEE 754.  */
61     APFloatBase::ExponentType minExponent;
62 
63     /* Number of bits in the significand.  This includes the integer
64        bit.  */
65     unsigned int precision;
66 
67     /* Number of bits actually used in the semantics. */
68     unsigned int sizeInBits;
69 
70     // Returns true if any number described by this semantics can be precisely
71     // represented by the specified semantics.
isRepresentableByllvm::fltSemantics72     bool isRepresentableBy(const fltSemantics &S) const {
73       return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
74              precision <= S.precision;
75     }
76   };
77 
78   static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
79   static const fltSemantics semBFloat = {127, -126, 8, 16};
80   static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
81   static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
82   static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
83   static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
84   static const fltSemantics semBogus = {0, 0, 0, 0};
85 
86   /* The IBM double-double semantics. Such a number consists of a pair of IEEE
87      64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
88      (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
89      Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
90      to each other, and two 11-bit exponents.
91 
92      Note: we need to make the value different from semBogus as otherwise
93      an unsafe optimization may collapse both values to a single address,
94      and we heavily rely on them having distinct addresses.             */
95   static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
96 
97   /* These are legacy semantics for the fallback, inaccrurate implementation of
98      IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
99      operation. It's equivalent to having an IEEE number with consecutive 106
100      bits of mantissa and 11 bits of exponent.
101 
102      It's not equivalent to IBM double-double. For example, a legit IBM
103      double-double, 1 + epsilon:
104 
105        1 + epsilon = 1 + (1 >> 1076)
106 
107      is not representable by a consecutive 106 bits of mantissa.
108 
109      Currently, these semantics are used in the following way:
110 
111        semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
112        (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
113        semPPCDoubleDoubleLegacy -> IEEE operations
114 
115      We use bitcastToAPInt() to get the bit representation (in APInt) of the
116      underlying IEEEdouble, then use the APInt constructor to construct the
117      legacy IEEE float.
118 
119      TODO: Implement all operations in semPPCDoubleDouble, and delete these
120      semantics.  */
121   static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
122                                                         53 + 53, 128};
123 
EnumToSemantics(Semantics S)124   const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
125     switch (S) {
126     case S_IEEEhalf:
127       return IEEEhalf();
128     case S_BFloat:
129       return BFloat();
130     case S_IEEEsingle:
131       return IEEEsingle();
132     case S_IEEEdouble:
133       return IEEEdouble();
134     case S_x87DoubleExtended:
135       return x87DoubleExtended();
136     case S_IEEEquad:
137       return IEEEquad();
138     case S_PPCDoubleDouble:
139       return PPCDoubleDouble();
140     }
141     llvm_unreachable("Unrecognised floating semantics");
142   }
143 
144   APFloatBase::Semantics
SemanticsToEnum(const llvm::fltSemantics & Sem)145   APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
146     if (&Sem == &llvm::APFloat::IEEEhalf())
147       return S_IEEEhalf;
148     else if (&Sem == &llvm::APFloat::BFloat())
149       return S_BFloat;
150     else if (&Sem == &llvm::APFloat::IEEEsingle())
151       return S_IEEEsingle;
152     else if (&Sem == &llvm::APFloat::IEEEdouble())
153       return S_IEEEdouble;
154     else if (&Sem == &llvm::APFloat::x87DoubleExtended())
155       return S_x87DoubleExtended;
156     else if (&Sem == &llvm::APFloat::IEEEquad())
157       return S_IEEEquad;
158     else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
159       return S_PPCDoubleDouble;
160     else
161       llvm_unreachable("Unknown floating semantics");
162   }
163 
IEEEhalf()164   const fltSemantics &APFloatBase::IEEEhalf() {
165     return semIEEEhalf;
166   }
BFloat()167   const fltSemantics &APFloatBase::BFloat() {
168     return semBFloat;
169   }
IEEEsingle()170   const fltSemantics &APFloatBase::IEEEsingle() {
171     return semIEEEsingle;
172   }
IEEEdouble()173   const fltSemantics &APFloatBase::IEEEdouble() {
174     return semIEEEdouble;
175   }
IEEEquad()176   const fltSemantics &APFloatBase::IEEEquad() {
177     return semIEEEquad;
178   }
x87DoubleExtended()179   const fltSemantics &APFloatBase::x87DoubleExtended() {
180     return semX87DoubleExtended;
181   }
Bogus()182   const fltSemantics &APFloatBase::Bogus() {
183     return semBogus;
184   }
PPCDoubleDouble()185   const fltSemantics &APFloatBase::PPCDoubleDouble() {
186     return semPPCDoubleDouble;
187   }
188 
189   constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
190   constexpr RoundingMode APFloatBase::rmTowardPositive;
191   constexpr RoundingMode APFloatBase::rmTowardNegative;
192   constexpr RoundingMode APFloatBase::rmTowardZero;
193   constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
194 
195   /* A tight upper bound on number of parts required to hold the value
196      pow(5, power) is
197 
198        power * 815 / (351 * integerPartWidth) + 1
199 
200      However, whilst the result may require only this many parts,
201      because we are multiplying two values to get it, the
202      multiplication may require an extra part with the excess part
203      being zero (consider the trivial case of 1 * 1, tcFullMultiply
204      requires two parts to hold the single-part result).  So we add an
205      extra one to guarantee enough space whilst multiplying.  */
206   const unsigned int maxExponent = 16383;
207   const unsigned int maxPrecision = 113;
208   const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
209   const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
210 
semanticsPrecision(const fltSemantics & semantics)211   unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
212     return semantics.precision;
213   }
214   APFloatBase::ExponentType
semanticsMaxExponent(const fltSemantics & semantics)215   APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
216     return semantics.maxExponent;
217   }
218   APFloatBase::ExponentType
semanticsMinExponent(const fltSemantics & semantics)219   APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
220     return semantics.minExponent;
221   }
semanticsSizeInBits(const fltSemantics & semantics)222   unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
223     return semantics.sizeInBits;
224   }
225 
getSizeInBits(const fltSemantics & Sem)226   unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
227     return Sem.sizeInBits;
228 }
229 
230 /* A bunch of private, handy routines.  */
231 
createError(const Twine & Err)232 static inline Error createError(const Twine &Err) {
233   return make_error<StringError>(Err, inconvertibleErrorCode());
234 }
235 
236 static inline unsigned int
partCountForBits(unsigned int bits)237 partCountForBits(unsigned int bits)
238 {
239   return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
240 }
241 
242 /* Returns 0U-9U.  Return values >= 10U are not digits.  */
243 static inline unsigned int
decDigitValue(unsigned int c)244 decDigitValue(unsigned int c)
245 {
246   return c - '0';
247 }
248 
249 /* Return the value of a decimal exponent of the form
250    [+-]ddddddd.
251 
252    If the exponent overflows, returns a large exponent with the
253    appropriate sign.  */
readExponent(StringRef::iterator begin,StringRef::iterator end)254 static Expected<int> readExponent(StringRef::iterator begin,
255                                   StringRef::iterator end) {
256   bool isNegative;
257   unsigned int absExponent;
258   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
259   StringRef::iterator p = begin;
260 
261   // Treat no exponent as 0 to match binutils
262   if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
263     return 0;
264   }
265 
266   isNegative = (*p == '-');
267   if (*p == '-' || *p == '+') {
268     p++;
269     if (p == end)
270       return createError("Exponent has no digits");
271   }
272 
273   absExponent = decDigitValue(*p++);
274   if (absExponent >= 10U)
275     return createError("Invalid character in exponent");
276 
277   for (; p != end; ++p) {
278     unsigned int value;
279 
280     value = decDigitValue(*p);
281     if (value >= 10U)
282       return createError("Invalid character in exponent");
283 
284     absExponent = absExponent * 10U + value;
285     if (absExponent >= overlargeExponent) {
286       absExponent = overlargeExponent;
287       break;
288     }
289   }
290 
291   if (isNegative)
292     return -(int) absExponent;
293   else
294     return (int) absExponent;
295 }
296 
297 /* This is ugly and needs cleaning up, but I don't immediately see
298    how whilst remaining safe.  */
totalExponent(StringRef::iterator p,StringRef::iterator end,int exponentAdjustment)299 static Expected<int> totalExponent(StringRef::iterator p,
300                                    StringRef::iterator end,
301                                    int exponentAdjustment) {
302   int unsignedExponent;
303   bool negative, overflow;
304   int exponent = 0;
305 
306   if (p == end)
307     return createError("Exponent has no digits");
308 
309   negative = *p == '-';
310   if (*p == '-' || *p == '+') {
311     p++;
312     if (p == end)
313       return createError("Exponent has no digits");
314   }
315 
316   unsignedExponent = 0;
317   overflow = false;
318   for (; p != end; ++p) {
319     unsigned int value;
320 
321     value = decDigitValue(*p);
322     if (value >= 10U)
323       return createError("Invalid character in exponent");
324 
325     unsignedExponent = unsignedExponent * 10 + value;
326     if (unsignedExponent > 32767) {
327       overflow = true;
328       break;
329     }
330   }
331 
332   if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
333     overflow = true;
334 
335   if (!overflow) {
336     exponent = unsignedExponent;
337     if (negative)
338       exponent = -exponent;
339     exponent += exponentAdjustment;
340     if (exponent > 32767 || exponent < -32768)
341       overflow = true;
342   }
343 
344   if (overflow)
345     exponent = negative ? -32768: 32767;
346 
347   return exponent;
348 }
349 
350 static Expected<StringRef::iterator>
skipLeadingZeroesAndAnyDot(StringRef::iterator begin,StringRef::iterator end,StringRef::iterator * dot)351 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
352                            StringRef::iterator *dot) {
353   StringRef::iterator p = begin;
354   *dot = end;
355   while (p != end && *p == '0')
356     p++;
357 
358   if (p != end && *p == '.') {
359     *dot = p++;
360 
361     if (end - begin == 1)
362       return createError("Significand has no digits");
363 
364     while (p != end && *p == '0')
365       p++;
366   }
367 
368   return p;
369 }
370 
371 /* Given a normal decimal floating point number of the form
372 
373      dddd.dddd[eE][+-]ddd
374 
375    where the decimal point and exponent are optional, fill out the
376    structure D.  Exponent is appropriate if the significand is
377    treated as an integer, and normalizedExponent if the significand
378    is taken to have the decimal point after a single leading
379    non-zero digit.
380 
381    If the value is zero, V->firstSigDigit points to a non-digit, and
382    the return exponent is zero.
383 */
384 struct decimalInfo {
385   const char *firstSigDigit;
386   const char *lastSigDigit;
387   int exponent;
388   int normalizedExponent;
389 };
390 
interpretDecimal(StringRef::iterator begin,StringRef::iterator end,decimalInfo * D)391 static Error interpretDecimal(StringRef::iterator begin,
392                               StringRef::iterator end, decimalInfo *D) {
393   StringRef::iterator dot = end;
394 
395   auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
396   if (!PtrOrErr)
397     return PtrOrErr.takeError();
398   StringRef::iterator p = *PtrOrErr;
399 
400   D->firstSigDigit = p;
401   D->exponent = 0;
402   D->normalizedExponent = 0;
403 
404   for (; p != end; ++p) {
405     if (*p == '.') {
406       if (dot != end)
407         return createError("String contains multiple dots");
408       dot = p++;
409       if (p == end)
410         break;
411     }
412     if (decDigitValue(*p) >= 10U)
413       break;
414   }
415 
416   if (p != end) {
417     if (*p != 'e' && *p != 'E')
418       return createError("Invalid character in significand");
419     if (p == begin)
420       return createError("Significand has no digits");
421     if (dot != end && p - begin == 1)
422       return createError("Significand has no digits");
423 
424     /* p points to the first non-digit in the string */
425     auto ExpOrErr = readExponent(p + 1, end);
426     if (!ExpOrErr)
427       return ExpOrErr.takeError();
428     D->exponent = *ExpOrErr;
429 
430     /* Implied decimal point?  */
431     if (dot == end)
432       dot = p;
433   }
434 
435   /* If number is all zeroes accept any exponent.  */
436   if (p != D->firstSigDigit) {
437     /* Drop insignificant trailing zeroes.  */
438     if (p != begin) {
439       do
440         do
441           p--;
442         while (p != begin && *p == '0');
443       while (p != begin && *p == '.');
444     }
445 
446     /* Adjust the exponents for any decimal point.  */
447     D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
448     D->normalizedExponent = (D->exponent +
449               static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
450                                       - (dot > D->firstSigDigit && dot < p)));
451   }
452 
453   D->lastSigDigit = p;
454   return Error::success();
455 }
456 
457 /* Return the trailing fraction of a hexadecimal number.
458    DIGITVALUE is the first hex digit of the fraction, P points to
459    the next digit.  */
460 static Expected<lostFraction>
trailingHexadecimalFraction(StringRef::iterator p,StringRef::iterator end,unsigned int digitValue)461 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
462                             unsigned int digitValue) {
463   unsigned int hexDigit;
464 
465   /* If the first trailing digit isn't 0 or 8 we can work out the
466      fraction immediately.  */
467   if (digitValue > 8)
468     return lfMoreThanHalf;
469   else if (digitValue < 8 && digitValue > 0)
470     return lfLessThanHalf;
471 
472   // Otherwise we need to find the first non-zero digit.
473   while (p != end && (*p == '0' || *p == '.'))
474     p++;
475 
476   if (p == end)
477     return createError("Invalid trailing hexadecimal fraction!");
478 
479   hexDigit = hexDigitValue(*p);
480 
481   /* If we ran off the end it is exactly zero or one-half, otherwise
482      a little more.  */
483   if (hexDigit == -1U)
484     return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
485   else
486     return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
487 }
488 
489 /* Return the fraction lost were a bignum truncated losing the least
490    significant BITS bits.  */
491 static lostFraction
lostFractionThroughTruncation(const APFloatBase::integerPart * parts,unsigned int partCount,unsigned int bits)492 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
493                               unsigned int partCount,
494                               unsigned int bits)
495 {
496   unsigned int lsb;
497 
498   lsb = APInt::tcLSB(parts, partCount);
499 
500   /* Note this is guaranteed true if bits == 0, or LSB == -1U.  */
501   if (bits <= lsb)
502     return lfExactlyZero;
503   if (bits == lsb + 1)
504     return lfExactlyHalf;
505   if (bits <= partCount * APFloatBase::integerPartWidth &&
506       APInt::tcExtractBit(parts, bits - 1))
507     return lfMoreThanHalf;
508 
509   return lfLessThanHalf;
510 }
511 
512 /* Shift DST right BITS bits noting lost fraction.  */
513 static lostFraction
shiftRight(APFloatBase::integerPart * dst,unsigned int parts,unsigned int bits)514 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
515 {
516   lostFraction lost_fraction;
517 
518   lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
519 
520   APInt::tcShiftRight(dst, parts, bits);
521 
522   return lost_fraction;
523 }
524 
525 /* Combine the effect of two lost fractions.  */
526 static lostFraction
combineLostFractions(lostFraction moreSignificant,lostFraction lessSignificant)527 combineLostFractions(lostFraction moreSignificant,
528                      lostFraction lessSignificant)
529 {
530   if (lessSignificant != lfExactlyZero) {
531     if (moreSignificant == lfExactlyZero)
532       moreSignificant = lfLessThanHalf;
533     else if (moreSignificant == lfExactlyHalf)
534       moreSignificant = lfMoreThanHalf;
535   }
536 
537   return moreSignificant;
538 }
539 
540 /* The error from the true value, in half-ulps, on multiplying two
541    floating point numbers, which differ from the value they
542    approximate by at most HUE1 and HUE2 half-ulps, is strictly less
543    than the returned value.
544 
545    See "How to Read Floating Point Numbers Accurately" by William D
546    Clinger.  */
547 static unsigned int
HUerrBound(bool inexactMultiply,unsigned int HUerr1,unsigned int HUerr2)548 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
549 {
550   assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
551 
552   if (HUerr1 + HUerr2 == 0)
553     return inexactMultiply * 2;  /* <= inexactMultiply half-ulps.  */
554   else
555     return inexactMultiply + 2 * (HUerr1 + HUerr2);
556 }
557 
558 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
559    when the least significant BITS are truncated.  BITS cannot be
560    zero.  */
561 static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart * parts,unsigned int bits,bool isNearest)562 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
563                  bool isNearest) {
564   unsigned int count, partBits;
565   APFloatBase::integerPart part, boundary;
566 
567   assert(bits != 0);
568 
569   bits--;
570   count = bits / APFloatBase::integerPartWidth;
571   partBits = bits % APFloatBase::integerPartWidth + 1;
572 
573   part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
574 
575   if (isNearest)
576     boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
577   else
578     boundary = 0;
579 
580   if (count == 0) {
581     if (part - boundary <= boundary - part)
582       return part - boundary;
583     else
584       return boundary - part;
585   }
586 
587   if (part == boundary) {
588     while (--count)
589       if (parts[count])
590         return ~(APFloatBase::integerPart) 0; /* A lot.  */
591 
592     return parts[0];
593   } else if (part == boundary - 1) {
594     while (--count)
595       if (~parts[count])
596         return ~(APFloatBase::integerPart) 0; /* A lot.  */
597 
598     return -parts[0];
599   }
600 
601   return ~(APFloatBase::integerPart) 0; /* A lot.  */
602 }
603 
604 /* Place pow(5, power) in DST, and return the number of parts used.
605    DST must be at least one part larger than size of the answer.  */
606 static unsigned int
powerOf5(APFloatBase::integerPart * dst,unsigned int power)607 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
608   static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
609   APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
610   pow5s[0] = 78125 * 5;
611 
612   unsigned int partsCount[16] = { 1 };
613   APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
614   unsigned int result;
615   assert(power <= maxExponent);
616 
617   p1 = dst;
618   p2 = scratch;
619 
620   *p1 = firstEightPowers[power & 7];
621   power >>= 3;
622 
623   result = 1;
624   pow5 = pow5s;
625 
626   for (unsigned int n = 0; power; power >>= 1, n++) {
627     unsigned int pc;
628 
629     pc = partsCount[n];
630 
631     /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
632     if (pc == 0) {
633       pc = partsCount[n - 1];
634       APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
635       pc *= 2;
636       if (pow5[pc - 1] == 0)
637         pc--;
638       partsCount[n] = pc;
639     }
640 
641     if (power & 1) {
642       APFloatBase::integerPart *tmp;
643 
644       APInt::tcFullMultiply(p2, p1, pow5, result, pc);
645       result += pc;
646       if (p2[result - 1] == 0)
647         result--;
648 
649       /* Now result is in p1 with partsCount parts and p2 is scratch
650          space.  */
651       tmp = p1;
652       p1 = p2;
653       p2 = tmp;
654     }
655 
656     pow5 += pc;
657   }
658 
659   if (p1 != dst)
660     APInt::tcAssign(dst, p1, result);
661 
662   return result;
663 }
664 
665 /* Zero at the end to avoid modular arithmetic when adding one; used
666    when rounding up during hexadecimal output.  */
667 static const char hexDigitsLower[] = "0123456789abcdef0";
668 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
669 static const char infinityL[] = "infinity";
670 static const char infinityU[] = "INFINITY";
671 static const char NaNL[] = "nan";
672 static const char NaNU[] = "NAN";
673 
674 /* Write out an integerPart in hexadecimal, starting with the most
675    significant nibble.  Write out exactly COUNT hexdigits, return
676    COUNT.  */
677 static unsigned int
partAsHex(char * dst,APFloatBase::integerPart part,unsigned int count,const char * hexDigitChars)678 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
679            const char *hexDigitChars)
680 {
681   unsigned int result = count;
682 
683   assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
684 
685   part >>= (APFloatBase::integerPartWidth - 4 * count);
686   while (count--) {
687     dst[count] = hexDigitChars[part & 0xf];
688     part >>= 4;
689   }
690 
691   return result;
692 }
693 
694 /* Write out an unsigned decimal integer.  */
695 static char *
writeUnsignedDecimal(char * dst,unsigned int n)696 writeUnsignedDecimal (char *dst, unsigned int n)
697 {
698   char buff[40], *p;
699 
700   p = buff;
701   do
702     *p++ = '0' + n % 10;
703   while (n /= 10);
704 
705   do
706     *dst++ = *--p;
707   while (p != buff);
708 
709   return dst;
710 }
711 
712 /* Write out a signed decimal integer.  */
713 static char *
writeSignedDecimal(char * dst,int value)714 writeSignedDecimal (char *dst, int value)
715 {
716   if (value < 0) {
717     *dst++ = '-';
718     dst = writeUnsignedDecimal(dst, -(unsigned) value);
719   } else
720     dst = writeUnsignedDecimal(dst, value);
721 
722   return dst;
723 }
724 
725 namespace detail {
726 /* Constructors.  */
initialize(const fltSemantics * ourSemantics)727 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
728   unsigned int count;
729 
730   semantics = ourSemantics;
731   count = partCount();
732   if (count > 1)
733     significand.parts = new integerPart[count];
734 }
735 
freeSignificand()736 void IEEEFloat::freeSignificand() {
737   if (needsCleanup())
738     delete [] significand.parts;
739 }
740 
assign(const IEEEFloat & rhs)741 void IEEEFloat::assign(const IEEEFloat &rhs) {
742   assert(semantics == rhs.semantics);
743 
744   sign = rhs.sign;
745   category = rhs.category;
746   exponent = rhs.exponent;
747   if (isFiniteNonZero() || category == fcNaN)
748     copySignificand(rhs);
749 }
750 
copySignificand(const IEEEFloat & rhs)751 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
752   assert(isFiniteNonZero() || category == fcNaN);
753   assert(rhs.partCount() >= partCount());
754 
755   APInt::tcAssign(significandParts(), rhs.significandParts(),
756                   partCount());
757 }
758 
759 /* Make this number a NaN, with an arbitrary but deterministic value
760    for the significand.  If double or longer, this is a signalling NaN,
761    which may not be ideal.  If float, this is QNaN(0).  */
makeNaN(bool SNaN,bool Negative,const APInt * fill)762 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
763   category = fcNaN;
764   sign = Negative;
765   exponent = exponentNaN();
766 
767   integerPart *significand = significandParts();
768   unsigned numParts = partCount();
769 
770   // Set the significand bits to the fill.
771   if (!fill || fill->getNumWords() < numParts)
772     APInt::tcSet(significand, 0, numParts);
773   if (fill) {
774     APInt::tcAssign(significand, fill->getRawData(),
775                     std::min(fill->getNumWords(), numParts));
776 
777     // Zero out the excess bits of the significand.
778     unsigned bitsToPreserve = semantics->precision - 1;
779     unsigned part = bitsToPreserve / 64;
780     bitsToPreserve %= 64;
781     significand[part] &= ((1ULL << bitsToPreserve) - 1);
782     for (part++; part != numParts; ++part)
783       significand[part] = 0;
784   }
785 
786   unsigned QNaNBit = semantics->precision - 2;
787 
788   if (SNaN) {
789     // We always have to clear the QNaN bit to make it an SNaN.
790     APInt::tcClearBit(significand, QNaNBit);
791 
792     // If there are no bits set in the payload, we have to set
793     // *something* to make it a NaN instead of an infinity;
794     // conventionally, this is the next bit down from the QNaN bit.
795     if (APInt::tcIsZero(significand, numParts))
796       APInt::tcSetBit(significand, QNaNBit - 1);
797   } else {
798     // We always have to set the QNaN bit to make it a QNaN.
799     APInt::tcSetBit(significand, QNaNBit);
800   }
801 
802   // For x87 extended precision, we want to make a NaN, not a
803   // pseudo-NaN.  Maybe we should expose the ability to make
804   // pseudo-NaNs?
805   if (semantics == &semX87DoubleExtended)
806     APInt::tcSetBit(significand, QNaNBit + 1);
807 }
808 
operator =(const IEEEFloat & rhs)809 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
810   if (this != &rhs) {
811     if (semantics != rhs.semantics) {
812       freeSignificand();
813       initialize(rhs.semantics);
814     }
815     assign(rhs);
816   }
817 
818   return *this;
819 }
820 
operator =(IEEEFloat && rhs)821 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
822   freeSignificand();
823 
824   semantics = rhs.semantics;
825   significand = rhs.significand;
826   exponent = rhs.exponent;
827   category = rhs.category;
828   sign = rhs.sign;
829 
830   rhs.semantics = &semBogus;
831   return *this;
832 }
833 
isDenormal() const834 bool IEEEFloat::isDenormal() const {
835   return isFiniteNonZero() && (exponent == semantics->minExponent) &&
836          (APInt::tcExtractBit(significandParts(),
837                               semantics->precision - 1) == 0);
838 }
839 
isSmallest() const840 bool IEEEFloat::isSmallest() const {
841   // The smallest number by magnitude in our format will be the smallest
842   // denormal, i.e. the floating point number with exponent being minimum
843   // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
844   return isFiniteNonZero() && exponent == semantics->minExponent &&
845     significandMSB() == 0;
846 }
847 
isSignificandAllOnes() const848 bool IEEEFloat::isSignificandAllOnes() const {
849   // Test if the significand excluding the integral bit is all ones. This allows
850   // us to test for binade boundaries.
851   const integerPart *Parts = significandParts();
852   const unsigned PartCount = partCountForBits(semantics->precision);
853   for (unsigned i = 0; i < PartCount - 1; i++)
854     if (~Parts[i])
855       return false;
856 
857   // Set the unused high bits to all ones when we compare.
858   const unsigned NumHighBits =
859     PartCount*integerPartWidth - semantics->precision + 1;
860   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
861          "Can not have more high bits to fill than integerPartWidth");
862   const integerPart HighBitFill =
863     ~integerPart(0) << (integerPartWidth - NumHighBits);
864   if (~(Parts[PartCount - 1] | HighBitFill))
865     return false;
866 
867   return true;
868 }
869 
isSignificandAllZeros() const870 bool IEEEFloat::isSignificandAllZeros() const {
871   // Test if the significand excluding the integral bit is all zeros. This
872   // allows us to test for binade boundaries.
873   const integerPart *Parts = significandParts();
874   const unsigned PartCount = partCountForBits(semantics->precision);
875 
876   for (unsigned i = 0; i < PartCount - 1; i++)
877     if (Parts[i])
878       return false;
879 
880   // Compute how many bits are used in the final word.
881   const unsigned NumHighBits =
882     PartCount*integerPartWidth - semantics->precision + 1;
883   assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
884          "clear than integerPartWidth");
885   const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
886 
887   if (Parts[PartCount - 1] & HighBitMask)
888     return false;
889 
890   return true;
891 }
892 
isLargest() const893 bool IEEEFloat::isLargest() const {
894   // The largest number by magnitude in our format will be the floating point
895   // number with maximum exponent and with significand that is all ones.
896   return isFiniteNonZero() && exponent == semantics->maxExponent
897     && isSignificandAllOnes();
898 }
899 
isInteger() const900 bool IEEEFloat::isInteger() const {
901   // This could be made more efficient; I'm going for obviously correct.
902   if (!isFinite()) return false;
903   IEEEFloat truncated = *this;
904   truncated.roundToIntegral(rmTowardZero);
905   return compare(truncated) == cmpEqual;
906 }
907 
bitwiseIsEqual(const IEEEFloat & rhs) const908 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
909   if (this == &rhs)
910     return true;
911   if (semantics != rhs.semantics ||
912       category != rhs.category ||
913       sign != rhs.sign)
914     return false;
915   if (category==fcZero || category==fcInfinity)
916     return true;
917 
918   if (isFiniteNonZero() && exponent != rhs.exponent)
919     return false;
920 
921   return std::equal(significandParts(), significandParts() + partCount(),
922                     rhs.significandParts());
923 }
924 
IEEEFloat(const fltSemantics & ourSemantics,integerPart value)925 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
926   initialize(&ourSemantics);
927   sign = 0;
928   category = fcNormal;
929   zeroSignificand();
930   exponent = ourSemantics.precision - 1;
931   significandParts()[0] = value;
932   normalize(rmNearestTiesToEven, lfExactlyZero);
933 }
934 
IEEEFloat(const fltSemantics & ourSemantics)935 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
936   initialize(&ourSemantics);
937   makeZero(false);
938 }
939 
940 // Delegate to the previous constructor, because later copy constructor may
941 // actually inspects category, which can't be garbage.
IEEEFloat(const fltSemantics & ourSemantics,uninitializedTag tag)942 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
943     : IEEEFloat(ourSemantics) {}
944 
IEEEFloat(const IEEEFloat & rhs)945 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
946   initialize(rhs.semantics);
947   assign(rhs);
948 }
949 
IEEEFloat(IEEEFloat && rhs)950 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
951   *this = std::move(rhs);
952 }
953 
~IEEEFloat()954 IEEEFloat::~IEEEFloat() { freeSignificand(); }
955 
partCount() const956 unsigned int IEEEFloat::partCount() const {
957   return partCountForBits(semantics->precision + 1);
958 }
959 
significandParts() const960 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
961   return const_cast<IEEEFloat *>(this)->significandParts();
962 }
963 
significandParts()964 IEEEFloat::integerPart *IEEEFloat::significandParts() {
965   if (partCount() > 1)
966     return significand.parts;
967   else
968     return &significand.part;
969 }
970 
zeroSignificand()971 void IEEEFloat::zeroSignificand() {
972   APInt::tcSet(significandParts(), 0, partCount());
973 }
974 
975 /* Increment an fcNormal floating point number's significand.  */
incrementSignificand()976 void IEEEFloat::incrementSignificand() {
977   integerPart carry;
978 
979   carry = APInt::tcIncrement(significandParts(), partCount());
980 
981   /* Our callers should never cause us to overflow.  */
982   assert(carry == 0);
983   (void)carry;
984 }
985 
986 /* Add the significand of the RHS.  Returns the carry flag.  */
addSignificand(const IEEEFloat & rhs)987 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
988   integerPart *parts;
989 
990   parts = significandParts();
991 
992   assert(semantics == rhs.semantics);
993   assert(exponent == rhs.exponent);
994 
995   return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
996 }
997 
998 /* Subtract the significand of the RHS with a borrow flag.  Returns
999    the borrow flag.  */
subtractSignificand(const IEEEFloat & rhs,integerPart borrow)1000 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1001                                                       integerPart borrow) {
1002   integerPart *parts;
1003 
1004   parts = significandParts();
1005 
1006   assert(semantics == rhs.semantics);
1007   assert(exponent == rhs.exponent);
1008 
1009   return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1010                            partCount());
1011 }
1012 
1013 /* Multiply the significand of the RHS.  If ADDEND is non-NULL, add it
1014    on to the full-precision result of the multiplication.  Returns the
1015    lost fraction.  */
multiplySignificand(const IEEEFloat & rhs,IEEEFloat addend)1016 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1017                                             IEEEFloat addend) {
1018   unsigned int omsb;        // One, not zero, based MSB.
1019   unsigned int partsCount, newPartsCount, precision;
1020   integerPart *lhsSignificand;
1021   integerPart scratch[4];
1022   integerPart *fullSignificand;
1023   lostFraction lost_fraction;
1024   bool ignored;
1025 
1026   assert(semantics == rhs.semantics);
1027 
1028   precision = semantics->precision;
1029 
1030   // Allocate space for twice as many bits as the original significand, plus one
1031   // extra bit for the addition to overflow into.
1032   newPartsCount = partCountForBits(precision * 2 + 1);
1033 
1034   if (newPartsCount > 4)
1035     fullSignificand = new integerPart[newPartsCount];
1036   else
1037     fullSignificand = scratch;
1038 
1039   lhsSignificand = significandParts();
1040   partsCount = partCount();
1041 
1042   APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1043                         rhs.significandParts(), partsCount, partsCount);
1044 
1045   lost_fraction = lfExactlyZero;
1046   omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1047   exponent += rhs.exponent;
1048 
1049   // Assume the operands involved in the multiplication are single-precision
1050   // FP, and the two multiplicants are:
1051   //   *this = a23 . a22 ... a0 * 2^e1
1052   //     rhs = b23 . b22 ... b0 * 2^e2
1053   // the result of multiplication is:
1054   //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1055   // Note that there are three significant bits at the left-hand side of the
1056   // radix point: two for the multiplication, and an overflow bit for the
1057   // addition (that will always be zero at this point). Move the radix point
1058   // toward left by two bits, and adjust exponent accordingly.
1059   exponent += 2;
1060 
1061   if (addend.isNonZero()) {
1062     // The intermediate result of the multiplication has "2 * precision"
1063     // signicant bit; adjust the addend to be consistent with mul result.
1064     //
1065     Significand savedSignificand = significand;
1066     const fltSemantics *savedSemantics = semantics;
1067     fltSemantics extendedSemantics;
1068     opStatus status;
1069     unsigned int extendedPrecision;
1070 
1071     // Normalize our MSB to one below the top bit to allow for overflow.
1072     extendedPrecision = 2 * precision + 1;
1073     if (omsb != extendedPrecision - 1) {
1074       assert(extendedPrecision > omsb);
1075       APInt::tcShiftLeft(fullSignificand, newPartsCount,
1076                          (extendedPrecision - 1) - omsb);
1077       exponent -= (extendedPrecision - 1) - omsb;
1078     }
1079 
1080     /* Create new semantics.  */
1081     extendedSemantics = *semantics;
1082     extendedSemantics.precision = extendedPrecision;
1083 
1084     if (newPartsCount == 1)
1085       significand.part = fullSignificand[0];
1086     else
1087       significand.parts = fullSignificand;
1088     semantics = &extendedSemantics;
1089 
1090     // Make a copy so we can convert it to the extended semantics.
1091     // Note that we cannot convert the addend directly, as the extendedSemantics
1092     // is a local variable (which we take a reference to).
1093     IEEEFloat extendedAddend(addend);
1094     status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1095     assert(status == opOK);
1096     (void)status;
1097 
1098     // Shift the significand of the addend right by one bit. This guarantees
1099     // that the high bit of the significand is zero (same as fullSignificand),
1100     // so the addition will overflow (if it does overflow at all) into the top bit.
1101     lost_fraction = extendedAddend.shiftSignificandRight(1);
1102     assert(lost_fraction == lfExactlyZero &&
1103            "Lost precision while shifting addend for fused-multiply-add.");
1104 
1105     lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1106 
1107     /* Restore our state.  */
1108     if (newPartsCount == 1)
1109       fullSignificand[0] = significand.part;
1110     significand = savedSignificand;
1111     semantics = savedSemantics;
1112 
1113     omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1114   }
1115 
1116   // Convert the result having "2 * precision" significant-bits back to the one
1117   // having "precision" significant-bits. First, move the radix point from
1118   // poision "2*precision - 1" to "precision - 1". The exponent need to be
1119   // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1120   exponent -= precision + 1;
1121 
1122   // In case MSB resides at the left-hand side of radix point, shift the
1123   // mantissa right by some amount to make sure the MSB reside right before
1124   // the radix point (i.e. "MSB . rest-significant-bits").
1125   //
1126   // Note that the result is not normalized when "omsb < precision". So, the
1127   // caller needs to call IEEEFloat::normalize() if normalized value is
1128   // expected.
1129   if (omsb > precision) {
1130     unsigned int bits, significantParts;
1131     lostFraction lf;
1132 
1133     bits = omsb - precision;
1134     significantParts = partCountForBits(omsb);
1135     lf = shiftRight(fullSignificand, significantParts, bits);
1136     lost_fraction = combineLostFractions(lf, lost_fraction);
1137     exponent += bits;
1138   }
1139 
1140   APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1141 
1142   if (newPartsCount > 4)
1143     delete [] fullSignificand;
1144 
1145   return lost_fraction;
1146 }
1147 
multiplySignificand(const IEEEFloat & rhs)1148 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1149   return multiplySignificand(rhs, IEEEFloat(*semantics));
1150 }
1151 
1152 /* Multiply the significands of LHS and RHS to DST.  */
divideSignificand(const IEEEFloat & rhs)1153 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1154   unsigned int bit, i, partsCount;
1155   const integerPart *rhsSignificand;
1156   integerPart *lhsSignificand, *dividend, *divisor;
1157   integerPart scratch[4];
1158   lostFraction lost_fraction;
1159 
1160   assert(semantics == rhs.semantics);
1161 
1162   lhsSignificand = significandParts();
1163   rhsSignificand = rhs.significandParts();
1164   partsCount = partCount();
1165 
1166   if (partsCount > 2)
1167     dividend = new integerPart[partsCount * 2];
1168   else
1169     dividend = scratch;
1170 
1171   divisor = dividend + partsCount;
1172 
1173   /* Copy the dividend and divisor as they will be modified in-place.  */
1174   for (i = 0; i < partsCount; i++) {
1175     dividend[i] = lhsSignificand[i];
1176     divisor[i] = rhsSignificand[i];
1177     lhsSignificand[i] = 0;
1178   }
1179 
1180   exponent -= rhs.exponent;
1181 
1182   unsigned int precision = semantics->precision;
1183 
1184   /* Normalize the divisor.  */
1185   bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1186   if (bit) {
1187     exponent += bit;
1188     APInt::tcShiftLeft(divisor, partsCount, bit);
1189   }
1190 
1191   /* Normalize the dividend.  */
1192   bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1193   if (bit) {
1194     exponent -= bit;
1195     APInt::tcShiftLeft(dividend, partsCount, bit);
1196   }
1197 
1198   /* Ensure the dividend >= divisor initially for the loop below.
1199      Incidentally, this means that the division loop below is
1200      guaranteed to set the integer bit to one.  */
1201   if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1202     exponent--;
1203     APInt::tcShiftLeft(dividend, partsCount, 1);
1204     assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1205   }
1206 
1207   /* Long division.  */
1208   for (bit = precision; bit; bit -= 1) {
1209     if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1210       APInt::tcSubtract(dividend, divisor, 0, partsCount);
1211       APInt::tcSetBit(lhsSignificand, bit - 1);
1212     }
1213 
1214     APInt::tcShiftLeft(dividend, partsCount, 1);
1215   }
1216 
1217   /* Figure out the lost fraction.  */
1218   int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1219 
1220   if (cmp > 0)
1221     lost_fraction = lfMoreThanHalf;
1222   else if (cmp == 0)
1223     lost_fraction = lfExactlyHalf;
1224   else if (APInt::tcIsZero(dividend, partsCount))
1225     lost_fraction = lfExactlyZero;
1226   else
1227     lost_fraction = lfLessThanHalf;
1228 
1229   if (partsCount > 2)
1230     delete [] dividend;
1231 
1232   return lost_fraction;
1233 }
1234 
significandMSB() const1235 unsigned int IEEEFloat::significandMSB() const {
1236   return APInt::tcMSB(significandParts(), partCount());
1237 }
1238 
significandLSB() const1239 unsigned int IEEEFloat::significandLSB() const {
1240   return APInt::tcLSB(significandParts(), partCount());
1241 }
1242 
1243 /* Note that a zero result is NOT normalized to fcZero.  */
shiftSignificandRight(unsigned int bits)1244 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1245   /* Our exponent should not overflow.  */
1246   assert((ExponentType) (exponent + bits) >= exponent);
1247 
1248   exponent += bits;
1249 
1250   return shiftRight(significandParts(), partCount(), bits);
1251 }
1252 
1253 /* Shift the significand left BITS bits, subtract BITS from its exponent.  */
shiftSignificandLeft(unsigned int bits)1254 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1255   assert(bits < semantics->precision);
1256 
1257   if (bits) {
1258     unsigned int partsCount = partCount();
1259 
1260     APInt::tcShiftLeft(significandParts(), partsCount, bits);
1261     exponent -= bits;
1262 
1263     assert(!APInt::tcIsZero(significandParts(), partsCount));
1264   }
1265 }
1266 
1267 IEEEFloat::cmpResult
compareAbsoluteValue(const IEEEFloat & rhs) const1268 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1269   int compare;
1270 
1271   assert(semantics == rhs.semantics);
1272   assert(isFiniteNonZero());
1273   assert(rhs.isFiniteNonZero());
1274 
1275   compare = exponent - rhs.exponent;
1276 
1277   /* If exponents are equal, do an unsigned bignum comparison of the
1278      significands.  */
1279   if (compare == 0)
1280     compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1281                                partCount());
1282 
1283   if (compare > 0)
1284     return cmpGreaterThan;
1285   else if (compare < 0)
1286     return cmpLessThan;
1287   else
1288     return cmpEqual;
1289 }
1290 
1291 /* Handle overflow.  Sign is preserved.  We either become infinity or
1292    the largest finite number.  */
handleOverflow(roundingMode rounding_mode)1293 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1294   /* Infinity?  */
1295   if (rounding_mode == rmNearestTiesToEven ||
1296       rounding_mode == rmNearestTiesToAway ||
1297       (rounding_mode == rmTowardPositive && !sign) ||
1298       (rounding_mode == rmTowardNegative && sign)) {
1299     category = fcInfinity;
1300     return (opStatus) (opOverflow | opInexact);
1301   }
1302 
1303   /* Otherwise we become the largest finite number.  */
1304   category = fcNormal;
1305   exponent = semantics->maxExponent;
1306   APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1307                                    semantics->precision);
1308 
1309   return opInexact;
1310 }
1311 
1312 /* Returns TRUE if, when truncating the current number, with BIT the
1313    new LSB, with the given lost fraction and rounding mode, the result
1314    would need to be rounded away from zero (i.e., by increasing the
1315    signficand).  This routine must work for fcZero of both signs, and
1316    fcNormal numbers.  */
roundAwayFromZero(roundingMode rounding_mode,lostFraction lost_fraction,unsigned int bit) const1317 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1318                                   lostFraction lost_fraction,
1319                                   unsigned int bit) const {
1320   /* NaNs and infinities should not have lost fractions.  */
1321   assert(isFiniteNonZero() || category == fcZero);
1322 
1323   /* Current callers never pass this so we don't handle it.  */
1324   assert(lost_fraction != lfExactlyZero);
1325 
1326   switch (rounding_mode) {
1327   case rmNearestTiesToAway:
1328     return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1329 
1330   case rmNearestTiesToEven:
1331     if (lost_fraction == lfMoreThanHalf)
1332       return true;
1333 
1334     /* Our zeroes don't have a significand to test.  */
1335     if (lost_fraction == lfExactlyHalf && category != fcZero)
1336       return APInt::tcExtractBit(significandParts(), bit);
1337 
1338     return false;
1339 
1340   case rmTowardZero:
1341     return false;
1342 
1343   case rmTowardPositive:
1344     return !sign;
1345 
1346   case rmTowardNegative:
1347     return sign;
1348 
1349   default:
1350     break;
1351   }
1352   llvm_unreachable("Invalid rounding mode found");
1353 }
1354 
normalize(roundingMode rounding_mode,lostFraction lost_fraction)1355 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1356                                          lostFraction lost_fraction) {
1357   unsigned int omsb;                /* One, not zero, based MSB.  */
1358   int exponentChange;
1359 
1360   if (!isFiniteNonZero())
1361     return opOK;
1362 
1363   /* Before rounding normalize the exponent of fcNormal numbers.  */
1364   omsb = significandMSB() + 1;
1365 
1366   if (omsb) {
1367     /* OMSB is numbered from 1.  We want to place it in the integer
1368        bit numbered PRECISION if possible, with a compensating change in
1369        the exponent.  */
1370     exponentChange = omsb - semantics->precision;
1371 
1372     /* If the resulting exponent is too high, overflow according to
1373        the rounding mode.  */
1374     if (exponent + exponentChange > semantics->maxExponent)
1375       return handleOverflow(rounding_mode);
1376 
1377     /* Subnormal numbers have exponent minExponent, and their MSB
1378        is forced based on that.  */
1379     if (exponent + exponentChange < semantics->minExponent)
1380       exponentChange = semantics->minExponent - exponent;
1381 
1382     /* Shifting left is easy as we don't lose precision.  */
1383     if (exponentChange < 0) {
1384       assert(lost_fraction == lfExactlyZero);
1385 
1386       shiftSignificandLeft(-exponentChange);
1387 
1388       return opOK;
1389     }
1390 
1391     if (exponentChange > 0) {
1392       lostFraction lf;
1393 
1394       /* Shift right and capture any new lost fraction.  */
1395       lf = shiftSignificandRight(exponentChange);
1396 
1397       lost_fraction = combineLostFractions(lf, lost_fraction);
1398 
1399       /* Keep OMSB up-to-date.  */
1400       if (omsb > (unsigned) exponentChange)
1401         omsb -= exponentChange;
1402       else
1403         omsb = 0;
1404     }
1405   }
1406 
1407   /* Now round the number according to rounding_mode given the lost
1408      fraction.  */
1409 
1410   /* As specified in IEEE 754, since we do not trap we do not report
1411      underflow for exact results.  */
1412   if (lost_fraction == lfExactlyZero) {
1413     /* Canonicalize zeroes.  */
1414     if (omsb == 0)
1415       category = fcZero;
1416 
1417     return opOK;
1418   }
1419 
1420   /* Increment the significand if we're rounding away from zero.  */
1421   if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1422     if (omsb == 0)
1423       exponent = semantics->minExponent;
1424 
1425     incrementSignificand();
1426     omsb = significandMSB() + 1;
1427 
1428     /* Did the significand increment overflow?  */
1429     if (omsb == (unsigned) semantics->precision + 1) {
1430       /* Renormalize by incrementing the exponent and shifting our
1431          significand right one.  However if we already have the
1432          maximum exponent we overflow to infinity.  */
1433       if (exponent == semantics->maxExponent) {
1434         category = fcInfinity;
1435 
1436         return (opStatus) (opOverflow | opInexact);
1437       }
1438 
1439       shiftSignificandRight(1);
1440 
1441       return opInexact;
1442     }
1443   }
1444 
1445   /* The normal case - we were and are not denormal, and any
1446      significand increment above didn't overflow.  */
1447   if (omsb == semantics->precision)
1448     return opInexact;
1449 
1450   /* We have a non-zero denormal.  */
1451   assert(omsb < semantics->precision);
1452 
1453   /* Canonicalize zeroes.  */
1454   if (omsb == 0)
1455     category = fcZero;
1456 
1457   /* The fcZero case is a denormal that underflowed to zero.  */
1458   return (opStatus) (opUnderflow | opInexact);
1459 }
1460 
addOrSubtractSpecials(const IEEEFloat & rhs,bool subtract)1461 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1462                                                      bool subtract) {
1463   switch (PackCategoriesIntoKey(category, rhs.category)) {
1464   default:
1465     llvm_unreachable(nullptr);
1466 
1467   case PackCategoriesIntoKey(fcZero, fcNaN):
1468   case PackCategoriesIntoKey(fcNormal, fcNaN):
1469   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1470     assign(rhs);
1471     LLVM_FALLTHROUGH;
1472   case PackCategoriesIntoKey(fcNaN, fcZero):
1473   case PackCategoriesIntoKey(fcNaN, fcNormal):
1474   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1475   case PackCategoriesIntoKey(fcNaN, fcNaN):
1476     if (isSignaling()) {
1477       makeQuiet();
1478       return opInvalidOp;
1479     }
1480     return rhs.isSignaling() ? opInvalidOp : opOK;
1481 
1482   case PackCategoriesIntoKey(fcNormal, fcZero):
1483   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1484   case PackCategoriesIntoKey(fcInfinity, fcZero):
1485     return opOK;
1486 
1487   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1488   case PackCategoriesIntoKey(fcZero, fcInfinity):
1489     category = fcInfinity;
1490     sign = rhs.sign ^ subtract;
1491     return opOK;
1492 
1493   case PackCategoriesIntoKey(fcZero, fcNormal):
1494     assign(rhs);
1495     sign = rhs.sign ^ subtract;
1496     return opOK;
1497 
1498   case PackCategoriesIntoKey(fcZero, fcZero):
1499     /* Sign depends on rounding mode; handled by caller.  */
1500     return opOK;
1501 
1502   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1503     /* Differently signed infinities can only be validly
1504        subtracted.  */
1505     if (((sign ^ rhs.sign)!=0) != subtract) {
1506       makeNaN();
1507       return opInvalidOp;
1508     }
1509 
1510     return opOK;
1511 
1512   case PackCategoriesIntoKey(fcNormal, fcNormal):
1513     return opDivByZero;
1514   }
1515 }
1516 
1517 /* Add or subtract two normal numbers.  */
addOrSubtractSignificand(const IEEEFloat & rhs,bool subtract)1518 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1519                                                  bool subtract) {
1520   integerPart carry;
1521   lostFraction lost_fraction;
1522   int bits;
1523 
1524   /* Determine if the operation on the absolute values is effectively
1525      an addition or subtraction.  */
1526   subtract ^= static_cast<bool>(sign ^ rhs.sign);
1527 
1528   /* Are we bigger exponent-wise than the RHS?  */
1529   bits = exponent - rhs.exponent;
1530 
1531   /* Subtraction is more subtle than one might naively expect.  */
1532   if (subtract) {
1533     IEEEFloat temp_rhs(rhs);
1534 
1535     if (bits == 0)
1536       lost_fraction = lfExactlyZero;
1537     else if (bits > 0) {
1538       lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1539       shiftSignificandLeft(1);
1540     } else {
1541       lost_fraction = shiftSignificandRight(-bits - 1);
1542       temp_rhs.shiftSignificandLeft(1);
1543     }
1544 
1545     // Should we reverse the subtraction.
1546     if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1547       carry = temp_rhs.subtractSignificand
1548         (*this, lost_fraction != lfExactlyZero);
1549       copySignificand(temp_rhs);
1550       sign = !sign;
1551     } else {
1552       carry = subtractSignificand
1553         (temp_rhs, lost_fraction != lfExactlyZero);
1554     }
1555 
1556     /* Invert the lost fraction - it was on the RHS and
1557        subtracted.  */
1558     if (lost_fraction == lfLessThanHalf)
1559       lost_fraction = lfMoreThanHalf;
1560     else if (lost_fraction == lfMoreThanHalf)
1561       lost_fraction = lfLessThanHalf;
1562 
1563     /* The code above is intended to ensure that no borrow is
1564        necessary.  */
1565     assert(!carry);
1566     (void)carry;
1567   } else {
1568     if (bits > 0) {
1569       IEEEFloat temp_rhs(rhs);
1570 
1571       lost_fraction = temp_rhs.shiftSignificandRight(bits);
1572       carry = addSignificand(temp_rhs);
1573     } else {
1574       lost_fraction = shiftSignificandRight(-bits);
1575       carry = addSignificand(rhs);
1576     }
1577 
1578     /* We have a guard bit; generating a carry cannot happen.  */
1579     assert(!carry);
1580     (void)carry;
1581   }
1582 
1583   return lost_fraction;
1584 }
1585 
multiplySpecials(const IEEEFloat & rhs)1586 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1587   switch (PackCategoriesIntoKey(category, rhs.category)) {
1588   default:
1589     llvm_unreachable(nullptr);
1590 
1591   case PackCategoriesIntoKey(fcZero, fcNaN):
1592   case PackCategoriesIntoKey(fcNormal, fcNaN):
1593   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1594     assign(rhs);
1595     sign = false;
1596     LLVM_FALLTHROUGH;
1597   case PackCategoriesIntoKey(fcNaN, fcZero):
1598   case PackCategoriesIntoKey(fcNaN, fcNormal):
1599   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1600   case PackCategoriesIntoKey(fcNaN, fcNaN):
1601     sign ^= rhs.sign; // restore the original sign
1602     if (isSignaling()) {
1603       makeQuiet();
1604       return opInvalidOp;
1605     }
1606     return rhs.isSignaling() ? opInvalidOp : opOK;
1607 
1608   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1609   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1610   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1611     category = fcInfinity;
1612     return opOK;
1613 
1614   case PackCategoriesIntoKey(fcZero, fcNormal):
1615   case PackCategoriesIntoKey(fcNormal, fcZero):
1616   case PackCategoriesIntoKey(fcZero, fcZero):
1617     category = fcZero;
1618     return opOK;
1619 
1620   case PackCategoriesIntoKey(fcZero, fcInfinity):
1621   case PackCategoriesIntoKey(fcInfinity, fcZero):
1622     makeNaN();
1623     return opInvalidOp;
1624 
1625   case PackCategoriesIntoKey(fcNormal, fcNormal):
1626     return opOK;
1627   }
1628 }
1629 
divideSpecials(const IEEEFloat & rhs)1630 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1631   switch (PackCategoriesIntoKey(category, rhs.category)) {
1632   default:
1633     llvm_unreachable(nullptr);
1634 
1635   case PackCategoriesIntoKey(fcZero, fcNaN):
1636   case PackCategoriesIntoKey(fcNormal, fcNaN):
1637   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1638     assign(rhs);
1639     sign = false;
1640     LLVM_FALLTHROUGH;
1641   case PackCategoriesIntoKey(fcNaN, fcZero):
1642   case PackCategoriesIntoKey(fcNaN, fcNormal):
1643   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1644   case PackCategoriesIntoKey(fcNaN, fcNaN):
1645     sign ^= rhs.sign; // restore the original sign
1646     if (isSignaling()) {
1647       makeQuiet();
1648       return opInvalidOp;
1649     }
1650     return rhs.isSignaling() ? opInvalidOp : opOK;
1651 
1652   case PackCategoriesIntoKey(fcInfinity, fcZero):
1653   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1654   case PackCategoriesIntoKey(fcZero, fcInfinity):
1655   case PackCategoriesIntoKey(fcZero, fcNormal):
1656     return opOK;
1657 
1658   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1659     category = fcZero;
1660     return opOK;
1661 
1662   case PackCategoriesIntoKey(fcNormal, fcZero):
1663     category = fcInfinity;
1664     return opDivByZero;
1665 
1666   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1667   case PackCategoriesIntoKey(fcZero, fcZero):
1668     makeNaN();
1669     return opInvalidOp;
1670 
1671   case PackCategoriesIntoKey(fcNormal, fcNormal):
1672     return opOK;
1673   }
1674 }
1675 
modSpecials(const IEEEFloat & rhs)1676 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1677   switch (PackCategoriesIntoKey(category, rhs.category)) {
1678   default:
1679     llvm_unreachable(nullptr);
1680 
1681   case PackCategoriesIntoKey(fcZero, fcNaN):
1682   case PackCategoriesIntoKey(fcNormal, fcNaN):
1683   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1684     assign(rhs);
1685     LLVM_FALLTHROUGH;
1686   case PackCategoriesIntoKey(fcNaN, fcZero):
1687   case PackCategoriesIntoKey(fcNaN, fcNormal):
1688   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1689   case PackCategoriesIntoKey(fcNaN, fcNaN):
1690     if (isSignaling()) {
1691       makeQuiet();
1692       return opInvalidOp;
1693     }
1694     return rhs.isSignaling() ? opInvalidOp : opOK;
1695 
1696   case PackCategoriesIntoKey(fcZero, fcInfinity):
1697   case PackCategoriesIntoKey(fcZero, fcNormal):
1698   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1699     return opOK;
1700 
1701   case PackCategoriesIntoKey(fcNormal, fcZero):
1702   case PackCategoriesIntoKey(fcInfinity, fcZero):
1703   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1704   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1705   case PackCategoriesIntoKey(fcZero, fcZero):
1706     makeNaN();
1707     return opInvalidOp;
1708 
1709   case PackCategoriesIntoKey(fcNormal, fcNormal):
1710     return opOK;
1711   }
1712 }
1713 
remainderSpecials(const IEEEFloat & rhs)1714 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1715   switch (PackCategoriesIntoKey(category, rhs.category)) {
1716   default:
1717     llvm_unreachable(nullptr);
1718 
1719   case PackCategoriesIntoKey(fcZero, fcNaN):
1720   case PackCategoriesIntoKey(fcNormal, fcNaN):
1721   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1722     assign(rhs);
1723     LLVM_FALLTHROUGH;
1724   case PackCategoriesIntoKey(fcNaN, fcZero):
1725   case PackCategoriesIntoKey(fcNaN, fcNormal):
1726   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1727   case PackCategoriesIntoKey(fcNaN, fcNaN):
1728     if (isSignaling()) {
1729       makeQuiet();
1730       return opInvalidOp;
1731     }
1732     return rhs.isSignaling() ? opInvalidOp : opOK;
1733 
1734   case PackCategoriesIntoKey(fcZero, fcInfinity):
1735   case PackCategoriesIntoKey(fcZero, fcNormal):
1736   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1737     return opOK;
1738 
1739   case PackCategoriesIntoKey(fcNormal, fcZero):
1740   case PackCategoriesIntoKey(fcInfinity, fcZero):
1741   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1742   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1743   case PackCategoriesIntoKey(fcZero, fcZero):
1744     makeNaN();
1745     return opInvalidOp;
1746 
1747   case PackCategoriesIntoKey(fcNormal, fcNormal):
1748     return opDivByZero; // fake status, indicating this is not a special case
1749   }
1750 }
1751 
1752 /* Change sign.  */
changeSign()1753 void IEEEFloat::changeSign() {
1754   /* Look mummy, this one's easy.  */
1755   sign = !sign;
1756 }
1757 
1758 /* Normalized addition or subtraction.  */
addOrSubtract(const IEEEFloat & rhs,roundingMode rounding_mode,bool subtract)1759 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1760                                              roundingMode rounding_mode,
1761                                              bool subtract) {
1762   opStatus fs;
1763 
1764   fs = addOrSubtractSpecials(rhs, subtract);
1765 
1766   /* This return code means it was not a simple case.  */
1767   if (fs == opDivByZero) {
1768     lostFraction lost_fraction;
1769 
1770     lost_fraction = addOrSubtractSignificand(rhs, subtract);
1771     fs = normalize(rounding_mode, lost_fraction);
1772 
1773     /* Can only be zero if we lost no fraction.  */
1774     assert(category != fcZero || lost_fraction == lfExactlyZero);
1775   }
1776 
1777   /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1778      positive zero unless rounding to minus infinity, except that
1779      adding two like-signed zeroes gives that zero.  */
1780   if (category == fcZero) {
1781     if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1782       sign = (rounding_mode == rmTowardNegative);
1783   }
1784 
1785   return fs;
1786 }
1787 
1788 /* Normalized addition.  */
add(const IEEEFloat & rhs,roundingMode rounding_mode)1789 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
1790                                    roundingMode rounding_mode) {
1791   return addOrSubtract(rhs, rounding_mode, false);
1792 }
1793 
1794 /* Normalized subtraction.  */
subtract(const IEEEFloat & rhs,roundingMode rounding_mode)1795 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
1796                                         roundingMode rounding_mode) {
1797   return addOrSubtract(rhs, rounding_mode, true);
1798 }
1799 
1800 /* Normalized multiply.  */
multiply(const IEEEFloat & rhs,roundingMode rounding_mode)1801 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
1802                                         roundingMode rounding_mode) {
1803   opStatus fs;
1804 
1805   sign ^= rhs.sign;
1806   fs = multiplySpecials(rhs);
1807 
1808   if (isFiniteNonZero()) {
1809     lostFraction lost_fraction = multiplySignificand(rhs);
1810     fs = normalize(rounding_mode, lost_fraction);
1811     if (lost_fraction != lfExactlyZero)
1812       fs = (opStatus) (fs | opInexact);
1813   }
1814 
1815   return fs;
1816 }
1817 
1818 /* Normalized divide.  */
divide(const IEEEFloat & rhs,roundingMode rounding_mode)1819 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
1820                                       roundingMode rounding_mode) {
1821   opStatus fs;
1822 
1823   sign ^= rhs.sign;
1824   fs = divideSpecials(rhs);
1825 
1826   if (isFiniteNonZero()) {
1827     lostFraction lost_fraction = divideSignificand(rhs);
1828     fs = normalize(rounding_mode, lost_fraction);
1829     if (lost_fraction != lfExactlyZero)
1830       fs = (opStatus) (fs | opInexact);
1831   }
1832 
1833   return fs;
1834 }
1835 
1836 /* Normalized remainder.  */
remainder(const IEEEFloat & rhs)1837 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
1838   opStatus fs;
1839   unsigned int origSign = sign;
1840 
1841   // First handle the special cases.
1842   fs = remainderSpecials(rhs);
1843   if (fs != opDivByZero)
1844     return fs;
1845 
1846   fs = opOK;
1847 
1848   // Make sure the current value is less than twice the denom. If the addition
1849   // did not succeed (an overflow has happened), which means that the finite
1850   // value we currently posses must be less than twice the denom (as we are
1851   // using the same semantics).
1852   IEEEFloat P2 = rhs;
1853   if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
1854     fs = mod(P2);
1855     assert(fs == opOK);
1856   }
1857 
1858   // Lets work with absolute numbers.
1859   IEEEFloat P = rhs;
1860   P.sign = false;
1861   sign = false;
1862 
1863   //
1864   // To calculate the remainder we use the following scheme.
1865   //
1866   // The remainder is defained as follows:
1867   //
1868   // remainder = numer - rquot * denom = x - r * p
1869   //
1870   // Where r is the result of: x/p, rounded toward the nearest integral value
1871   // (with halfway cases rounded toward the even number).
1872   //
1873   // Currently, (after x mod 2p):
1874   // r is the number of 2p's present inside x, which is inherently, an even
1875   // number of p's.
1876   //
1877   // We may split the remaining calculation into 4 options:
1878   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
1879   // - if x == 0.5p then we round to the nearest even number which is 0, and we
1880   //   are done as well.
1881   // - if 0.5p < x < p then we round to nearest number which is 1, and we have
1882   //   to subtract 1p at least once.
1883   // - if x >= p then we must subtract p at least once, as x must be a
1884   //   remainder.
1885   //
1886   // By now, we were done, or we added 1 to r, which in turn, now an odd number.
1887   //
1888   // We can now split the remaining calculation to the following 3 options:
1889   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
1890   // - if x == 0.5p then we round to the nearest even number. As r is odd, we
1891   //   must round up to the next even number. so we must subtract p once more.
1892   // - if x > 0.5p (and inherently x < p) then we must round r up to the next
1893   //   integral, and subtract p once more.
1894   //
1895 
1896   // Extend the semantics to prevent an overflow/underflow or inexact result.
1897   bool losesInfo;
1898   fltSemantics extendedSemantics = *semantics;
1899   extendedSemantics.maxExponent++;
1900   extendedSemantics.minExponent--;
1901   extendedSemantics.precision += 2;
1902 
1903   IEEEFloat VEx = *this;
1904   fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
1905   assert(fs == opOK && !losesInfo);
1906   IEEEFloat PEx = P;
1907   fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
1908   assert(fs == opOK && !losesInfo);
1909 
1910   // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
1911   // any fraction.
1912   fs = VEx.add(VEx, rmNearestTiesToEven);
1913   assert(fs == opOK);
1914 
1915   if (VEx.compare(PEx) == cmpGreaterThan) {
1916     fs = subtract(P, rmNearestTiesToEven);
1917     assert(fs == opOK);
1918 
1919     // Make VEx = this.add(this), but because we have different semantics, we do
1920     // not want to `convert` again, so we just subtract PEx twice (which equals
1921     // to the desired value).
1922     fs = VEx.subtract(PEx, rmNearestTiesToEven);
1923     assert(fs == opOK);
1924     fs = VEx.subtract(PEx, rmNearestTiesToEven);
1925     assert(fs == opOK);
1926 
1927     cmpResult result = VEx.compare(PEx);
1928     if (result == cmpGreaterThan || result == cmpEqual) {
1929       fs = subtract(P, rmNearestTiesToEven);
1930       assert(fs == opOK);
1931     }
1932   }
1933 
1934   if (isZero())
1935     sign = origSign;    // IEEE754 requires this
1936   else
1937     sign ^= origSign;
1938   return fs;
1939 }
1940 
1941 /* Normalized llvm frem (C fmod). */
mod(const IEEEFloat & rhs)1942 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
1943   opStatus fs;
1944   fs = modSpecials(rhs);
1945   unsigned int origSign = sign;
1946 
1947   while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
1948          compareAbsoluteValue(rhs) != cmpLessThan) {
1949     IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven);
1950     if (compareAbsoluteValue(V) == cmpLessThan)
1951       V = scalbn(V, -1, rmNearestTiesToEven);
1952     V.sign = sign;
1953 
1954     fs = subtract(V, rmNearestTiesToEven);
1955     assert(fs==opOK);
1956   }
1957   if (isZero())
1958     sign = origSign; // fmod requires this
1959   return fs;
1960 }
1961 
1962 /* Normalized fused-multiply-add.  */
fusedMultiplyAdd(const IEEEFloat & multiplicand,const IEEEFloat & addend,roundingMode rounding_mode)1963 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
1964                                                 const IEEEFloat &addend,
1965                                                 roundingMode rounding_mode) {
1966   opStatus fs;
1967 
1968   /* Post-multiplication sign, before addition.  */
1969   sign ^= multiplicand.sign;
1970 
1971   /* If and only if all arguments are normal do we need to do an
1972      extended-precision calculation.  */
1973   if (isFiniteNonZero() &&
1974       multiplicand.isFiniteNonZero() &&
1975       addend.isFinite()) {
1976     lostFraction lost_fraction;
1977 
1978     lost_fraction = multiplySignificand(multiplicand, addend);
1979     fs = normalize(rounding_mode, lost_fraction);
1980     if (lost_fraction != lfExactlyZero)
1981       fs = (opStatus) (fs | opInexact);
1982 
1983     /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1984        positive zero unless rounding to minus infinity, except that
1985        adding two like-signed zeroes gives that zero.  */
1986     if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1987       sign = (rounding_mode == rmTowardNegative);
1988   } else {
1989     fs = multiplySpecials(multiplicand);
1990 
1991     /* FS can only be opOK or opInvalidOp.  There is no more work
1992        to do in the latter case.  The IEEE-754R standard says it is
1993        implementation-defined in this case whether, if ADDEND is a
1994        quiet NaN, we raise invalid op; this implementation does so.
1995 
1996        If we need to do the addition we can do so with normal
1997        precision.  */
1998     if (fs == opOK)
1999       fs = addOrSubtract(addend, rounding_mode, false);
2000   }
2001 
2002   return fs;
2003 }
2004 
2005 /* Rounding-mode correct round to integral value.  */
roundToIntegral(roundingMode rounding_mode)2006 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
2007   opStatus fs;
2008 
2009   if (isInfinity())
2010     // [IEEE Std 754-2008 6.1]:
2011     // The behavior of infinity in floating-point arithmetic is derived from the
2012     // limiting cases of real arithmetic with operands of arbitrarily
2013     // large magnitude, when such a limit exists.
2014     // ...
2015     // Operations on infinite operands are usually exact and therefore signal no
2016     // exceptions ...
2017     return opOK;
2018 
2019   if (isNaN()) {
2020     if (isSignaling()) {
2021       // [IEEE Std 754-2008 6.2]:
2022       // Under default exception handling, any operation signaling an invalid
2023       // operation exception and for which a floating-point result is to be
2024       // delivered shall deliver a quiet NaN.
2025       makeQuiet();
2026       // [IEEE Std 754-2008 6.2]:
2027       // Signaling NaNs shall be reserved operands that, under default exception
2028       // handling, signal the invalid operation exception(see 7.2) for every
2029       // general-computational and signaling-computational operation except for
2030       // the conversions described in 5.12.
2031       return opInvalidOp;
2032     } else {
2033       // [IEEE Std 754-2008 6.2]:
2034       // For an operation with quiet NaN inputs, other than maximum and minimum
2035       // operations, if a floating-point result is to be delivered the result
2036       // shall be a quiet NaN which should be one of the input NaNs.
2037       // ...
2038       // Every general-computational and quiet-computational operation involving
2039       // one or more input NaNs, none of them signaling, shall signal no
2040       // exception, except fusedMultiplyAdd might signal the invalid operation
2041       // exception(see 7.2).
2042       return opOK;
2043     }
2044   }
2045 
2046   if (isZero()) {
2047     // [IEEE Std 754-2008 6.3]:
2048     // ... the sign of the result of conversions, the quantize operation, the
2049     // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2050     // the sign of the first or only operand.
2051     return opOK;
2052   }
2053 
2054   // If the exponent is large enough, we know that this value is already
2055   // integral, and the arithmetic below would potentially cause it to saturate
2056   // to +/-Inf.  Bail out early instead.
2057   if (exponent+1 >= (int)semanticsPrecision(*semantics))
2058     return opOK;
2059 
2060   // The algorithm here is quite simple: we add 2^(p-1), where p is the
2061   // precision of our format, and then subtract it back off again.  The choice
2062   // of rounding modes for the addition/subtraction determines the rounding mode
2063   // for our integral rounding as well.
2064   // NOTE: When the input value is negative, we do subtraction followed by
2065   // addition instead.
2066   APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2067   IntegerConstant <<= semanticsPrecision(*semantics)-1;
2068   IEEEFloat MagicConstant(*semantics);
2069   fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2070                                       rmNearestTiesToEven);
2071   assert(fs == opOK);
2072   MagicConstant.sign = sign;
2073 
2074   // Preserve the input sign so that we can handle the case of zero result
2075   // correctly.
2076   bool inputSign = isNegative();
2077 
2078   fs = add(MagicConstant, rounding_mode);
2079 
2080   // Current value and 'MagicConstant' are both integers, so the result of the
2081   // subtraction is always exact according to Sterbenz' lemma.
2082   subtract(MagicConstant, rounding_mode);
2083 
2084   // Restore the input sign.
2085   if (inputSign != isNegative())
2086     changeSign();
2087 
2088   return fs;
2089 }
2090 
2091 
2092 /* Comparison requires normalized numbers.  */
compare(const IEEEFloat & rhs) const2093 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2094   cmpResult result;
2095 
2096   assert(semantics == rhs.semantics);
2097 
2098   switch (PackCategoriesIntoKey(category, rhs.category)) {
2099   default:
2100     llvm_unreachable(nullptr);
2101 
2102   case PackCategoriesIntoKey(fcNaN, fcZero):
2103   case PackCategoriesIntoKey(fcNaN, fcNormal):
2104   case PackCategoriesIntoKey(fcNaN, fcInfinity):
2105   case PackCategoriesIntoKey(fcNaN, fcNaN):
2106   case PackCategoriesIntoKey(fcZero, fcNaN):
2107   case PackCategoriesIntoKey(fcNormal, fcNaN):
2108   case PackCategoriesIntoKey(fcInfinity, fcNaN):
2109     return cmpUnordered;
2110 
2111   case PackCategoriesIntoKey(fcInfinity, fcNormal):
2112   case PackCategoriesIntoKey(fcInfinity, fcZero):
2113   case PackCategoriesIntoKey(fcNormal, fcZero):
2114     if (sign)
2115       return cmpLessThan;
2116     else
2117       return cmpGreaterThan;
2118 
2119   case PackCategoriesIntoKey(fcNormal, fcInfinity):
2120   case PackCategoriesIntoKey(fcZero, fcInfinity):
2121   case PackCategoriesIntoKey(fcZero, fcNormal):
2122     if (rhs.sign)
2123       return cmpGreaterThan;
2124     else
2125       return cmpLessThan;
2126 
2127   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2128     if (sign == rhs.sign)
2129       return cmpEqual;
2130     else if (sign)
2131       return cmpLessThan;
2132     else
2133       return cmpGreaterThan;
2134 
2135   case PackCategoriesIntoKey(fcZero, fcZero):
2136     return cmpEqual;
2137 
2138   case PackCategoriesIntoKey(fcNormal, fcNormal):
2139     break;
2140   }
2141 
2142   /* Two normal numbers.  Do they have the same sign?  */
2143   if (sign != rhs.sign) {
2144     if (sign)
2145       result = cmpLessThan;
2146     else
2147       result = cmpGreaterThan;
2148   } else {
2149     /* Compare absolute values; invert result if negative.  */
2150     result = compareAbsoluteValue(rhs);
2151 
2152     if (sign) {
2153       if (result == cmpLessThan)
2154         result = cmpGreaterThan;
2155       else if (result == cmpGreaterThan)
2156         result = cmpLessThan;
2157     }
2158   }
2159 
2160   return result;
2161 }
2162 
2163 /// IEEEFloat::convert - convert a value of one floating point type to another.
2164 /// The return value corresponds to the IEEE754 exceptions.  *losesInfo
2165 /// records whether the transformation lost information, i.e. whether
2166 /// converting the result back to the original type will produce the
2167 /// original value (this is almost the same as return value==fsOK, but there
2168 /// are edge cases where this is not so).
2169 
convert(const fltSemantics & toSemantics,roundingMode rounding_mode,bool * losesInfo)2170 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2171                                        roundingMode rounding_mode,
2172                                        bool *losesInfo) {
2173   lostFraction lostFraction;
2174   unsigned int newPartCount, oldPartCount;
2175   opStatus fs;
2176   int shift;
2177   const fltSemantics &fromSemantics = *semantics;
2178 
2179   lostFraction = lfExactlyZero;
2180   newPartCount = partCountForBits(toSemantics.precision + 1);
2181   oldPartCount = partCount();
2182   shift = toSemantics.precision - fromSemantics.precision;
2183 
2184   bool X86SpecialNan = false;
2185   if (&fromSemantics == &semX87DoubleExtended &&
2186       &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2187       (!(*significandParts() & 0x8000000000000000ULL) ||
2188        !(*significandParts() & 0x4000000000000000ULL))) {
2189     // x86 has some unusual NaNs which cannot be represented in any other
2190     // format; note them here.
2191     X86SpecialNan = true;
2192   }
2193 
2194   // If this is a truncation of a denormal number, and the target semantics
2195   // has larger exponent range than the source semantics (this can happen
2196   // when truncating from PowerPC double-double to double format), the
2197   // right shift could lose result mantissa bits.  Adjust exponent instead
2198   // of performing excessive shift.
2199   if (shift < 0 && isFiniteNonZero()) {
2200     int exponentChange = significandMSB() + 1 - fromSemantics.precision;
2201     if (exponent + exponentChange < toSemantics.minExponent)
2202       exponentChange = toSemantics.minExponent - exponent;
2203     if (exponentChange < shift)
2204       exponentChange = shift;
2205     if (exponentChange < 0) {
2206       shift -= exponentChange;
2207       exponent += exponentChange;
2208     }
2209   }
2210 
2211   // If this is a truncation, perform the shift before we narrow the storage.
2212   if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
2213     lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2214 
2215   // Fix the storage so it can hold to new value.
2216   if (newPartCount > oldPartCount) {
2217     // The new type requires more storage; make it available.
2218     integerPart *newParts;
2219     newParts = new integerPart[newPartCount];
2220     APInt::tcSet(newParts, 0, newPartCount);
2221     if (isFiniteNonZero() || category==fcNaN)
2222       APInt::tcAssign(newParts, significandParts(), oldPartCount);
2223     freeSignificand();
2224     significand.parts = newParts;
2225   } else if (newPartCount == 1 && oldPartCount != 1) {
2226     // Switch to built-in storage for a single part.
2227     integerPart newPart = 0;
2228     if (isFiniteNonZero() || category==fcNaN)
2229       newPart = significandParts()[0];
2230     freeSignificand();
2231     significand.part = newPart;
2232   }
2233 
2234   // Now that we have the right storage, switch the semantics.
2235   semantics = &toSemantics;
2236 
2237   // If this is an extension, perform the shift now that the storage is
2238   // available.
2239   if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2240     APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2241 
2242   if (isFiniteNonZero()) {
2243     fs = normalize(rounding_mode, lostFraction);
2244     *losesInfo = (fs != opOK);
2245   } else if (category == fcNaN) {
2246     *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2247 
2248     // For x87 extended precision, we want to make a NaN, not a special NaN if
2249     // the input wasn't special either.
2250     if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2251       APInt::tcSetBit(significandParts(), semantics->precision - 1);
2252 
2253     // Convert of sNaN creates qNaN and raises an exception (invalid op).
2254     // This also guarantees that a sNaN does not become Inf on a truncation
2255     // that loses all payload bits.
2256     if (isSignaling()) {
2257       makeQuiet();
2258       fs = opInvalidOp;
2259     } else {
2260       fs = opOK;
2261     }
2262   } else {
2263     *losesInfo = false;
2264     fs = opOK;
2265   }
2266 
2267   return fs;
2268 }
2269 
2270 /* Convert a floating point number to an integer according to the
2271    rounding mode.  If the rounded integer value is out of range this
2272    returns an invalid operation exception and the contents of the
2273    destination parts are unspecified.  If the rounded value is in
2274    range but the floating point number is not the exact integer, the C
2275    standard doesn't require an inexact exception to be raised.  IEEE
2276    854 does require it so we do that.
2277 
2278    Note that for conversions to integer type the C standard requires
2279    round-to-zero to always be used.  */
convertToSignExtendedInteger(MutableArrayRef<integerPart> parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2280 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2281     MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2282     roundingMode rounding_mode, bool *isExact) const {
2283   lostFraction lost_fraction;
2284   const integerPart *src;
2285   unsigned int dstPartsCount, truncatedBits;
2286 
2287   *isExact = false;
2288 
2289   /* Handle the three special cases first.  */
2290   if (category == fcInfinity || category == fcNaN)
2291     return opInvalidOp;
2292 
2293   dstPartsCount = partCountForBits(width);
2294   assert(dstPartsCount <= parts.size() && "Integer too big");
2295 
2296   if (category == fcZero) {
2297     APInt::tcSet(parts.data(), 0, dstPartsCount);
2298     // Negative zero can't be represented as an int.
2299     *isExact = !sign;
2300     return opOK;
2301   }
2302 
2303   src = significandParts();
2304 
2305   /* Step 1: place our absolute value, with any fraction truncated, in
2306      the destination.  */
2307   if (exponent < 0) {
2308     /* Our absolute value is less than one; truncate everything.  */
2309     APInt::tcSet(parts.data(), 0, dstPartsCount);
2310     /* For exponent -1 the integer bit represents .5, look at that.
2311        For smaller exponents leftmost truncated bit is 0. */
2312     truncatedBits = semantics->precision -1U - exponent;
2313   } else {
2314     /* We want the most significant (exponent + 1) bits; the rest are
2315        truncated.  */
2316     unsigned int bits = exponent + 1U;
2317 
2318     /* Hopelessly large in magnitude?  */
2319     if (bits > width)
2320       return opInvalidOp;
2321 
2322     if (bits < semantics->precision) {
2323       /* We truncate (semantics->precision - bits) bits.  */
2324       truncatedBits = semantics->precision - bits;
2325       APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2326     } else {
2327       /* We want at least as many bits as are available.  */
2328       APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2329                        0);
2330       APInt::tcShiftLeft(parts.data(), dstPartsCount,
2331                          bits - semantics->precision);
2332       truncatedBits = 0;
2333     }
2334   }
2335 
2336   /* Step 2: work out any lost fraction, and increment the absolute
2337      value if we would round away from zero.  */
2338   if (truncatedBits) {
2339     lost_fraction = lostFractionThroughTruncation(src, partCount(),
2340                                                   truncatedBits);
2341     if (lost_fraction != lfExactlyZero &&
2342         roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2343       if (APInt::tcIncrement(parts.data(), dstPartsCount))
2344         return opInvalidOp;     /* Overflow.  */
2345     }
2346   } else {
2347     lost_fraction = lfExactlyZero;
2348   }
2349 
2350   /* Step 3: check if we fit in the destination.  */
2351   unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2352 
2353   if (sign) {
2354     if (!isSigned) {
2355       /* Negative numbers cannot be represented as unsigned.  */
2356       if (omsb != 0)
2357         return opInvalidOp;
2358     } else {
2359       /* It takes omsb bits to represent the unsigned integer value.
2360          We lose a bit for the sign, but care is needed as the
2361          maximally negative integer is a special case.  */
2362       if (omsb == width &&
2363           APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2364         return opInvalidOp;
2365 
2366       /* This case can happen because of rounding.  */
2367       if (omsb > width)
2368         return opInvalidOp;
2369     }
2370 
2371     APInt::tcNegate (parts.data(), dstPartsCount);
2372   } else {
2373     if (omsb >= width + !isSigned)
2374       return opInvalidOp;
2375   }
2376 
2377   if (lost_fraction == lfExactlyZero) {
2378     *isExact = true;
2379     return opOK;
2380   } else
2381     return opInexact;
2382 }
2383 
2384 /* Same as convertToSignExtendedInteger, except we provide
2385    deterministic values in case of an invalid operation exception,
2386    namely zero for NaNs and the minimal or maximal value respectively
2387    for underflow or overflow.
2388    The *isExact output tells whether the result is exact, in the sense
2389    that converting it back to the original floating point type produces
2390    the original value.  This is almost equivalent to result==opOK,
2391    except for negative zeroes.
2392 */
2393 IEEEFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2394 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2395                             unsigned int width, bool isSigned,
2396                             roundingMode rounding_mode, bool *isExact) const {
2397   opStatus fs;
2398 
2399   fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2400                                     isExact);
2401 
2402   if (fs == opInvalidOp) {
2403     unsigned int bits, dstPartsCount;
2404 
2405     dstPartsCount = partCountForBits(width);
2406     assert(dstPartsCount <= parts.size() && "Integer too big");
2407 
2408     if (category == fcNaN)
2409       bits = 0;
2410     else if (sign)
2411       bits = isSigned;
2412     else
2413       bits = width - isSigned;
2414 
2415     APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2416     if (sign && isSigned)
2417       APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2418   }
2419 
2420   return fs;
2421 }
2422 
2423 /* Convert an unsigned integer SRC to a floating point number,
2424    rounding according to ROUNDING_MODE.  The sign of the floating
2425    point number is not modified.  */
convertFromUnsignedParts(const integerPart * src,unsigned int srcCount,roundingMode rounding_mode)2426 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2427     const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2428   unsigned int omsb, precision, dstCount;
2429   integerPart *dst;
2430   lostFraction lost_fraction;
2431 
2432   category = fcNormal;
2433   omsb = APInt::tcMSB(src, srcCount) + 1;
2434   dst = significandParts();
2435   dstCount = partCount();
2436   precision = semantics->precision;
2437 
2438   /* We want the most significant PRECISION bits of SRC.  There may not
2439      be that many; extract what we can.  */
2440   if (precision <= omsb) {
2441     exponent = omsb - 1;
2442     lost_fraction = lostFractionThroughTruncation(src, srcCount,
2443                                                   omsb - precision);
2444     APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2445   } else {
2446     exponent = precision - 1;
2447     lost_fraction = lfExactlyZero;
2448     APInt::tcExtract(dst, dstCount, src, omsb, 0);
2449   }
2450 
2451   return normalize(rounding_mode, lost_fraction);
2452 }
2453 
convertFromAPInt(const APInt & Val,bool isSigned,roundingMode rounding_mode)2454 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2455                                                 roundingMode rounding_mode) {
2456   unsigned int partCount = Val.getNumWords();
2457   APInt api = Val;
2458 
2459   sign = false;
2460   if (isSigned && api.isNegative()) {
2461     sign = true;
2462     api = -api;
2463   }
2464 
2465   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2466 }
2467 
2468 /* Convert a two's complement integer SRC to a floating point number,
2469    rounding according to ROUNDING_MODE.  ISSIGNED is true if the
2470    integer is signed, in which case it must be sign-extended.  */
2471 IEEEFloat::opStatus
convertFromSignExtendedInteger(const integerPart * src,unsigned int srcCount,bool isSigned,roundingMode rounding_mode)2472 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2473                                           unsigned int srcCount, bool isSigned,
2474                                           roundingMode rounding_mode) {
2475   opStatus status;
2476 
2477   if (isSigned &&
2478       APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2479     integerPart *copy;
2480 
2481     /* If we're signed and negative negate a copy.  */
2482     sign = true;
2483     copy = new integerPart[srcCount];
2484     APInt::tcAssign(copy, src, srcCount);
2485     APInt::tcNegate(copy, srcCount);
2486     status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2487     delete [] copy;
2488   } else {
2489     sign = false;
2490     status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2491   }
2492 
2493   return status;
2494 }
2495 
2496 /* FIXME: should this just take a const APInt reference?  */
2497 IEEEFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode)2498 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2499                                           unsigned int width, bool isSigned,
2500                                           roundingMode rounding_mode) {
2501   unsigned int partCount = partCountForBits(width);
2502   APInt api = APInt(width, makeArrayRef(parts, partCount));
2503 
2504   sign = false;
2505   if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2506     sign = true;
2507     api = -api;
2508   }
2509 
2510   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2511 }
2512 
2513 Expected<IEEEFloat::opStatus>
convertFromHexadecimalString(StringRef s,roundingMode rounding_mode)2514 IEEEFloat::convertFromHexadecimalString(StringRef s,
2515                                         roundingMode rounding_mode) {
2516   lostFraction lost_fraction = lfExactlyZero;
2517 
2518   category = fcNormal;
2519   zeroSignificand();
2520   exponent = 0;
2521 
2522   integerPart *significand = significandParts();
2523   unsigned partsCount = partCount();
2524   unsigned bitPos = partsCount * integerPartWidth;
2525   bool computedTrailingFraction = false;
2526 
2527   // Skip leading zeroes and any (hexa)decimal point.
2528   StringRef::iterator begin = s.begin();
2529   StringRef::iterator end = s.end();
2530   StringRef::iterator dot;
2531   auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2532   if (!PtrOrErr)
2533     return PtrOrErr.takeError();
2534   StringRef::iterator p = *PtrOrErr;
2535   StringRef::iterator firstSignificantDigit = p;
2536 
2537   while (p != end) {
2538     integerPart hex_value;
2539 
2540     if (*p == '.') {
2541       if (dot != end)
2542         return createError("String contains multiple dots");
2543       dot = p++;
2544       continue;
2545     }
2546 
2547     hex_value = hexDigitValue(*p);
2548     if (hex_value == -1U)
2549       break;
2550 
2551     p++;
2552 
2553     // Store the number while we have space.
2554     if (bitPos) {
2555       bitPos -= 4;
2556       hex_value <<= bitPos % integerPartWidth;
2557       significand[bitPos / integerPartWidth] |= hex_value;
2558     } else if (!computedTrailingFraction) {
2559       auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2560       if (!FractOrErr)
2561         return FractOrErr.takeError();
2562       lost_fraction = *FractOrErr;
2563       computedTrailingFraction = true;
2564     }
2565   }
2566 
2567   /* Hex floats require an exponent but not a hexadecimal point.  */
2568   if (p == end)
2569     return createError("Hex strings require an exponent");
2570   if (*p != 'p' && *p != 'P')
2571     return createError("Invalid character in significand");
2572   if (p == begin)
2573     return createError("Significand has no digits");
2574   if (dot != end && p - begin == 1)
2575     return createError("Significand has no digits");
2576 
2577   /* Ignore the exponent if we are zero.  */
2578   if (p != firstSignificantDigit) {
2579     int expAdjustment;
2580 
2581     /* Implicit hexadecimal point?  */
2582     if (dot == end)
2583       dot = p;
2584 
2585     /* Calculate the exponent adjustment implicit in the number of
2586        significant digits.  */
2587     expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2588     if (expAdjustment < 0)
2589       expAdjustment++;
2590     expAdjustment = expAdjustment * 4 - 1;
2591 
2592     /* Adjust for writing the significand starting at the most
2593        significant nibble.  */
2594     expAdjustment += semantics->precision;
2595     expAdjustment -= partsCount * integerPartWidth;
2596 
2597     /* Adjust for the given exponent.  */
2598     auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2599     if (!ExpOrErr)
2600       return ExpOrErr.takeError();
2601     exponent = *ExpOrErr;
2602   }
2603 
2604   return normalize(rounding_mode, lost_fraction);
2605 }
2606 
2607 IEEEFloat::opStatus
roundSignificandWithExponent(const integerPart * decSigParts,unsigned sigPartCount,int exp,roundingMode rounding_mode)2608 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2609                                         unsigned sigPartCount, int exp,
2610                                         roundingMode rounding_mode) {
2611   unsigned int parts, pow5PartCount;
2612   fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2613   integerPart pow5Parts[maxPowerOfFiveParts];
2614   bool isNearest;
2615 
2616   isNearest = (rounding_mode == rmNearestTiesToEven ||
2617                rounding_mode == rmNearestTiesToAway);
2618 
2619   parts = partCountForBits(semantics->precision + 11);
2620 
2621   /* Calculate pow(5, abs(exp)).  */
2622   pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2623 
2624   for (;; parts *= 2) {
2625     opStatus sigStatus, powStatus;
2626     unsigned int excessPrecision, truncatedBits;
2627 
2628     calcSemantics.precision = parts * integerPartWidth - 1;
2629     excessPrecision = calcSemantics.precision - semantics->precision;
2630     truncatedBits = excessPrecision;
2631 
2632     IEEEFloat decSig(calcSemantics, uninitialized);
2633     decSig.makeZero(sign);
2634     IEEEFloat pow5(calcSemantics);
2635 
2636     sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2637                                                 rmNearestTiesToEven);
2638     powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2639                                               rmNearestTiesToEven);
2640     /* Add exp, as 10^n = 5^n * 2^n.  */
2641     decSig.exponent += exp;
2642 
2643     lostFraction calcLostFraction;
2644     integerPart HUerr, HUdistance;
2645     unsigned int powHUerr;
2646 
2647     if (exp >= 0) {
2648       /* multiplySignificand leaves the precision-th bit set to 1.  */
2649       calcLostFraction = decSig.multiplySignificand(pow5);
2650       powHUerr = powStatus != opOK;
2651     } else {
2652       calcLostFraction = decSig.divideSignificand(pow5);
2653       /* Denormal numbers have less precision.  */
2654       if (decSig.exponent < semantics->minExponent) {
2655         excessPrecision += (semantics->minExponent - decSig.exponent);
2656         truncatedBits = excessPrecision;
2657         if (excessPrecision > calcSemantics.precision)
2658           excessPrecision = calcSemantics.precision;
2659       }
2660       /* Extra half-ulp lost in reciprocal of exponent.  */
2661       powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2662     }
2663 
2664     /* Both multiplySignificand and divideSignificand return the
2665        result with the integer bit set.  */
2666     assert(APInt::tcExtractBit
2667            (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2668 
2669     HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2670                        powHUerr);
2671     HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2672                                       excessPrecision, isNearest);
2673 
2674     /* Are we guaranteed to round correctly if we truncate?  */
2675     if (HUdistance >= HUerr) {
2676       APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2677                        calcSemantics.precision - excessPrecision,
2678                        excessPrecision);
2679       /* Take the exponent of decSig.  If we tcExtract-ed less bits
2680          above we must adjust our exponent to compensate for the
2681          implicit right shift.  */
2682       exponent = (decSig.exponent + semantics->precision
2683                   - (calcSemantics.precision - excessPrecision));
2684       calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2685                                                        decSig.partCount(),
2686                                                        truncatedBits);
2687       return normalize(rounding_mode, calcLostFraction);
2688     }
2689   }
2690 }
2691 
2692 Expected<IEEEFloat::opStatus>
convertFromDecimalString(StringRef str,roundingMode rounding_mode)2693 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2694   decimalInfo D;
2695   opStatus fs;
2696 
2697   /* Scan the text.  */
2698   StringRef::iterator p = str.begin();
2699   if (Error Err = interpretDecimal(p, str.end(), &D))
2700     return std::move(Err);
2701 
2702   /* Handle the quick cases.  First the case of no significant digits,
2703      i.e. zero, and then exponents that are obviously too large or too
2704      small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
2705      definitely overflows if
2706 
2707            (exp - 1) * L >= maxExponent
2708 
2709      and definitely underflows to zero where
2710 
2711            (exp + 1) * L <= minExponent - precision
2712 
2713      With integer arithmetic the tightest bounds for L are
2714 
2715            93/28 < L < 196/59            [ numerator <= 256 ]
2716            42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
2717   */
2718 
2719   // Test if we have a zero number allowing for strings with no null terminators
2720   // and zero decimals with non-zero exponents.
2721   //
2722   // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2723   // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2724   // be at most one dot. On the other hand, if we have a zero with a non-zero
2725   // exponent, then we know that D.firstSigDigit will be non-numeric.
2726   if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2727     category = fcZero;
2728     fs = opOK;
2729 
2730   /* Check whether the normalized exponent is high enough to overflow
2731      max during the log-rebasing in the max-exponent check below. */
2732   } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2733     fs = handleOverflow(rounding_mode);
2734 
2735   /* If it wasn't, then it also wasn't high enough to overflow max
2736      during the log-rebasing in the min-exponent check.  Check that it
2737      won't overflow min in either check, then perform the min-exponent
2738      check. */
2739   } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2740              (D.normalizedExponent + 1) * 28738 <=
2741                8651 * (semantics->minExponent - (int) semantics->precision)) {
2742     /* Underflow to zero and round.  */
2743     category = fcNormal;
2744     zeroSignificand();
2745     fs = normalize(rounding_mode, lfLessThanHalf);
2746 
2747   /* We can finally safely perform the max-exponent check. */
2748   } else if ((D.normalizedExponent - 1) * 42039
2749              >= 12655 * semantics->maxExponent) {
2750     /* Overflow and round.  */
2751     fs = handleOverflow(rounding_mode);
2752   } else {
2753     integerPart *decSignificand;
2754     unsigned int partCount;
2755 
2756     /* A tight upper bound on number of bits required to hold an
2757        N-digit decimal integer is N * 196 / 59.  Allocate enough space
2758        to hold the full significand, and an extra part required by
2759        tcMultiplyPart.  */
2760     partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2761     partCount = partCountForBits(1 + 196 * partCount / 59);
2762     decSignificand = new integerPart[partCount + 1];
2763     partCount = 0;
2764 
2765     /* Convert to binary efficiently - we do almost all multiplication
2766        in an integerPart.  When this would overflow do we do a single
2767        bignum multiplication, and then revert again to multiplication
2768        in an integerPart.  */
2769     do {
2770       integerPart decValue, val, multiplier;
2771 
2772       val = 0;
2773       multiplier = 1;
2774 
2775       do {
2776         if (*p == '.') {
2777           p++;
2778           if (p == str.end()) {
2779             break;
2780           }
2781         }
2782         decValue = decDigitValue(*p++);
2783         if (decValue >= 10U) {
2784           delete[] decSignificand;
2785           return createError("Invalid character in significand");
2786         }
2787         multiplier *= 10;
2788         val = val * 10 + decValue;
2789         /* The maximum number that can be multiplied by ten with any
2790            digit added without overflowing an integerPart.  */
2791       } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2792 
2793       /* Multiply out the current part.  */
2794       APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2795                             partCount, partCount + 1, false);
2796 
2797       /* If we used another part (likely but not guaranteed), increase
2798          the count.  */
2799       if (decSignificand[partCount])
2800         partCount++;
2801     } while (p <= D.lastSigDigit);
2802 
2803     category = fcNormal;
2804     fs = roundSignificandWithExponent(decSignificand, partCount,
2805                                       D.exponent, rounding_mode);
2806 
2807     delete [] decSignificand;
2808   }
2809 
2810   return fs;
2811 }
2812 
convertFromStringSpecials(StringRef str)2813 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2814   const size_t MIN_NAME_SIZE = 3;
2815 
2816   if (str.size() < MIN_NAME_SIZE)
2817     return false;
2818 
2819   if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
2820     makeInf(false);
2821     return true;
2822   }
2823 
2824   bool IsNegative = str.front() == '-';
2825   if (IsNegative) {
2826     str = str.drop_front();
2827     if (str.size() < MIN_NAME_SIZE)
2828       return false;
2829 
2830     if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) {
2831       makeInf(true);
2832       return true;
2833     }
2834   }
2835 
2836   // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
2837   bool IsSignaling = str.front() == 's' || str.front() == 'S';
2838   if (IsSignaling) {
2839     str = str.drop_front();
2840     if (str.size() < MIN_NAME_SIZE)
2841       return false;
2842   }
2843 
2844   if (str.startswith("nan") || str.startswith("NaN")) {
2845     str = str.drop_front(3);
2846 
2847     // A NaN without payload.
2848     if (str.empty()) {
2849       makeNaN(IsSignaling, IsNegative);
2850       return true;
2851     }
2852 
2853     // Allow the payload to be inside parentheses.
2854     if (str.front() == '(') {
2855       // Parentheses should be balanced (and not empty).
2856       if (str.size() <= 2 || str.back() != ')')
2857         return false;
2858 
2859       str = str.slice(1, str.size() - 1);
2860     }
2861 
2862     // Determine the payload number's radix.
2863     unsigned Radix = 10;
2864     if (str[0] == '0') {
2865       if (str.size() > 1 && tolower(str[1]) == 'x') {
2866         str = str.drop_front(2);
2867         Radix = 16;
2868       } else
2869         Radix = 8;
2870     }
2871 
2872     // Parse the payload and make the NaN.
2873     APInt Payload;
2874     if (!str.getAsInteger(Radix, Payload)) {
2875       makeNaN(IsSignaling, IsNegative, &Payload);
2876       return true;
2877     }
2878   }
2879 
2880   return false;
2881 }
2882 
2883 Expected<IEEEFloat::opStatus>
convertFromString(StringRef str,roundingMode rounding_mode)2884 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
2885   if (str.empty())
2886     return createError("Invalid string length");
2887 
2888   // Handle special cases.
2889   if (convertFromStringSpecials(str))
2890     return opOK;
2891 
2892   /* Handle a leading minus sign.  */
2893   StringRef::iterator p = str.begin();
2894   size_t slen = str.size();
2895   sign = *p == '-' ? 1 : 0;
2896   if (*p == '-' || *p == '+') {
2897     p++;
2898     slen--;
2899     if (!slen)
2900       return createError("String has no digits");
2901   }
2902 
2903   if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2904     if (slen == 2)
2905       return createError("Invalid string");
2906     return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2907                                         rounding_mode);
2908   }
2909 
2910   return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2911 }
2912 
2913 /* Write out a hexadecimal representation of the floating point value
2914    to DST, which must be of sufficient size, in the C99 form
2915    [-]0xh.hhhhp[+-]d.  Return the number of characters written,
2916    excluding the terminating NUL.
2917 
2918    If UPPERCASE, the output is in upper case, otherwise in lower case.
2919 
2920    HEXDIGITS digits appear altogether, rounding the value if
2921    necessary.  If HEXDIGITS is 0, the minimal precision to display the
2922    number precisely is used instead.  If nothing would appear after
2923    the decimal point it is suppressed.
2924 
2925    The decimal exponent is always printed and has at least one digit.
2926    Zero values display an exponent of zero.  Infinities and NaNs
2927    appear as "infinity" or "nan" respectively.
2928 
2929    The above rules are as specified by C99.  There is ambiguity about
2930    what the leading hexadecimal digit should be.  This implementation
2931    uses whatever is necessary so that the exponent is displayed as
2932    stored.  This implies the exponent will fall within the IEEE format
2933    range, and the leading hexadecimal digit will be 0 (for denormals),
2934    1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2935    any other digits zero).
2936 */
convertToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2937 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
2938                                            bool upperCase,
2939                                            roundingMode rounding_mode) const {
2940   char *p;
2941 
2942   p = dst;
2943   if (sign)
2944     *dst++ = '-';
2945 
2946   switch (category) {
2947   case fcInfinity:
2948     memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2949     dst += sizeof infinityL - 1;
2950     break;
2951 
2952   case fcNaN:
2953     memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2954     dst += sizeof NaNU - 1;
2955     break;
2956 
2957   case fcZero:
2958     *dst++ = '0';
2959     *dst++ = upperCase ? 'X': 'x';
2960     *dst++ = '0';
2961     if (hexDigits > 1) {
2962       *dst++ = '.';
2963       memset (dst, '0', hexDigits - 1);
2964       dst += hexDigits - 1;
2965     }
2966     *dst++ = upperCase ? 'P': 'p';
2967     *dst++ = '0';
2968     break;
2969 
2970   case fcNormal:
2971     dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2972     break;
2973   }
2974 
2975   *dst = 0;
2976 
2977   return static_cast<unsigned int>(dst - p);
2978 }
2979 
2980 /* Does the hard work of outputting the correctly rounded hexadecimal
2981    form of a normal floating point number with the specified number of
2982    hexadecimal digits.  If HEXDIGITS is zero the minimum number of
2983    digits necessary to print the value precisely is output.  */
convertNormalToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2984 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2985                                           bool upperCase,
2986                                           roundingMode rounding_mode) const {
2987   unsigned int count, valueBits, shift, partsCount, outputDigits;
2988   const char *hexDigitChars;
2989   const integerPart *significand;
2990   char *p;
2991   bool roundUp;
2992 
2993   *dst++ = '0';
2994   *dst++ = upperCase ? 'X': 'x';
2995 
2996   roundUp = false;
2997   hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2998 
2999   significand = significandParts();
3000   partsCount = partCount();
3001 
3002   /* +3 because the first digit only uses the single integer bit, so
3003      we have 3 virtual zero most-significant-bits.  */
3004   valueBits = semantics->precision + 3;
3005   shift = integerPartWidth - valueBits % integerPartWidth;
3006 
3007   /* The natural number of digits required ignoring trailing
3008      insignificant zeroes.  */
3009   outputDigits = (valueBits - significandLSB () + 3) / 4;
3010 
3011   /* hexDigits of zero means use the required number for the
3012      precision.  Otherwise, see if we are truncating.  If we are,
3013      find out if we need to round away from zero.  */
3014   if (hexDigits) {
3015     if (hexDigits < outputDigits) {
3016       /* We are dropping non-zero bits, so need to check how to round.
3017          "bits" is the number of dropped bits.  */
3018       unsigned int bits;
3019       lostFraction fraction;
3020 
3021       bits = valueBits - hexDigits * 4;
3022       fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3023       roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3024     }
3025     outputDigits = hexDigits;
3026   }
3027 
3028   /* Write the digits consecutively, and start writing in the location
3029      of the hexadecimal point.  We move the most significant digit
3030      left and add the hexadecimal point later.  */
3031   p = ++dst;
3032 
3033   count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3034 
3035   while (outputDigits && count) {
3036     integerPart part;
3037 
3038     /* Put the most significant integerPartWidth bits in "part".  */
3039     if (--count == partsCount)
3040       part = 0;  /* An imaginary higher zero part.  */
3041     else
3042       part = significand[count] << shift;
3043 
3044     if (count && shift)
3045       part |= significand[count - 1] >> (integerPartWidth - shift);
3046 
3047     /* Convert as much of "part" to hexdigits as we can.  */
3048     unsigned int curDigits = integerPartWidth / 4;
3049 
3050     if (curDigits > outputDigits)
3051       curDigits = outputDigits;
3052     dst += partAsHex (dst, part, curDigits, hexDigitChars);
3053     outputDigits -= curDigits;
3054   }
3055 
3056   if (roundUp) {
3057     char *q = dst;
3058 
3059     /* Note that hexDigitChars has a trailing '0'.  */
3060     do {
3061       q--;
3062       *q = hexDigitChars[hexDigitValue (*q) + 1];
3063     } while (*q == '0');
3064     assert(q >= p);
3065   } else {
3066     /* Add trailing zeroes.  */
3067     memset (dst, '0', outputDigits);
3068     dst += outputDigits;
3069   }
3070 
3071   /* Move the most significant digit to before the point, and if there
3072      is something after the decimal point add it.  This must come
3073      after rounding above.  */
3074   p[-1] = p[0];
3075   if (dst -1 == p)
3076     dst--;
3077   else
3078     p[0] = '.';
3079 
3080   /* Finally output the exponent.  */
3081   *dst++ = upperCase ? 'P': 'p';
3082 
3083   return writeSignedDecimal (dst, exponent);
3084 }
3085 
hash_value(const IEEEFloat & Arg)3086 hash_code hash_value(const IEEEFloat &Arg) {
3087   if (!Arg.isFiniteNonZero())
3088     return hash_combine((uint8_t)Arg.category,
3089                         // NaN has no sign, fix it at zero.
3090                         Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3091                         Arg.semantics->precision);
3092 
3093   // Normal floats need their exponent and significand hashed.
3094   return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3095                       Arg.semantics->precision, Arg.exponent,
3096                       hash_combine_range(
3097                         Arg.significandParts(),
3098                         Arg.significandParts() + Arg.partCount()));
3099 }
3100 
3101 // Conversion from APFloat to/from host float/double.  It may eventually be
3102 // possible to eliminate these and have everybody deal with APFloats, but that
3103 // will take a while.  This approach will not easily extend to long double.
3104 // Current implementation requires integerPartWidth==64, which is correct at
3105 // the moment but could be made more general.
3106 
3107 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
3108 // the actual IEEE respresentations.  We compensate for that here.
3109 
convertF80LongDoubleAPFloatToAPInt() const3110 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3111   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3112   assert(partCount()==2);
3113 
3114   uint64_t myexponent, mysignificand;
3115 
3116   if (isFiniteNonZero()) {
3117     myexponent = exponent+16383; //bias
3118     mysignificand = significandParts()[0];
3119     if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3120       myexponent = 0;   // denormal
3121   } else if (category==fcZero) {
3122     myexponent = 0;
3123     mysignificand = 0;
3124   } else if (category==fcInfinity) {
3125     myexponent = 0x7fff;
3126     mysignificand = 0x8000000000000000ULL;
3127   } else {
3128     assert(category == fcNaN && "Unknown category");
3129     myexponent = 0x7fff;
3130     mysignificand = significandParts()[0];
3131   }
3132 
3133   uint64_t words[2];
3134   words[0] = mysignificand;
3135   words[1] =  ((uint64_t)(sign & 1) << 15) |
3136               (myexponent & 0x7fffLL);
3137   return APInt(80, words);
3138 }
3139 
convertPPCDoubleDoubleAPFloatToAPInt() const3140 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3141   assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3142   assert(partCount()==2);
3143 
3144   uint64_t words[2];
3145   opStatus fs;
3146   bool losesInfo;
3147 
3148   // Convert number to double.  To avoid spurious underflows, we re-
3149   // normalize against the "double" minExponent first, and only *then*
3150   // truncate the mantissa.  The result of that second conversion
3151   // may be inexact, but should never underflow.
3152   // Declare fltSemantics before APFloat that uses it (and
3153   // saves pointer to it) to ensure correct destruction order.
3154   fltSemantics extendedSemantics = *semantics;
3155   extendedSemantics.minExponent = semIEEEdouble.minExponent;
3156   IEEEFloat extended(*this);
3157   fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3158   assert(fs == opOK && !losesInfo);
3159   (void)fs;
3160 
3161   IEEEFloat u(extended);
3162   fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3163   assert(fs == opOK || fs == opInexact);
3164   (void)fs;
3165   words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3166 
3167   // If conversion was exact or resulted in a special case, we're done;
3168   // just set the second double to zero.  Otherwise, re-convert back to
3169   // the extended format and compute the difference.  This now should
3170   // convert exactly to double.
3171   if (u.isFiniteNonZero() && losesInfo) {
3172     fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3173     assert(fs == opOK && !losesInfo);
3174     (void)fs;
3175 
3176     IEEEFloat v(extended);
3177     v.subtract(u, rmNearestTiesToEven);
3178     fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3179     assert(fs == opOK && !losesInfo);
3180     (void)fs;
3181     words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3182   } else {
3183     words[1] = 0;
3184   }
3185 
3186   return APInt(128, words);
3187 }
3188 
convertQuadrupleAPFloatToAPInt() const3189 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3190   assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
3191   assert(partCount()==2);
3192 
3193   uint64_t myexponent, mysignificand, mysignificand2;
3194 
3195   if (isFiniteNonZero()) {
3196     myexponent = exponent+16383; //bias
3197     mysignificand = significandParts()[0];
3198     mysignificand2 = significandParts()[1];
3199     if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
3200       myexponent = 0;   // denormal
3201   } else if (category==fcZero) {
3202     myexponent = 0;
3203     mysignificand = mysignificand2 = 0;
3204   } else if (category==fcInfinity) {
3205     myexponent = 0x7fff;
3206     mysignificand = mysignificand2 = 0;
3207   } else {
3208     assert(category == fcNaN && "Unknown category!");
3209     myexponent = 0x7fff;
3210     mysignificand = significandParts()[0];
3211     mysignificand2 = significandParts()[1];
3212   }
3213 
3214   uint64_t words[2];
3215   words[0] = mysignificand;
3216   words[1] = ((uint64_t)(sign & 1) << 63) |
3217              ((myexponent & 0x7fff) << 48) |
3218              (mysignificand2 & 0xffffffffffffLL);
3219 
3220   return APInt(128, words);
3221 }
3222 
convertDoubleAPFloatToAPInt() const3223 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3224   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
3225   assert(partCount()==1);
3226 
3227   uint64_t myexponent, mysignificand;
3228 
3229   if (isFiniteNonZero()) {
3230     myexponent = exponent+1023; //bias
3231     mysignificand = *significandParts();
3232     if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
3233       myexponent = 0;   // denormal
3234   } else if (category==fcZero) {
3235     myexponent = 0;
3236     mysignificand = 0;
3237   } else if (category==fcInfinity) {
3238     myexponent = 0x7ff;
3239     mysignificand = 0;
3240   } else {
3241     assert(category == fcNaN && "Unknown category!");
3242     myexponent = 0x7ff;
3243     mysignificand = *significandParts();
3244   }
3245 
3246   return APInt(64, ((((uint64_t)(sign & 1) << 63) |
3247                      ((myexponent & 0x7ff) <<  52) |
3248                      (mysignificand & 0xfffffffffffffLL))));
3249 }
3250 
convertFloatAPFloatToAPInt() const3251 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3252   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
3253   assert(partCount()==1);
3254 
3255   uint32_t myexponent, mysignificand;
3256 
3257   if (isFiniteNonZero()) {
3258     myexponent = exponent+127; //bias
3259     mysignificand = (uint32_t)*significandParts();
3260     if (myexponent == 1 && !(mysignificand & 0x800000))
3261       myexponent = 0;   // denormal
3262   } else if (category==fcZero) {
3263     myexponent = 0;
3264     mysignificand = 0;
3265   } else if (category==fcInfinity) {
3266     myexponent = 0xff;
3267     mysignificand = 0;
3268   } else {
3269     assert(category == fcNaN && "Unknown category!");
3270     myexponent = 0xff;
3271     mysignificand = (uint32_t)*significandParts();
3272   }
3273 
3274   return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3275                     (mysignificand & 0x7fffff)));
3276 }
3277 
convertBFloatAPFloatToAPInt() const3278 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3279   assert(semantics == (const llvm::fltSemantics *)&semBFloat);
3280   assert(partCount() == 1);
3281 
3282   uint32_t myexponent, mysignificand;
3283 
3284   if (isFiniteNonZero()) {
3285     myexponent = exponent + 127; // bias
3286     mysignificand = (uint32_t)*significandParts();
3287     if (myexponent == 1 && !(mysignificand & 0x80))
3288       myexponent = 0; // denormal
3289   } else if (category == fcZero) {
3290     myexponent = 0;
3291     mysignificand = 0;
3292   } else if (category == fcInfinity) {
3293     myexponent = 0xff;
3294     mysignificand = 0;
3295   } else {
3296     assert(category == fcNaN && "Unknown category!");
3297     myexponent = 0xff;
3298     mysignificand = (uint32_t)*significandParts();
3299   }
3300 
3301   return APInt(16, (((sign & 1) << 15) | ((myexponent & 0xff) << 7) |
3302                     (mysignificand & 0x7f)));
3303 }
3304 
convertHalfAPFloatToAPInt() const3305 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3306   assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
3307   assert(partCount()==1);
3308 
3309   uint32_t myexponent, mysignificand;
3310 
3311   if (isFiniteNonZero()) {
3312     myexponent = exponent+15; //bias
3313     mysignificand = (uint32_t)*significandParts();
3314     if (myexponent == 1 && !(mysignificand & 0x400))
3315       myexponent = 0;   // denormal
3316   } else if (category==fcZero) {
3317     myexponent = 0;
3318     mysignificand = 0;
3319   } else if (category==fcInfinity) {
3320     myexponent = 0x1f;
3321     mysignificand = 0;
3322   } else {
3323     assert(category == fcNaN && "Unknown category!");
3324     myexponent = 0x1f;
3325     mysignificand = (uint32_t)*significandParts();
3326   }
3327 
3328   return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3329                     (mysignificand & 0x3ff)));
3330 }
3331 
3332 // This function creates an APInt that is just a bit map of the floating
3333 // point constant as it would appear in memory.  It is not a conversion,
3334 // and treating the result as a normal integer is unlikely to be useful.
3335 
bitcastToAPInt() const3336 APInt IEEEFloat::bitcastToAPInt() const {
3337   if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3338     return convertHalfAPFloatToAPInt();
3339 
3340   if (semantics == (const llvm::fltSemantics *)&semBFloat)
3341     return convertBFloatAPFloatToAPInt();
3342 
3343   if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3344     return convertFloatAPFloatToAPInt();
3345 
3346   if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3347     return convertDoubleAPFloatToAPInt();
3348 
3349   if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3350     return convertQuadrupleAPFloatToAPInt();
3351 
3352   if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3353     return convertPPCDoubleDoubleAPFloatToAPInt();
3354 
3355   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3356          "unknown format!");
3357   return convertF80LongDoubleAPFloatToAPInt();
3358 }
3359 
convertToFloat() const3360 float IEEEFloat::convertToFloat() const {
3361   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3362          "Float semantics are not IEEEsingle");
3363   APInt api = bitcastToAPInt();
3364   return api.bitsToFloat();
3365 }
3366 
convertToDouble() const3367 double IEEEFloat::convertToDouble() const {
3368   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3369          "Float semantics are not IEEEdouble");
3370   APInt api = bitcastToAPInt();
3371   return api.bitsToDouble();
3372 }
3373 
3374 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
3375 /// does not support these bit patterns:
3376 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3377 ///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3378 ///  exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3379 ///  exponent = 0, integer bit 1 ("pseudodenormal")
3380 /// At the moment, the first three are treated as NaNs, the last one as Normal.
initFromF80LongDoubleAPInt(const APInt & api)3381 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3382   assert(api.getBitWidth()==80);
3383   uint64_t i1 = api.getRawData()[0];
3384   uint64_t i2 = api.getRawData()[1];
3385   uint64_t myexponent = (i2 & 0x7fff);
3386   uint64_t mysignificand = i1;
3387   uint8_t myintegerbit = mysignificand >> 63;
3388 
3389   initialize(&semX87DoubleExtended);
3390   assert(partCount()==2);
3391 
3392   sign = static_cast<unsigned int>(i2>>15);
3393   if (myexponent == 0 && mysignificand == 0) {
3394     makeZero(sign);
3395   } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3396     makeInf(sign);
3397   } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3398              (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3399     category = fcNaN;
3400     exponent = exponentNaN();
3401     significandParts()[0] = mysignificand;
3402     significandParts()[1] = 0;
3403   } else {
3404     category = fcNormal;
3405     exponent = myexponent - 16383;
3406     significandParts()[0] = mysignificand;
3407     significandParts()[1] = 0;
3408     if (myexponent==0)          // denormal
3409       exponent = -16382;
3410   }
3411 }
3412 
initFromPPCDoubleDoubleAPInt(const APInt & api)3413 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3414   assert(api.getBitWidth()==128);
3415   uint64_t i1 = api.getRawData()[0];
3416   uint64_t i2 = api.getRawData()[1];
3417   opStatus fs;
3418   bool losesInfo;
3419 
3420   // Get the first double and convert to our format.
3421   initFromDoubleAPInt(APInt(64, i1));
3422   fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3423   assert(fs == opOK && !losesInfo);
3424   (void)fs;
3425 
3426   // Unless we have a special case, add in second double.
3427   if (isFiniteNonZero()) {
3428     IEEEFloat v(semIEEEdouble, APInt(64, i2));
3429     fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3430     assert(fs == opOK && !losesInfo);
3431     (void)fs;
3432 
3433     add(v, rmNearestTiesToEven);
3434   }
3435 }
3436 
initFromQuadrupleAPInt(const APInt & api)3437 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3438   assert(api.getBitWidth()==128);
3439   uint64_t i1 = api.getRawData()[0];
3440   uint64_t i2 = api.getRawData()[1];
3441   uint64_t myexponent = (i2 >> 48) & 0x7fff;
3442   uint64_t mysignificand  = i1;
3443   uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3444 
3445   initialize(&semIEEEquad);
3446   assert(partCount()==2);
3447 
3448   sign = static_cast<unsigned int>(i2>>63);
3449   if (myexponent==0 &&
3450       (mysignificand==0 && mysignificand2==0)) {
3451     makeZero(sign);
3452   } else if (myexponent==0x7fff &&
3453              (mysignificand==0 && mysignificand2==0)) {
3454     makeInf(sign);
3455   } else if (myexponent==0x7fff &&
3456              (mysignificand!=0 || mysignificand2 !=0)) {
3457     category = fcNaN;
3458     exponent = exponentNaN();
3459     significandParts()[0] = mysignificand;
3460     significandParts()[1] = mysignificand2;
3461   } else {
3462     category = fcNormal;
3463     exponent = myexponent - 16383;
3464     significandParts()[0] = mysignificand;
3465     significandParts()[1] = mysignificand2;
3466     if (myexponent==0)          // denormal
3467       exponent = -16382;
3468     else
3469       significandParts()[1] |= 0x1000000000000LL;  // integer bit
3470   }
3471 }
3472 
initFromDoubleAPInt(const APInt & api)3473 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3474   assert(api.getBitWidth()==64);
3475   uint64_t i = *api.getRawData();
3476   uint64_t myexponent = (i >> 52) & 0x7ff;
3477   uint64_t mysignificand = i & 0xfffffffffffffLL;
3478 
3479   initialize(&semIEEEdouble);
3480   assert(partCount()==1);
3481 
3482   sign = static_cast<unsigned int>(i>>63);
3483   if (myexponent==0 && mysignificand==0) {
3484     makeZero(sign);
3485   } else if (myexponent==0x7ff && mysignificand==0) {
3486     makeInf(sign);
3487   } else if (myexponent==0x7ff && mysignificand!=0) {
3488     category = fcNaN;
3489     exponent = exponentNaN();
3490     *significandParts() = mysignificand;
3491   } else {
3492     category = fcNormal;
3493     exponent = myexponent - 1023;
3494     *significandParts() = mysignificand;
3495     if (myexponent==0)          // denormal
3496       exponent = -1022;
3497     else
3498       *significandParts() |= 0x10000000000000LL;  // integer bit
3499   }
3500 }
3501 
initFromFloatAPInt(const APInt & api)3502 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3503   assert(api.getBitWidth()==32);
3504   uint32_t i = (uint32_t)*api.getRawData();
3505   uint32_t myexponent = (i >> 23) & 0xff;
3506   uint32_t mysignificand = i & 0x7fffff;
3507 
3508   initialize(&semIEEEsingle);
3509   assert(partCount()==1);
3510 
3511   sign = i >> 31;
3512   if (myexponent==0 && mysignificand==0) {
3513     makeZero(sign);
3514   } else if (myexponent==0xff && mysignificand==0) {
3515     makeInf(sign);
3516   } else if (myexponent==0xff && mysignificand!=0) {
3517     category = fcNaN;
3518     exponent = exponentNaN();
3519     *significandParts() = mysignificand;
3520   } else {
3521     category = fcNormal;
3522     exponent = myexponent - 127;  //bias
3523     *significandParts() = mysignificand;
3524     if (myexponent==0)    // denormal
3525       exponent = -126;
3526     else
3527       *significandParts() |= 0x800000; // integer bit
3528   }
3529 }
3530 
initFromBFloatAPInt(const APInt & api)3531 void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3532   assert(api.getBitWidth() == 16);
3533   uint32_t i = (uint32_t)*api.getRawData();
3534   uint32_t myexponent = (i >> 7) & 0xff;
3535   uint32_t mysignificand = i & 0x7f;
3536 
3537   initialize(&semBFloat);
3538   assert(partCount() == 1);
3539 
3540   sign = i >> 15;
3541   if (myexponent == 0 && mysignificand == 0) {
3542     makeZero(sign);
3543   } else if (myexponent == 0xff && mysignificand == 0) {
3544     makeInf(sign);
3545   } else if (myexponent == 0xff && mysignificand != 0) {
3546     category = fcNaN;
3547     exponent = exponentNaN();
3548     *significandParts() = mysignificand;
3549   } else {
3550     category = fcNormal;
3551     exponent = myexponent - 127; // bias
3552     *significandParts() = mysignificand;
3553     if (myexponent == 0) // denormal
3554       exponent = -126;
3555     else
3556       *significandParts() |= 0x80; // integer bit
3557   }
3558 }
3559 
initFromHalfAPInt(const APInt & api)3560 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3561   assert(api.getBitWidth()==16);
3562   uint32_t i = (uint32_t)*api.getRawData();
3563   uint32_t myexponent = (i >> 10) & 0x1f;
3564   uint32_t mysignificand = i & 0x3ff;
3565 
3566   initialize(&semIEEEhalf);
3567   assert(partCount()==1);
3568 
3569   sign = i >> 15;
3570   if (myexponent==0 && mysignificand==0) {
3571     makeZero(sign);
3572   } else if (myexponent==0x1f && mysignificand==0) {
3573     makeInf(sign);
3574   } else if (myexponent==0x1f && mysignificand!=0) {
3575     category = fcNaN;
3576     exponent = exponentNaN();
3577     *significandParts() = mysignificand;
3578   } else {
3579     category = fcNormal;
3580     exponent = myexponent - 15;  //bias
3581     *significandParts() = mysignificand;
3582     if (myexponent==0)    // denormal
3583       exponent = -14;
3584     else
3585       *significandParts() |= 0x400; // integer bit
3586   }
3587 }
3588 
3589 /// Treat api as containing the bits of a floating point number.  Currently
3590 /// we infer the floating point type from the size of the APInt.  The
3591 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3592 /// when the size is anything else).
initFromAPInt(const fltSemantics * Sem,const APInt & api)3593 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3594   if (Sem == &semIEEEhalf)
3595     return initFromHalfAPInt(api);
3596   if (Sem == &semBFloat)
3597     return initFromBFloatAPInt(api);
3598   if (Sem == &semIEEEsingle)
3599     return initFromFloatAPInt(api);
3600   if (Sem == &semIEEEdouble)
3601     return initFromDoubleAPInt(api);
3602   if (Sem == &semX87DoubleExtended)
3603     return initFromF80LongDoubleAPInt(api);
3604   if (Sem == &semIEEEquad)
3605     return initFromQuadrupleAPInt(api);
3606   if (Sem == &semPPCDoubleDoubleLegacy)
3607     return initFromPPCDoubleDoubleAPInt(api);
3608 
3609   llvm_unreachable(nullptr);
3610 }
3611 
3612 /// Make this number the largest magnitude normal number in the given
3613 /// semantics.
makeLargest(bool Negative)3614 void IEEEFloat::makeLargest(bool Negative) {
3615   // We want (in interchange format):
3616   //   sign = {Negative}
3617   //   exponent = 1..10
3618   //   significand = 1..1
3619   category = fcNormal;
3620   sign = Negative;
3621   exponent = semantics->maxExponent;
3622 
3623   // Use memset to set all but the highest integerPart to all ones.
3624   integerPart *significand = significandParts();
3625   unsigned PartCount = partCount();
3626   memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3627 
3628   // Set the high integerPart especially setting all unused top bits for
3629   // internal consistency.
3630   const unsigned NumUnusedHighBits =
3631     PartCount*integerPartWidth - semantics->precision;
3632   significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3633                                    ? (~integerPart(0) >> NumUnusedHighBits)
3634                                    : 0;
3635 }
3636 
3637 /// Make this number the smallest magnitude denormal number in the given
3638 /// semantics.
makeSmallest(bool Negative)3639 void IEEEFloat::makeSmallest(bool Negative) {
3640   // We want (in interchange format):
3641   //   sign = {Negative}
3642   //   exponent = 0..0
3643   //   significand = 0..01
3644   category = fcNormal;
3645   sign = Negative;
3646   exponent = semantics->minExponent;
3647   APInt::tcSet(significandParts(), 1, partCount());
3648 }
3649 
makeSmallestNormalized(bool Negative)3650 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3651   // We want (in interchange format):
3652   //   sign = {Negative}
3653   //   exponent = 0..0
3654   //   significand = 10..0
3655 
3656   category = fcNormal;
3657   zeroSignificand();
3658   sign = Negative;
3659   exponent = semantics->minExponent;
3660   significandParts()[partCountForBits(semantics->precision) - 1] |=
3661       (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth));
3662 }
3663 
IEEEFloat(const fltSemantics & Sem,const APInt & API)3664 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3665   initFromAPInt(&Sem, API);
3666 }
3667 
IEEEFloat(float f)3668 IEEEFloat::IEEEFloat(float f) {
3669   initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3670 }
3671 
IEEEFloat(double d)3672 IEEEFloat::IEEEFloat(double d) {
3673   initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3674 }
3675 
3676 namespace {
append(SmallVectorImpl<char> & Buffer,StringRef Str)3677   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3678     Buffer.append(Str.begin(), Str.end());
3679   }
3680 
3681   /// Removes data from the given significand until it is no more
3682   /// precise than is required for the desired precision.
AdjustToPrecision(APInt & significand,int & exp,unsigned FormatPrecision)3683   void AdjustToPrecision(APInt &significand,
3684                          int &exp, unsigned FormatPrecision) {
3685     unsigned bits = significand.getActiveBits();
3686 
3687     // 196/59 is a very slight overestimate of lg_2(10).
3688     unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3689 
3690     if (bits <= bitsRequired) return;
3691 
3692     unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3693     if (!tensRemovable) return;
3694 
3695     exp += tensRemovable;
3696 
3697     APInt divisor(significand.getBitWidth(), 1);
3698     APInt powten(significand.getBitWidth(), 10);
3699     while (true) {
3700       if (tensRemovable & 1)
3701         divisor *= powten;
3702       tensRemovable >>= 1;
3703       if (!tensRemovable) break;
3704       powten *= powten;
3705     }
3706 
3707     significand = significand.udiv(divisor);
3708 
3709     // Truncate the significand down to its active bit count.
3710     significand = significand.trunc(significand.getActiveBits());
3711   }
3712 
3713 
AdjustToPrecision(SmallVectorImpl<char> & buffer,int & exp,unsigned FormatPrecision)3714   void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3715                          int &exp, unsigned FormatPrecision) {
3716     unsigned N = buffer.size();
3717     if (N <= FormatPrecision) return;
3718 
3719     // The most significant figures are the last ones in the buffer.
3720     unsigned FirstSignificant = N - FormatPrecision;
3721 
3722     // Round.
3723     // FIXME: this probably shouldn't use 'round half up'.
3724 
3725     // Rounding down is just a truncation, except we also want to drop
3726     // trailing zeros from the new result.
3727     if (buffer[FirstSignificant - 1] < '5') {
3728       while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3729         FirstSignificant++;
3730 
3731       exp += FirstSignificant;
3732       buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3733       return;
3734     }
3735 
3736     // Rounding up requires a decimal add-with-carry.  If we continue
3737     // the carry, the newly-introduced zeros will just be truncated.
3738     for (unsigned I = FirstSignificant; I != N; ++I) {
3739       if (buffer[I] == '9') {
3740         FirstSignificant++;
3741       } else {
3742         buffer[I]++;
3743         break;
3744       }
3745     }
3746 
3747     // If we carried through, we have exactly one digit of precision.
3748     if (FirstSignificant == N) {
3749       exp += FirstSignificant;
3750       buffer.clear();
3751       buffer.push_back('1');
3752       return;
3753     }
3754 
3755     exp += FirstSignificant;
3756     buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3757   }
3758 } // namespace
3759 
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const3760 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
3761                          unsigned FormatMaxPadding, bool TruncateZero) const {
3762   switch (category) {
3763   case fcInfinity:
3764     if (isNegative())
3765       return append(Str, "-Inf");
3766     else
3767       return append(Str, "+Inf");
3768 
3769   case fcNaN: return append(Str, "NaN");
3770 
3771   case fcZero:
3772     if (isNegative())
3773       Str.push_back('-');
3774 
3775     if (!FormatMaxPadding) {
3776       if (TruncateZero)
3777         append(Str, "0.0E+0");
3778       else {
3779         append(Str, "0.0");
3780         if (FormatPrecision > 1)
3781           Str.append(FormatPrecision - 1, '0');
3782         append(Str, "e+00");
3783       }
3784     } else
3785       Str.push_back('0');
3786     return;
3787 
3788   case fcNormal:
3789     break;
3790   }
3791 
3792   if (isNegative())
3793     Str.push_back('-');
3794 
3795   // Decompose the number into an APInt and an exponent.
3796   int exp = exponent - ((int) semantics->precision - 1);
3797   APInt significand(semantics->precision,
3798                     makeArrayRef(significandParts(),
3799                                  partCountForBits(semantics->precision)));
3800 
3801   // Set FormatPrecision if zero.  We want to do this before we
3802   // truncate trailing zeros, as those are part of the precision.
3803   if (!FormatPrecision) {
3804     // We use enough digits so the number can be round-tripped back to an
3805     // APFloat. The formula comes from "How to Print Floating-Point Numbers
3806     // Accurately" by Steele and White.
3807     // FIXME: Using a formula based purely on the precision is conservative;
3808     // we can print fewer digits depending on the actual value being printed.
3809 
3810     // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3811     FormatPrecision = 2 + semantics->precision * 59 / 196;
3812   }
3813 
3814   // Ignore trailing binary zeros.
3815   int trailingZeros = significand.countTrailingZeros();
3816   exp += trailingZeros;
3817   significand.lshrInPlace(trailingZeros);
3818 
3819   // Change the exponent from 2^e to 10^e.
3820   if (exp == 0) {
3821     // Nothing to do.
3822   } else if (exp > 0) {
3823     // Just shift left.
3824     significand = significand.zext(semantics->precision + exp);
3825     significand <<= exp;
3826     exp = 0;
3827   } else { /* exp < 0 */
3828     int texp = -exp;
3829 
3830     // We transform this using the identity:
3831     //   (N)(2^-e) == (N)(5^e)(10^-e)
3832     // This means we have to multiply N (the significand) by 5^e.
3833     // To avoid overflow, we have to operate on numbers large
3834     // enough to store N * 5^e:
3835     //   log2(N * 5^e) == log2(N) + e * log2(5)
3836     //                 <= semantics->precision + e * 137 / 59
3837     //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3838 
3839     unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3840 
3841     // Multiply significand by 5^e.
3842     //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3843     significand = significand.zext(precision);
3844     APInt five_to_the_i(precision, 5);
3845     while (true) {
3846       if (texp & 1) significand *= five_to_the_i;
3847 
3848       texp >>= 1;
3849       if (!texp) break;
3850       five_to_the_i *= five_to_the_i;
3851     }
3852   }
3853 
3854   AdjustToPrecision(significand, exp, FormatPrecision);
3855 
3856   SmallVector<char, 256> buffer;
3857 
3858   // Fill the buffer.
3859   unsigned precision = significand.getBitWidth();
3860   APInt ten(precision, 10);
3861   APInt digit(precision, 0);
3862 
3863   bool inTrail = true;
3864   while (significand != 0) {
3865     // digit <- significand % 10
3866     // significand <- significand / 10
3867     APInt::udivrem(significand, ten, significand, digit);
3868 
3869     unsigned d = digit.getZExtValue();
3870 
3871     // Drop trailing zeros.
3872     if (inTrail && !d) exp++;
3873     else {
3874       buffer.push_back((char) ('0' + d));
3875       inTrail = false;
3876     }
3877   }
3878 
3879   assert(!buffer.empty() && "no characters in buffer!");
3880 
3881   // Drop down to FormatPrecision.
3882   // TODO: don't do more precise calculations above than are required.
3883   AdjustToPrecision(buffer, exp, FormatPrecision);
3884 
3885   unsigned NDigits = buffer.size();
3886 
3887   // Check whether we should use scientific notation.
3888   bool FormatScientific;
3889   if (!FormatMaxPadding)
3890     FormatScientific = true;
3891   else {
3892     if (exp >= 0) {
3893       // 765e3 --> 765000
3894       //              ^^^
3895       // But we shouldn't make the number look more precise than it is.
3896       FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3897                           NDigits + (unsigned) exp > FormatPrecision);
3898     } else {
3899       // Power of the most significant digit.
3900       int MSD = exp + (int) (NDigits - 1);
3901       if (MSD >= 0) {
3902         // 765e-2 == 7.65
3903         FormatScientific = false;
3904       } else {
3905         // 765e-5 == 0.00765
3906         //           ^ ^^
3907         FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3908       }
3909     }
3910   }
3911 
3912   // Scientific formatting is pretty straightforward.
3913   if (FormatScientific) {
3914     exp += (NDigits - 1);
3915 
3916     Str.push_back(buffer[NDigits-1]);
3917     Str.push_back('.');
3918     if (NDigits == 1 && TruncateZero)
3919       Str.push_back('0');
3920     else
3921       for (unsigned I = 1; I != NDigits; ++I)
3922         Str.push_back(buffer[NDigits-1-I]);
3923     // Fill with zeros up to FormatPrecision.
3924     if (!TruncateZero && FormatPrecision > NDigits - 1)
3925       Str.append(FormatPrecision - NDigits + 1, '0');
3926     // For !TruncateZero we use lower 'e'.
3927     Str.push_back(TruncateZero ? 'E' : 'e');
3928 
3929     Str.push_back(exp >= 0 ? '+' : '-');
3930     if (exp < 0) exp = -exp;
3931     SmallVector<char, 6> expbuf;
3932     do {
3933       expbuf.push_back((char) ('0' + (exp % 10)));
3934       exp /= 10;
3935     } while (exp);
3936     // Exponent always at least two digits if we do not truncate zeros.
3937     if (!TruncateZero && expbuf.size() < 2)
3938       expbuf.push_back('0');
3939     for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3940       Str.push_back(expbuf[E-1-I]);
3941     return;
3942   }
3943 
3944   // Non-scientific, positive exponents.
3945   if (exp >= 0) {
3946     for (unsigned I = 0; I != NDigits; ++I)
3947       Str.push_back(buffer[NDigits-1-I]);
3948     for (unsigned I = 0; I != (unsigned) exp; ++I)
3949       Str.push_back('0');
3950     return;
3951   }
3952 
3953   // Non-scientific, negative exponents.
3954 
3955   // The number of digits to the left of the decimal point.
3956   int NWholeDigits = exp + (int) NDigits;
3957 
3958   unsigned I = 0;
3959   if (NWholeDigits > 0) {
3960     for (; I != (unsigned) NWholeDigits; ++I)
3961       Str.push_back(buffer[NDigits-I-1]);
3962     Str.push_back('.');
3963   } else {
3964     unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3965 
3966     Str.push_back('0');
3967     Str.push_back('.');
3968     for (unsigned Z = 1; Z != NZeros; ++Z)
3969       Str.push_back('0');
3970   }
3971 
3972   for (; I != NDigits; ++I)
3973     Str.push_back(buffer[NDigits-I-1]);
3974 }
3975 
getExactInverse(APFloat * inv) const3976 bool IEEEFloat::getExactInverse(APFloat *inv) const {
3977   // Special floats and denormals have no exact inverse.
3978   if (!isFiniteNonZero())
3979     return false;
3980 
3981   // Check that the number is a power of two by making sure that only the
3982   // integer bit is set in the significand.
3983   if (significandLSB() != semantics->precision - 1)
3984     return false;
3985 
3986   // Get the inverse.
3987   IEEEFloat reciprocal(*semantics, 1ULL);
3988   if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3989     return false;
3990 
3991   // Avoid multiplication with a denormal, it is not safe on all platforms and
3992   // may be slower than a normal division.
3993   if (reciprocal.isDenormal())
3994     return false;
3995 
3996   assert(reciprocal.isFiniteNonZero() &&
3997          reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3998 
3999   if (inv)
4000     *inv = APFloat(reciprocal, *semantics);
4001 
4002   return true;
4003 }
4004 
isSignaling() const4005 bool IEEEFloat::isSignaling() const {
4006   if (!isNaN())
4007     return false;
4008 
4009   // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4010   // first bit of the trailing significand being 0.
4011   return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4012 }
4013 
4014 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4015 ///
4016 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4017 /// appropriate sign switching before/after the computation.
next(bool nextDown)4018 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
4019   // If we are performing nextDown, swap sign so we have -x.
4020   if (nextDown)
4021     changeSign();
4022 
4023   // Compute nextUp(x)
4024   opStatus result = opOK;
4025 
4026   // Handle each float category separately.
4027   switch (category) {
4028   case fcInfinity:
4029     // nextUp(+inf) = +inf
4030     if (!isNegative())
4031       break;
4032     // nextUp(-inf) = -getLargest()
4033     makeLargest(true);
4034     break;
4035   case fcNaN:
4036     // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4037     // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4038     //                     change the payload.
4039     if (isSignaling()) {
4040       result = opInvalidOp;
4041       // For consistency, propagate the sign of the sNaN to the qNaN.
4042       makeNaN(false, isNegative(), nullptr);
4043     }
4044     break;
4045   case fcZero:
4046     // nextUp(pm 0) = +getSmallest()
4047     makeSmallest(false);
4048     break;
4049   case fcNormal:
4050     // nextUp(-getSmallest()) = -0
4051     if (isSmallest() && isNegative()) {
4052       APInt::tcSet(significandParts(), 0, partCount());
4053       category = fcZero;
4054       exponent = 0;
4055       break;
4056     }
4057 
4058     // nextUp(getLargest()) == INFINITY
4059     if (isLargest() && !isNegative()) {
4060       APInt::tcSet(significandParts(), 0, partCount());
4061       category = fcInfinity;
4062       exponent = semantics->maxExponent + 1;
4063       break;
4064     }
4065 
4066     // nextUp(normal) == normal + inc.
4067     if (isNegative()) {
4068       // If we are negative, we need to decrement the significand.
4069 
4070       // We only cross a binade boundary that requires adjusting the exponent
4071       // if:
4072       //   1. exponent != semantics->minExponent. This implies we are not in the
4073       //   smallest binade or are dealing with denormals.
4074       //   2. Our significand excluding the integral bit is all zeros.
4075       bool WillCrossBinadeBoundary =
4076         exponent != semantics->minExponent && isSignificandAllZeros();
4077 
4078       // Decrement the significand.
4079       //
4080       // We always do this since:
4081       //   1. If we are dealing with a non-binade decrement, by definition we
4082       //   just decrement the significand.
4083       //   2. If we are dealing with a normal -> normal binade decrement, since
4084       //   we have an explicit integral bit the fact that all bits but the
4085       //   integral bit are zero implies that subtracting one will yield a
4086       //   significand with 0 integral bit and 1 in all other spots. Thus we
4087       //   must just adjust the exponent and set the integral bit to 1.
4088       //   3. If we are dealing with a normal -> denormal binade decrement,
4089       //   since we set the integral bit to 0 when we represent denormals, we
4090       //   just decrement the significand.
4091       integerPart *Parts = significandParts();
4092       APInt::tcDecrement(Parts, partCount());
4093 
4094       if (WillCrossBinadeBoundary) {
4095         // Our result is a normal number. Do the following:
4096         // 1. Set the integral bit to 1.
4097         // 2. Decrement the exponent.
4098         APInt::tcSetBit(Parts, semantics->precision - 1);
4099         exponent--;
4100       }
4101     } else {
4102       // If we are positive, we need to increment the significand.
4103 
4104       // We only cross a binade boundary that requires adjusting the exponent if
4105       // the input is not a denormal and all of said input's significand bits
4106       // are set. If all of said conditions are true: clear the significand, set
4107       // the integral bit to 1, and increment the exponent. If we have a
4108       // denormal always increment since moving denormals and the numbers in the
4109       // smallest normal binade have the same exponent in our representation.
4110       bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
4111 
4112       if (WillCrossBinadeBoundary) {
4113         integerPart *Parts = significandParts();
4114         APInt::tcSet(Parts, 0, partCount());
4115         APInt::tcSetBit(Parts, semantics->precision - 1);
4116         assert(exponent != semantics->maxExponent &&
4117                "We can not increment an exponent beyond the maxExponent allowed"
4118                " by the given floating point semantics.");
4119         exponent++;
4120       } else {
4121         incrementSignificand();
4122       }
4123     }
4124     break;
4125   }
4126 
4127   // If we are performing nextDown, swap sign so we have -nextUp(-x)
4128   if (nextDown)
4129     changeSign();
4130 
4131   return result;
4132 }
4133 
exponentNaN() const4134 APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4135   return semantics->maxExponent + 1;
4136 }
4137 
exponentInf() const4138 APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4139   return semantics->maxExponent + 1;
4140 }
4141 
exponentZero() const4142 APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4143   return semantics->minExponent - 1;
4144 }
4145 
makeInf(bool Negative)4146 void IEEEFloat::makeInf(bool Negative) {
4147   category = fcInfinity;
4148   sign = Negative;
4149   exponent = exponentInf();
4150   APInt::tcSet(significandParts(), 0, partCount());
4151 }
4152 
makeZero(bool Negative)4153 void IEEEFloat::makeZero(bool Negative) {
4154   category = fcZero;
4155   sign = Negative;
4156   exponent = exponentZero();
4157   APInt::tcSet(significandParts(), 0, partCount());
4158 }
4159 
makeQuiet()4160 void IEEEFloat::makeQuiet() {
4161   assert(isNaN());
4162   APInt::tcSetBit(significandParts(), semantics->precision - 2);
4163 }
4164 
ilogb(const IEEEFloat & Arg)4165 int ilogb(const IEEEFloat &Arg) {
4166   if (Arg.isNaN())
4167     return IEEEFloat::IEK_NaN;
4168   if (Arg.isZero())
4169     return IEEEFloat::IEK_Zero;
4170   if (Arg.isInfinity())
4171     return IEEEFloat::IEK_Inf;
4172   if (!Arg.isDenormal())
4173     return Arg.exponent;
4174 
4175   IEEEFloat Normalized(Arg);
4176   int SignificandBits = Arg.getSemantics().precision - 1;
4177 
4178   Normalized.exponent += SignificandBits;
4179   Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4180   return Normalized.exponent - SignificandBits;
4181 }
4182 
scalbn(IEEEFloat X,int Exp,IEEEFloat::roundingMode RoundingMode)4183 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
4184   auto MaxExp = X.getSemantics().maxExponent;
4185   auto MinExp = X.getSemantics().minExponent;
4186 
4187   // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4188   // overflow; clamp it to a safe range before adding, but ensure that the range
4189   // is large enough that the clamp does not change the result. The range we
4190   // need to support is the difference between the largest possible exponent and
4191   // the normalized exponent of half the smallest denormal.
4192 
4193   int SignificandBits = X.getSemantics().precision - 1;
4194   int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4195 
4196   // Clamp to one past the range ends to let normalize handle overlflow.
4197   X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
4198   X.normalize(RoundingMode, lfExactlyZero);
4199   if (X.isNaN())
4200     X.makeQuiet();
4201   return X;
4202 }
4203 
frexp(const IEEEFloat & Val,int & Exp,IEEEFloat::roundingMode RM)4204 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
4205   Exp = ilogb(Val);
4206 
4207   // Quiet signalling nans.
4208   if (Exp == IEEEFloat::IEK_NaN) {
4209     IEEEFloat Quiet(Val);
4210     Quiet.makeQuiet();
4211     return Quiet;
4212   }
4213 
4214   if (Exp == IEEEFloat::IEK_Inf)
4215     return Val;
4216 
4217   // 1 is added because frexp is defined to return a normalized fraction in
4218   // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4219   Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
4220   return scalbn(Val, -Exp, RM);
4221 }
4222 
DoubleAPFloat(const fltSemantics & S)4223 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4224     : Semantics(&S),
4225       Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
4226   assert(Semantics == &semPPCDoubleDouble);
4227 }
4228 
DoubleAPFloat(const fltSemantics & S,uninitializedTag)4229 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
4230     : Semantics(&S),
4231       Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
4232                             APFloat(semIEEEdouble, uninitialized)}) {
4233   assert(Semantics == &semPPCDoubleDouble);
4234 }
4235 
DoubleAPFloat(const fltSemantics & S,integerPart I)4236 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
4237     : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4238                                            APFloat(semIEEEdouble)}) {
4239   assert(Semantics == &semPPCDoubleDouble);
4240 }
4241 
DoubleAPFloat(const fltSemantics & S,const APInt & I)4242 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
4243     : Semantics(&S),
4244       Floats(new APFloat[2]{
4245           APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4246           APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4247   assert(Semantics == &semPPCDoubleDouble);
4248 }
4249 
DoubleAPFloat(const fltSemantics & S,APFloat && First,APFloat && Second)4250 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
4251                              APFloat &&Second)
4252     : Semantics(&S),
4253       Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4254   assert(Semantics == &semPPCDoubleDouble);
4255   assert(&Floats[0].getSemantics() == &semIEEEdouble);
4256   assert(&Floats[1].getSemantics() == &semIEEEdouble);
4257 }
4258 
DoubleAPFloat(const DoubleAPFloat & RHS)4259 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
4260     : Semantics(RHS.Semantics),
4261       Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4262                                          APFloat(RHS.Floats[1])}
4263                         : nullptr) {
4264   assert(Semantics == &semPPCDoubleDouble);
4265 }
4266 
DoubleAPFloat(DoubleAPFloat && RHS)4267 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
4268     : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
4269   RHS.Semantics = &semBogus;
4270   assert(Semantics == &semPPCDoubleDouble);
4271 }
4272 
operator =(const DoubleAPFloat & RHS)4273 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
4274   if (Semantics == RHS.Semantics && RHS.Floats) {
4275     Floats[0] = RHS.Floats[0];
4276     Floats[1] = RHS.Floats[1];
4277   } else if (this != &RHS) {
4278     this->~DoubleAPFloat();
4279     new (this) DoubleAPFloat(RHS);
4280   }
4281   return *this;
4282 }
4283 
4284 // Implement addition, subtraction, multiplication and division based on:
4285 // "Software for Doubled-Precision Floating-Point Computations",
4286 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
addImpl(const APFloat & a,const APFloat & aa,const APFloat & c,const APFloat & cc,roundingMode RM)4287 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4288                                          const APFloat &c, const APFloat &cc,
4289                                          roundingMode RM) {
4290   int Status = opOK;
4291   APFloat z = a;
4292   Status |= z.add(c, RM);
4293   if (!z.isFinite()) {
4294     if (!z.isInfinity()) {
4295       Floats[0] = std::move(z);
4296       Floats[1].makeZero(/* Neg = */ false);
4297       return (opStatus)Status;
4298     }
4299     Status = opOK;
4300     auto AComparedToC = a.compareAbsoluteValue(c);
4301     z = cc;
4302     Status |= z.add(aa, RM);
4303     if (AComparedToC == APFloat::cmpGreaterThan) {
4304       // z = cc + aa + c + a;
4305       Status |= z.add(c, RM);
4306       Status |= z.add(a, RM);
4307     } else {
4308       // z = cc + aa + a + c;
4309       Status |= z.add(a, RM);
4310       Status |= z.add(c, RM);
4311     }
4312     if (!z.isFinite()) {
4313       Floats[0] = std::move(z);
4314       Floats[1].makeZero(/* Neg = */ false);
4315       return (opStatus)Status;
4316     }
4317     Floats[0] = z;
4318     APFloat zz = aa;
4319     Status |= zz.add(cc, RM);
4320     if (AComparedToC == APFloat::cmpGreaterThan) {
4321       // Floats[1] = a - z + c + zz;
4322       Floats[1] = a;
4323       Status |= Floats[1].subtract(z, RM);
4324       Status |= Floats[1].add(c, RM);
4325       Status |= Floats[1].add(zz, RM);
4326     } else {
4327       // Floats[1] = c - z + a + zz;
4328       Floats[1] = c;
4329       Status |= Floats[1].subtract(z, RM);
4330       Status |= Floats[1].add(a, RM);
4331       Status |= Floats[1].add(zz, RM);
4332     }
4333   } else {
4334     // q = a - z;
4335     APFloat q = a;
4336     Status |= q.subtract(z, RM);
4337 
4338     // zz = q + c + (a - (q + z)) + aa + cc;
4339     // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4340     auto zz = q;
4341     Status |= zz.add(c, RM);
4342     Status |= q.add(z, RM);
4343     Status |= q.subtract(a, RM);
4344     q.changeSign();
4345     Status |= zz.add(q, RM);
4346     Status |= zz.add(aa, RM);
4347     Status |= zz.add(cc, RM);
4348     if (zz.isZero() && !zz.isNegative()) {
4349       Floats[0] = std::move(z);
4350       Floats[1].makeZero(/* Neg = */ false);
4351       return opOK;
4352     }
4353     Floats[0] = z;
4354     Status |= Floats[0].add(zz, RM);
4355     if (!Floats[0].isFinite()) {
4356       Floats[1].makeZero(/* Neg = */ false);
4357       return (opStatus)Status;
4358     }
4359     Floats[1] = std::move(z);
4360     Status |= Floats[1].subtract(Floats[0], RM);
4361     Status |= Floats[1].add(zz, RM);
4362   }
4363   return (opStatus)Status;
4364 }
4365 
addWithSpecial(const DoubleAPFloat & LHS,const DoubleAPFloat & RHS,DoubleAPFloat & Out,roundingMode RM)4366 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4367                                                 const DoubleAPFloat &RHS,
4368                                                 DoubleAPFloat &Out,
4369                                                 roundingMode RM) {
4370   if (LHS.getCategory() == fcNaN) {
4371     Out = LHS;
4372     return opOK;
4373   }
4374   if (RHS.getCategory() == fcNaN) {
4375     Out = RHS;
4376     return opOK;
4377   }
4378   if (LHS.getCategory() == fcZero) {
4379     Out = RHS;
4380     return opOK;
4381   }
4382   if (RHS.getCategory() == fcZero) {
4383     Out = LHS;
4384     return opOK;
4385   }
4386   if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4387       LHS.isNegative() != RHS.isNegative()) {
4388     Out.makeNaN(false, Out.isNegative(), nullptr);
4389     return opInvalidOp;
4390   }
4391   if (LHS.getCategory() == fcInfinity) {
4392     Out = LHS;
4393     return opOK;
4394   }
4395   if (RHS.getCategory() == fcInfinity) {
4396     Out = RHS;
4397     return opOK;
4398   }
4399   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4400 
4401   APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4402       CC(RHS.Floats[1]);
4403   assert(&A.getSemantics() == &semIEEEdouble);
4404   assert(&AA.getSemantics() == &semIEEEdouble);
4405   assert(&C.getSemantics() == &semIEEEdouble);
4406   assert(&CC.getSemantics() == &semIEEEdouble);
4407   assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4408   assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4409   return Out.addImpl(A, AA, C, CC, RM);
4410 }
4411 
add(const DoubleAPFloat & RHS,roundingMode RM)4412 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4413                                      roundingMode RM) {
4414   return addWithSpecial(*this, RHS, *this, RM);
4415 }
4416 
subtract(const DoubleAPFloat & RHS,roundingMode RM)4417 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4418                                           roundingMode RM) {
4419   changeSign();
4420   auto Ret = add(RHS, RM);
4421   changeSign();
4422   return Ret;
4423 }
4424 
multiply(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4425 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4426                                           APFloat::roundingMode RM) {
4427   const auto &LHS = *this;
4428   auto &Out = *this;
4429   /* Interesting observation: For special categories, finding the lowest
4430      common ancestor of the following layered graph gives the correct
4431      return category:
4432 
4433         NaN
4434        /   \
4435      Zero  Inf
4436        \   /
4437        Normal
4438 
4439      e.g. NaN * NaN = NaN
4440           Zero * Inf = NaN
4441           Normal * Zero = Zero
4442           Normal * Inf = Inf
4443   */
4444   if (LHS.getCategory() == fcNaN) {
4445     Out = LHS;
4446     return opOK;
4447   }
4448   if (RHS.getCategory() == fcNaN) {
4449     Out = RHS;
4450     return opOK;
4451   }
4452   if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4453       (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4454     Out.makeNaN(false, false, nullptr);
4455     return opOK;
4456   }
4457   if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4458     Out = LHS;
4459     return opOK;
4460   }
4461   if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4462     Out = RHS;
4463     return opOK;
4464   }
4465   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4466          "Special cases not handled exhaustively");
4467 
4468   int Status = opOK;
4469   APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4470   // t = a * c
4471   APFloat T = A;
4472   Status |= T.multiply(C, RM);
4473   if (!T.isFiniteNonZero()) {
4474     Floats[0] = T;
4475     Floats[1].makeZero(/* Neg = */ false);
4476     return (opStatus)Status;
4477   }
4478 
4479   // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4480   APFloat Tau = A;
4481   T.changeSign();
4482   Status |= Tau.fusedMultiplyAdd(C, T, RM);
4483   T.changeSign();
4484   {
4485     // v = a * d
4486     APFloat V = A;
4487     Status |= V.multiply(D, RM);
4488     // w = b * c
4489     APFloat W = B;
4490     Status |= W.multiply(C, RM);
4491     Status |= V.add(W, RM);
4492     // tau += v + w
4493     Status |= Tau.add(V, RM);
4494   }
4495   // u = t + tau
4496   APFloat U = T;
4497   Status |= U.add(Tau, RM);
4498 
4499   Floats[0] = U;
4500   if (!U.isFinite()) {
4501     Floats[1].makeZero(/* Neg = */ false);
4502   } else {
4503     // Floats[1] = (t - u) + tau
4504     Status |= T.subtract(U, RM);
4505     Status |= T.add(Tau, RM);
4506     Floats[1] = T;
4507   }
4508   return (opStatus)Status;
4509 }
4510 
divide(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4511 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4512                                         APFloat::roundingMode RM) {
4513   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4514   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4515   auto Ret =
4516       Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4517   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4518   return Ret;
4519 }
4520 
remainder(const DoubleAPFloat & RHS)4521 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4522   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4523   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4524   auto Ret =
4525       Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4526   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4527   return Ret;
4528 }
4529 
mod(const DoubleAPFloat & RHS)4530 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4531   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4532   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4533   auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4534   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4535   return Ret;
4536 }
4537 
4538 APFloat::opStatus
fusedMultiplyAdd(const DoubleAPFloat & Multiplicand,const DoubleAPFloat & Addend,APFloat::roundingMode RM)4539 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4540                                 const DoubleAPFloat &Addend,
4541                                 APFloat::roundingMode RM) {
4542   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4543   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4544   auto Ret = Tmp.fusedMultiplyAdd(
4545       APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4546       APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4547   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4548   return Ret;
4549 }
4550 
roundToIntegral(APFloat::roundingMode RM)4551 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4552   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4553   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4554   auto Ret = Tmp.roundToIntegral(RM);
4555   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4556   return Ret;
4557 }
4558 
changeSign()4559 void DoubleAPFloat::changeSign() {
4560   Floats[0].changeSign();
4561   Floats[1].changeSign();
4562 }
4563 
4564 APFloat::cmpResult
compareAbsoluteValue(const DoubleAPFloat & RHS) const4565 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4566   auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4567   if (Result != cmpEqual)
4568     return Result;
4569   Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4570   if (Result == cmpLessThan || Result == cmpGreaterThan) {
4571     auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4572     auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4573     if (Against && !RHSAgainst)
4574       return cmpLessThan;
4575     if (!Against && RHSAgainst)
4576       return cmpGreaterThan;
4577     if (!Against && !RHSAgainst)
4578       return Result;
4579     if (Against && RHSAgainst)
4580       return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4581   }
4582   return Result;
4583 }
4584 
getCategory() const4585 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4586   return Floats[0].getCategory();
4587 }
4588 
isNegative() const4589 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4590 
makeInf(bool Neg)4591 void DoubleAPFloat::makeInf(bool Neg) {
4592   Floats[0].makeInf(Neg);
4593   Floats[1].makeZero(/* Neg = */ false);
4594 }
4595 
makeZero(bool Neg)4596 void DoubleAPFloat::makeZero(bool Neg) {
4597   Floats[0].makeZero(Neg);
4598   Floats[1].makeZero(/* Neg = */ false);
4599 }
4600 
makeLargest(bool Neg)4601 void DoubleAPFloat::makeLargest(bool Neg) {
4602   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4603   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4604   Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4605   if (Neg)
4606     changeSign();
4607 }
4608 
makeSmallest(bool Neg)4609 void DoubleAPFloat::makeSmallest(bool Neg) {
4610   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4611   Floats[0].makeSmallest(Neg);
4612   Floats[1].makeZero(/* Neg = */ false);
4613 }
4614 
makeSmallestNormalized(bool Neg)4615 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4616   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4617   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4618   if (Neg)
4619     Floats[0].changeSign();
4620   Floats[1].makeZero(/* Neg = */ false);
4621 }
4622 
makeNaN(bool SNaN,bool Neg,const APInt * fill)4623 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4624   Floats[0].makeNaN(SNaN, Neg, fill);
4625   Floats[1].makeZero(/* Neg = */ false);
4626 }
4627 
compare(const DoubleAPFloat & RHS) const4628 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4629   auto Result = Floats[0].compare(RHS.Floats[0]);
4630   // |Float[0]| > |Float[1]|
4631   if (Result == APFloat::cmpEqual)
4632     return Floats[1].compare(RHS.Floats[1]);
4633   return Result;
4634 }
4635 
bitwiseIsEqual(const DoubleAPFloat & RHS) const4636 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4637   return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4638          Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4639 }
4640 
hash_value(const DoubleAPFloat & Arg)4641 hash_code hash_value(const DoubleAPFloat &Arg) {
4642   if (Arg.Floats)
4643     return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4644   return hash_combine(Arg.Semantics);
4645 }
4646 
bitcastToAPInt() const4647 APInt DoubleAPFloat::bitcastToAPInt() const {
4648   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4649   uint64_t Data[] = {
4650       Floats[0].bitcastToAPInt().getRawData()[0],
4651       Floats[1].bitcastToAPInt().getRawData()[0],
4652   };
4653   return APInt(128, 2, Data);
4654 }
4655 
convertFromString(StringRef S,roundingMode RM)4656 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
4657                                                              roundingMode RM) {
4658   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4659   APFloat Tmp(semPPCDoubleDoubleLegacy);
4660   auto Ret = Tmp.convertFromString(S, RM);
4661   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4662   return Ret;
4663 }
4664 
next(bool nextDown)4665 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
4666   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4667   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4668   auto Ret = Tmp.next(nextDown);
4669   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4670   return Ret;
4671 }
4672 
4673 APFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> Input,unsigned int Width,bool IsSigned,roundingMode RM,bool * IsExact) const4674 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
4675                                 unsigned int Width, bool IsSigned,
4676                                 roundingMode RM, bool *IsExact) const {
4677   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4678   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4679       .convertToInteger(Input, Width, IsSigned, RM, IsExact);
4680 }
4681 
convertFromAPInt(const APInt & Input,bool IsSigned,roundingMode RM)4682 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
4683                                                   bool IsSigned,
4684                                                   roundingMode RM) {
4685   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4686   APFloat Tmp(semPPCDoubleDoubleLegacy);
4687   auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
4688   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4689   return Ret;
4690 }
4691 
4692 APFloat::opStatus
convertFromSignExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)4693 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
4694                                               unsigned int InputSize,
4695                                               bool IsSigned, roundingMode RM) {
4696   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4697   APFloat Tmp(semPPCDoubleDoubleLegacy);
4698   auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
4699   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4700   return Ret;
4701 }
4702 
4703 APFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)4704 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
4705                                               unsigned int InputSize,
4706                                               bool IsSigned, roundingMode RM) {
4707   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4708   APFloat Tmp(semPPCDoubleDoubleLegacy);
4709   auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
4710   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4711   return Ret;
4712 }
4713 
convertToHexString(char * DST,unsigned int HexDigits,bool UpperCase,roundingMode RM) const4714 unsigned int DoubleAPFloat::convertToHexString(char *DST,
4715                                                unsigned int HexDigits,
4716                                                bool UpperCase,
4717                                                roundingMode RM) const {
4718   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4719   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4720       .convertToHexString(DST, HexDigits, UpperCase, RM);
4721 }
4722 
isDenormal() const4723 bool DoubleAPFloat::isDenormal() const {
4724   return getCategory() == fcNormal &&
4725          (Floats[0].isDenormal() || Floats[1].isDenormal() ||
4726           // (double)(Hi + Lo) == Hi defines a normal number.
4727           Floats[0] != Floats[0] + Floats[1]);
4728 }
4729 
isSmallest() const4730 bool DoubleAPFloat::isSmallest() const {
4731   if (getCategory() != fcNormal)
4732     return false;
4733   DoubleAPFloat Tmp(*this);
4734   Tmp.makeSmallest(this->isNegative());
4735   return Tmp.compare(*this) == cmpEqual;
4736 }
4737 
isLargest() const4738 bool DoubleAPFloat::isLargest() const {
4739   if (getCategory() != fcNormal)
4740     return false;
4741   DoubleAPFloat Tmp(*this);
4742   Tmp.makeLargest(this->isNegative());
4743   return Tmp.compare(*this) == cmpEqual;
4744 }
4745 
isInteger() const4746 bool DoubleAPFloat::isInteger() const {
4747   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4748   return Floats[0].isInteger() && Floats[1].isInteger();
4749 }
4750 
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const4751 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
4752                              unsigned FormatPrecision,
4753                              unsigned FormatMaxPadding,
4754                              bool TruncateZero) const {
4755   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4756   APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4757       .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
4758 }
4759 
getExactInverse(APFloat * inv) const4760 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
4761   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4762   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4763   if (!inv)
4764     return Tmp.getExactInverse(nullptr);
4765   APFloat Inv(semPPCDoubleDoubleLegacy);
4766   auto Ret = Tmp.getExactInverse(&Inv);
4767   *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
4768   return Ret;
4769 }
4770 
scalbn(const DoubleAPFloat & Arg,int Exp,APFloat::roundingMode RM)4771 DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
4772                      APFloat::roundingMode RM) {
4773   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4774   return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
4775                        scalbn(Arg.Floats[1], Exp, RM));
4776 }
4777 
frexp(const DoubleAPFloat & Arg,int & Exp,APFloat::roundingMode RM)4778 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
4779                     APFloat::roundingMode RM) {
4780   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4781   APFloat First = frexp(Arg.Floats[0], Exp, RM);
4782   APFloat Second = Arg.Floats[1];
4783   if (Arg.getCategory() == APFloat::fcNormal)
4784     Second = scalbn(Second, -Exp, RM);
4785   return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
4786 }
4787 
4788 } // namespace detail
4789 
Storage(IEEEFloat F,const fltSemantics & Semantics)4790 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
4791   if (usesLayout<IEEEFloat>(Semantics)) {
4792     new (&IEEE) IEEEFloat(std::move(F));
4793     return;
4794   }
4795   if (usesLayout<DoubleAPFloat>(Semantics)) {
4796     const fltSemantics& S = F.getSemantics();
4797     new (&Double)
4798         DoubleAPFloat(Semantics, APFloat(std::move(F), S),
4799                       APFloat(semIEEEdouble));
4800     return;
4801   }
4802   llvm_unreachable("Unexpected semantics");
4803 }
4804 
convertFromString(StringRef Str,roundingMode RM)4805 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
4806                                                        roundingMode RM) {
4807   APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
4808 }
4809 
hash_value(const APFloat & Arg)4810 hash_code hash_value(const APFloat &Arg) {
4811   if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
4812     return hash_value(Arg.U.IEEE);
4813   if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
4814     return hash_value(Arg.U.Double);
4815   llvm_unreachable("Unexpected semantics");
4816 }
4817 
APFloat(const fltSemantics & Semantics,StringRef S)4818 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
4819     : APFloat(Semantics) {
4820   auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
4821   assert(StatusOrErr && "Invalid floating point representation");
4822   consumeError(StatusOrErr.takeError());
4823 }
4824 
convert(const fltSemantics & ToSemantics,roundingMode RM,bool * losesInfo)4825 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
4826                                    roundingMode RM, bool *losesInfo) {
4827   if (&getSemantics() == &ToSemantics) {
4828     *losesInfo = false;
4829     return opOK;
4830   }
4831   if (usesLayout<IEEEFloat>(getSemantics()) &&
4832       usesLayout<IEEEFloat>(ToSemantics))
4833     return U.IEEE.convert(ToSemantics, RM, losesInfo);
4834   if (usesLayout<IEEEFloat>(getSemantics()) &&
4835       usesLayout<DoubleAPFloat>(ToSemantics)) {
4836     assert(&ToSemantics == &semPPCDoubleDouble);
4837     auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
4838     *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
4839     return Ret;
4840   }
4841   if (usesLayout<DoubleAPFloat>(getSemantics()) &&
4842       usesLayout<IEEEFloat>(ToSemantics)) {
4843     auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
4844     *this = APFloat(std::move(getIEEE()), ToSemantics);
4845     return Ret;
4846   }
4847   llvm_unreachable("Unexpected semantics");
4848 }
4849 
getAllOnesValue(const fltSemantics & Semantics,unsigned BitWidth)4850 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics,
4851                                  unsigned BitWidth) {
4852   return APFloat(Semantics, APInt::getAllOnesValue(BitWidth));
4853 }
4854 
print(raw_ostream & OS) const4855 void APFloat::print(raw_ostream &OS) const {
4856   SmallVector<char, 16> Buffer;
4857   toString(Buffer);
4858   OS << Buffer << "\n";
4859 }
4860 
4861 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump() const4862 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
4863 #endif
4864 
Profile(FoldingSetNodeID & NID) const4865 void APFloat::Profile(FoldingSetNodeID &NID) const {
4866   NID.Add(bitcastToAPInt());
4867 }
4868 
4869 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4870    an APSInt, whose initial bit-width and signed-ness are used to determine the
4871    precision of the conversion.
4872  */
convertToInteger(APSInt & result,roundingMode rounding_mode,bool * isExact) const4873 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
4874                                             roundingMode rounding_mode,
4875                                             bool *isExact) const {
4876   unsigned bitWidth = result.getBitWidth();
4877   SmallVector<uint64_t, 4> parts(result.getNumWords());
4878   opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
4879                                      rounding_mode, isExact);
4880   // Keeps the original signed-ness.
4881   result = APInt(bitWidth, parts);
4882   return status;
4883 }
4884 
convertToDouble() const4885 double APFloat::convertToDouble() const {
4886   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
4887     return getIEEE().convertToDouble();
4888   assert(getSemantics().isRepresentableBy(semIEEEdouble) &&
4889          "Float semantics is not representable by IEEEdouble");
4890   APFloat Temp = *this;
4891   bool LosesInfo;
4892   opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
4893   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
4894   (void)St;
4895   return Temp.getIEEE().convertToDouble();
4896 }
4897 
convertToFloat() const4898 float APFloat::convertToFloat() const {
4899   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
4900     return getIEEE().convertToFloat();
4901   assert(getSemantics().isRepresentableBy(semIEEEsingle) &&
4902          "Float semantics is not representable by IEEEsingle");
4903   APFloat Temp = *this;
4904   bool LosesInfo;
4905   opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
4906   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
4907   (void)St;
4908   return Temp.getIEEE().convertToFloat();
4909 }
4910 
4911 } // namespace llvm
4912 
4913 #undef APFLOAT_DISPATCH_ON_SEMANTICS
4914