xref: /openbsd/gnu/llvm/llvm/lib/Support/APFloat.cpp (revision d415bd75)
1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/llvm-config.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <cstring>
27 #include <limits.h>
28 
29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
30   do {                                                                         \
31     if (usesLayout<IEEEFloat>(getSemantics()))                                 \
32       return U.IEEE.METHOD_CALL;                                               \
33     if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
34       return U.Double.METHOD_CALL;                                             \
35     llvm_unreachable("Unexpected semantics");                                  \
36   } while (false)
37 
38 using namespace llvm;
39 
40 /// A macro used to combine two fcCategory enums into one key which can be used
41 /// in a switch statement to classify how the interaction of two APFloat's
42 /// categories affects an operation.
43 ///
44 /// TODO: If clang source code is ever allowed to use constexpr in its own
45 /// codebase, change this into a static inline function.
46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
47 
48 /* Assumed in hexadecimal significand parsing, and conversion to
49    hexadecimal strings.  */
50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
51 
52 namespace llvm {
53 
54   // How the nonfinite values Inf and NaN are represented.
55   enum class fltNonfiniteBehavior {
56     // Represents standard IEEE 754 behavior. A value is nonfinite if the
57     // exponent field is all 1s. In such cases, a value is Inf if the
58     // significand bits are all zero, and NaN otherwise
59     IEEE754,
60 
61     // Only the Float8E5M2 has this behavior. There is no Inf representation. A
62     // value is NaN if the exponent field and the mantissa field are all 1s.
63     // This behavior matches the FP8 E4M3 type described in
64     // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
65     // as non-signalling, although the paper does not state whether the NaN
66     // values are signalling or not.
67     NanOnly,
68   };
69 
70   /* Represents floating point arithmetic semantics.  */
71   struct fltSemantics {
72     /* The largest E such that 2^E is representable; this matches the
73        definition of IEEE 754.  */
74     APFloatBase::ExponentType maxExponent;
75 
76     /* The smallest E such that 2^E is a normalized number; this
77        matches the definition of IEEE 754.  */
78     APFloatBase::ExponentType minExponent;
79 
80     /* Number of bits in the significand.  This includes the integer
81        bit.  */
82     unsigned int precision;
83 
84     /* Number of bits actually used in the semantics. */
85     unsigned int sizeInBits;
86 
87     fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
88 
89     // Returns true if any number described by this semantics can be precisely
90     // represented by the specified semantics. Does not take into account
91     // the value of fltNonfiniteBehavior.
isRepresentableByllvm::fltSemantics92     bool isRepresentableBy(const fltSemantics &S) const {
93       return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
94              precision <= S.precision;
95     }
96   };
97 
98   static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
99   static const fltSemantics semBFloat = {127, -126, 8, 16};
100   static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
101   static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
102   static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
103   static const fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
104   static const fltSemantics semFloat8E4M3FN = {8, -6, 4, 8,
105                                                fltNonfiniteBehavior::NanOnly};
106   static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
107   static const fltSemantics semBogus = {0, 0, 0, 0};
108 
109   /* The IBM double-double semantics. Such a number consists of a pair of IEEE
110      64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
111      (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
112      Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
113      to each other, and two 11-bit exponents.
114 
115      Note: we need to make the value different from semBogus as otherwise
116      an unsafe optimization may collapse both values to a single address,
117      and we heavily rely on them having distinct addresses.             */
118   static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
119 
120   /* These are legacy semantics for the fallback, inaccrurate implementation of
121      IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
122      operation. It's equivalent to having an IEEE number with consecutive 106
123      bits of mantissa and 11 bits of exponent.
124 
125      It's not equivalent to IBM double-double. For example, a legit IBM
126      double-double, 1 + epsilon:
127 
128        1 + epsilon = 1 + (1 >> 1076)
129 
130      is not representable by a consecutive 106 bits of mantissa.
131 
132      Currently, these semantics are used in the following way:
133 
134        semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
135        (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
136        semPPCDoubleDoubleLegacy -> IEEE operations
137 
138      We use bitcastToAPInt() to get the bit representation (in APInt) of the
139      underlying IEEEdouble, then use the APInt constructor to construct the
140      legacy IEEE float.
141 
142      TODO: Implement all operations in semPPCDoubleDouble, and delete these
143      semantics.  */
144   static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
145                                                         53 + 53, 128};
146 
EnumToSemantics(Semantics S)147   const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
148     switch (S) {
149     case S_IEEEhalf:
150       return IEEEhalf();
151     case S_BFloat:
152       return BFloat();
153     case S_IEEEsingle:
154       return IEEEsingle();
155     case S_IEEEdouble:
156       return IEEEdouble();
157     case S_IEEEquad:
158       return IEEEquad();
159     case S_PPCDoubleDouble:
160       return PPCDoubleDouble();
161     case S_Float8E5M2:
162       return Float8E5M2();
163     case S_Float8E4M3FN:
164       return Float8E4M3FN();
165     case S_x87DoubleExtended:
166       return x87DoubleExtended();
167     }
168     llvm_unreachable("Unrecognised floating semantics");
169   }
170 
171   APFloatBase::Semantics
SemanticsToEnum(const llvm::fltSemantics & Sem)172   APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
173     if (&Sem == &llvm::APFloat::IEEEhalf())
174       return S_IEEEhalf;
175     else if (&Sem == &llvm::APFloat::BFloat())
176       return S_BFloat;
177     else if (&Sem == &llvm::APFloat::IEEEsingle())
178       return S_IEEEsingle;
179     else if (&Sem == &llvm::APFloat::IEEEdouble())
180       return S_IEEEdouble;
181     else if (&Sem == &llvm::APFloat::IEEEquad())
182       return S_IEEEquad;
183     else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
184       return S_PPCDoubleDouble;
185     else if (&Sem == &llvm::APFloat::Float8E5M2())
186       return S_Float8E5M2;
187     else if (&Sem == &llvm::APFloat::Float8E4M3FN())
188       return S_Float8E4M3FN;
189     else if (&Sem == &llvm::APFloat::x87DoubleExtended())
190       return S_x87DoubleExtended;
191     else
192       llvm_unreachable("Unknown floating semantics");
193   }
194 
IEEEhalf()195   const fltSemantics &APFloatBase::IEEEhalf() {
196     return semIEEEhalf;
197   }
BFloat()198   const fltSemantics &APFloatBase::BFloat() {
199     return semBFloat;
200   }
IEEEsingle()201   const fltSemantics &APFloatBase::IEEEsingle() {
202     return semIEEEsingle;
203   }
IEEEdouble()204   const fltSemantics &APFloatBase::IEEEdouble() {
205     return semIEEEdouble;
206   }
IEEEquad()207   const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
PPCDoubleDouble()208   const fltSemantics &APFloatBase::PPCDoubleDouble() {
209     return semPPCDoubleDouble;
210   }
Float8E5M2()211   const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
Float8E4M3FN()212   const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
x87DoubleExtended()213   const fltSemantics &APFloatBase::x87DoubleExtended() {
214     return semX87DoubleExtended;
215   }
Bogus()216   const fltSemantics &APFloatBase::Bogus() { return semBogus; }
217 
218   constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
219   constexpr RoundingMode APFloatBase::rmTowardPositive;
220   constexpr RoundingMode APFloatBase::rmTowardNegative;
221   constexpr RoundingMode APFloatBase::rmTowardZero;
222   constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
223 
224   /* A tight upper bound on number of parts required to hold the value
225      pow(5, power) is
226 
227        power * 815 / (351 * integerPartWidth) + 1
228 
229      However, whilst the result may require only this many parts,
230      because we are multiplying two values to get it, the
231      multiplication may require an extra part with the excess part
232      being zero (consider the trivial case of 1 * 1, tcFullMultiply
233      requires two parts to hold the single-part result).  So we add an
234      extra one to guarantee enough space whilst multiplying.  */
235   const unsigned int maxExponent = 16383;
236   const unsigned int maxPrecision = 113;
237   const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
238   const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
239 
semanticsPrecision(const fltSemantics & semantics)240   unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
241     return semantics.precision;
242   }
243   APFloatBase::ExponentType
semanticsMaxExponent(const fltSemantics & semantics)244   APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
245     return semantics.maxExponent;
246   }
247   APFloatBase::ExponentType
semanticsMinExponent(const fltSemantics & semantics)248   APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
249     return semantics.minExponent;
250   }
semanticsSizeInBits(const fltSemantics & semantics)251   unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
252     return semantics.sizeInBits;
253   }
254 
getSizeInBits(const fltSemantics & Sem)255   unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
256     return Sem.sizeInBits;
257 }
258 
259 /* A bunch of private, handy routines.  */
260 
createError(const Twine & Err)261 static inline Error createError(const Twine &Err) {
262   return make_error<StringError>(Err, inconvertibleErrorCode());
263 }
264 
265 static inline unsigned int
partCountForBits(unsigned int bits)266 partCountForBits(unsigned int bits)
267 {
268   return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
269 }
270 
271 /* Returns 0U-9U.  Return values >= 10U are not digits.  */
272 static inline unsigned int
decDigitValue(unsigned int c)273 decDigitValue(unsigned int c)
274 {
275   return c - '0';
276 }
277 
278 /* Return the value of a decimal exponent of the form
279    [+-]ddddddd.
280 
281    If the exponent overflows, returns a large exponent with the
282    appropriate sign.  */
readExponent(StringRef::iterator begin,StringRef::iterator end)283 static Expected<int> readExponent(StringRef::iterator begin,
284                                   StringRef::iterator end) {
285   bool isNegative;
286   unsigned int absExponent;
287   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
288   StringRef::iterator p = begin;
289 
290   // Treat no exponent as 0 to match binutils
291   if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
292     return 0;
293   }
294 
295   isNegative = (*p == '-');
296   if (*p == '-' || *p == '+') {
297     p++;
298     if (p == end)
299       return createError("Exponent has no digits");
300   }
301 
302   absExponent = decDigitValue(*p++);
303   if (absExponent >= 10U)
304     return createError("Invalid character in exponent");
305 
306   for (; p != end; ++p) {
307     unsigned int value;
308 
309     value = decDigitValue(*p);
310     if (value >= 10U)
311       return createError("Invalid character in exponent");
312 
313     absExponent = absExponent * 10U + value;
314     if (absExponent >= overlargeExponent) {
315       absExponent = overlargeExponent;
316       break;
317     }
318   }
319 
320   if (isNegative)
321     return -(int) absExponent;
322   else
323     return (int) absExponent;
324 }
325 
326 /* This is ugly and needs cleaning up, but I don't immediately see
327    how whilst remaining safe.  */
totalExponent(StringRef::iterator p,StringRef::iterator end,int exponentAdjustment)328 static Expected<int> totalExponent(StringRef::iterator p,
329                                    StringRef::iterator end,
330                                    int exponentAdjustment) {
331   int unsignedExponent;
332   bool negative, overflow;
333   int exponent = 0;
334 
335   if (p == end)
336     return createError("Exponent has no digits");
337 
338   negative = *p == '-';
339   if (*p == '-' || *p == '+') {
340     p++;
341     if (p == end)
342       return createError("Exponent has no digits");
343   }
344 
345   unsignedExponent = 0;
346   overflow = false;
347   for (; p != end; ++p) {
348     unsigned int value;
349 
350     value = decDigitValue(*p);
351     if (value >= 10U)
352       return createError("Invalid character in exponent");
353 
354     unsignedExponent = unsignedExponent * 10 + value;
355     if (unsignedExponent > 32767) {
356       overflow = true;
357       break;
358     }
359   }
360 
361   if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
362     overflow = true;
363 
364   if (!overflow) {
365     exponent = unsignedExponent;
366     if (negative)
367       exponent = -exponent;
368     exponent += exponentAdjustment;
369     if (exponent > 32767 || exponent < -32768)
370       overflow = true;
371   }
372 
373   if (overflow)
374     exponent = negative ? -32768: 32767;
375 
376   return exponent;
377 }
378 
379 static Expected<StringRef::iterator>
skipLeadingZeroesAndAnyDot(StringRef::iterator begin,StringRef::iterator end,StringRef::iterator * dot)380 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
381                            StringRef::iterator *dot) {
382   StringRef::iterator p = begin;
383   *dot = end;
384   while (p != end && *p == '0')
385     p++;
386 
387   if (p != end && *p == '.') {
388     *dot = p++;
389 
390     if (end - begin == 1)
391       return createError("Significand has no digits");
392 
393     while (p != end && *p == '0')
394       p++;
395   }
396 
397   return p;
398 }
399 
400 /* Given a normal decimal floating point number of the form
401 
402      dddd.dddd[eE][+-]ddd
403 
404    where the decimal point and exponent are optional, fill out the
405    structure D.  Exponent is appropriate if the significand is
406    treated as an integer, and normalizedExponent if the significand
407    is taken to have the decimal point after a single leading
408    non-zero digit.
409 
410    If the value is zero, V->firstSigDigit points to a non-digit, and
411    the return exponent is zero.
412 */
413 struct decimalInfo {
414   const char *firstSigDigit;
415   const char *lastSigDigit;
416   int exponent;
417   int normalizedExponent;
418 };
419 
interpretDecimal(StringRef::iterator begin,StringRef::iterator end,decimalInfo * D)420 static Error interpretDecimal(StringRef::iterator begin,
421                               StringRef::iterator end, decimalInfo *D) {
422   StringRef::iterator dot = end;
423 
424   auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
425   if (!PtrOrErr)
426     return PtrOrErr.takeError();
427   StringRef::iterator p = *PtrOrErr;
428 
429   D->firstSigDigit = p;
430   D->exponent = 0;
431   D->normalizedExponent = 0;
432 
433   for (; p != end; ++p) {
434     if (*p == '.') {
435       if (dot != end)
436         return createError("String contains multiple dots");
437       dot = p++;
438       if (p == end)
439         break;
440     }
441     if (decDigitValue(*p) >= 10U)
442       break;
443   }
444 
445   if (p != end) {
446     if (*p != 'e' && *p != 'E')
447       return createError("Invalid character in significand");
448     if (p == begin)
449       return createError("Significand has no digits");
450     if (dot != end && p - begin == 1)
451       return createError("Significand has no digits");
452 
453     /* p points to the first non-digit in the string */
454     auto ExpOrErr = readExponent(p + 1, end);
455     if (!ExpOrErr)
456       return ExpOrErr.takeError();
457     D->exponent = *ExpOrErr;
458 
459     /* Implied decimal point?  */
460     if (dot == end)
461       dot = p;
462   }
463 
464   /* If number is all zeroes accept any exponent.  */
465   if (p != D->firstSigDigit) {
466     /* Drop insignificant trailing zeroes.  */
467     if (p != begin) {
468       do
469         do
470           p--;
471         while (p != begin && *p == '0');
472       while (p != begin && *p == '.');
473     }
474 
475     /* Adjust the exponents for any decimal point.  */
476     D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
477     D->normalizedExponent = (D->exponent +
478               static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
479                                       - (dot > D->firstSigDigit && dot < p)));
480   }
481 
482   D->lastSigDigit = p;
483   return Error::success();
484 }
485 
486 /* Return the trailing fraction of a hexadecimal number.
487    DIGITVALUE is the first hex digit of the fraction, P points to
488    the next digit.  */
489 static Expected<lostFraction>
trailingHexadecimalFraction(StringRef::iterator p,StringRef::iterator end,unsigned int digitValue)490 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
491                             unsigned int digitValue) {
492   unsigned int hexDigit;
493 
494   /* If the first trailing digit isn't 0 or 8 we can work out the
495      fraction immediately.  */
496   if (digitValue > 8)
497     return lfMoreThanHalf;
498   else if (digitValue < 8 && digitValue > 0)
499     return lfLessThanHalf;
500 
501   // Otherwise we need to find the first non-zero digit.
502   while (p != end && (*p == '0' || *p == '.'))
503     p++;
504 
505   if (p == end)
506     return createError("Invalid trailing hexadecimal fraction!");
507 
508   hexDigit = hexDigitValue(*p);
509 
510   /* If we ran off the end it is exactly zero or one-half, otherwise
511      a little more.  */
512   if (hexDigit == -1U)
513     return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
514   else
515     return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
516 }
517 
518 /* Return the fraction lost were a bignum truncated losing the least
519    significant BITS bits.  */
520 static lostFraction
lostFractionThroughTruncation(const APFloatBase::integerPart * parts,unsigned int partCount,unsigned int bits)521 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
522                               unsigned int partCount,
523                               unsigned int bits)
524 {
525   unsigned int lsb;
526 
527   lsb = APInt::tcLSB(parts, partCount);
528 
529   /* Note this is guaranteed true if bits == 0, or LSB == -1U.  */
530   if (bits <= lsb)
531     return lfExactlyZero;
532   if (bits == lsb + 1)
533     return lfExactlyHalf;
534   if (bits <= partCount * APFloatBase::integerPartWidth &&
535       APInt::tcExtractBit(parts, bits - 1))
536     return lfMoreThanHalf;
537 
538   return lfLessThanHalf;
539 }
540 
541 /* Shift DST right BITS bits noting lost fraction.  */
542 static lostFraction
shiftRight(APFloatBase::integerPart * dst,unsigned int parts,unsigned int bits)543 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
544 {
545   lostFraction lost_fraction;
546 
547   lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
548 
549   APInt::tcShiftRight(dst, parts, bits);
550 
551   return lost_fraction;
552 }
553 
554 /* Combine the effect of two lost fractions.  */
555 static lostFraction
combineLostFractions(lostFraction moreSignificant,lostFraction lessSignificant)556 combineLostFractions(lostFraction moreSignificant,
557                      lostFraction lessSignificant)
558 {
559   if (lessSignificant != lfExactlyZero) {
560     if (moreSignificant == lfExactlyZero)
561       moreSignificant = lfLessThanHalf;
562     else if (moreSignificant == lfExactlyHalf)
563       moreSignificant = lfMoreThanHalf;
564   }
565 
566   return moreSignificant;
567 }
568 
569 /* The error from the true value, in half-ulps, on multiplying two
570    floating point numbers, which differ from the value they
571    approximate by at most HUE1 and HUE2 half-ulps, is strictly less
572    than the returned value.
573 
574    See "How to Read Floating Point Numbers Accurately" by William D
575    Clinger.  */
576 static unsigned int
HUerrBound(bool inexactMultiply,unsigned int HUerr1,unsigned int HUerr2)577 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
578 {
579   assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
580 
581   if (HUerr1 + HUerr2 == 0)
582     return inexactMultiply * 2;  /* <= inexactMultiply half-ulps.  */
583   else
584     return inexactMultiply + 2 * (HUerr1 + HUerr2);
585 }
586 
587 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
588    when the least significant BITS are truncated.  BITS cannot be
589    zero.  */
590 static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart * parts,unsigned int bits,bool isNearest)591 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
592                  bool isNearest) {
593   unsigned int count, partBits;
594   APFloatBase::integerPart part, boundary;
595 
596   assert(bits != 0);
597 
598   bits--;
599   count = bits / APFloatBase::integerPartWidth;
600   partBits = bits % APFloatBase::integerPartWidth + 1;
601 
602   part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
603 
604   if (isNearest)
605     boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
606   else
607     boundary = 0;
608 
609   if (count == 0) {
610     if (part - boundary <= boundary - part)
611       return part - boundary;
612     else
613       return boundary - part;
614   }
615 
616   if (part == boundary) {
617     while (--count)
618       if (parts[count])
619         return ~(APFloatBase::integerPart) 0; /* A lot.  */
620 
621     return parts[0];
622   } else if (part == boundary - 1) {
623     while (--count)
624       if (~parts[count])
625         return ~(APFloatBase::integerPart) 0; /* A lot.  */
626 
627     return -parts[0];
628   }
629 
630   return ~(APFloatBase::integerPart) 0; /* A lot.  */
631 }
632 
633 /* Place pow(5, power) in DST, and return the number of parts used.
634    DST must be at least one part larger than size of the answer.  */
635 static unsigned int
powerOf5(APFloatBase::integerPart * dst,unsigned int power)636 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
637   static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
638   APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
639   pow5s[0] = 78125 * 5;
640 
641   unsigned int partsCount[16] = { 1 };
642   APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
643   unsigned int result;
644   assert(power <= maxExponent);
645 
646   p1 = dst;
647   p2 = scratch;
648 
649   *p1 = firstEightPowers[power & 7];
650   power >>= 3;
651 
652   result = 1;
653   pow5 = pow5s;
654 
655   for (unsigned int n = 0; power; power >>= 1, n++) {
656     unsigned int pc;
657 
658     pc = partsCount[n];
659 
660     /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
661     if (pc == 0) {
662       pc = partsCount[n - 1];
663       APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
664       pc *= 2;
665       if (pow5[pc - 1] == 0)
666         pc--;
667       partsCount[n] = pc;
668     }
669 
670     if (power & 1) {
671       APFloatBase::integerPart *tmp;
672 
673       APInt::tcFullMultiply(p2, p1, pow5, result, pc);
674       result += pc;
675       if (p2[result - 1] == 0)
676         result--;
677 
678       /* Now result is in p1 with partsCount parts and p2 is scratch
679          space.  */
680       tmp = p1;
681       p1 = p2;
682       p2 = tmp;
683     }
684 
685     pow5 += pc;
686   }
687 
688   if (p1 != dst)
689     APInt::tcAssign(dst, p1, result);
690 
691   return result;
692 }
693 
694 /* Zero at the end to avoid modular arithmetic when adding one; used
695    when rounding up during hexadecimal output.  */
696 static const char hexDigitsLower[] = "0123456789abcdef0";
697 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
698 static const char infinityL[] = "infinity";
699 static const char infinityU[] = "INFINITY";
700 static const char NaNL[] = "nan";
701 static const char NaNU[] = "NAN";
702 
703 /* Write out an integerPart in hexadecimal, starting with the most
704    significant nibble.  Write out exactly COUNT hexdigits, return
705    COUNT.  */
706 static unsigned int
partAsHex(char * dst,APFloatBase::integerPart part,unsigned int count,const char * hexDigitChars)707 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
708            const char *hexDigitChars)
709 {
710   unsigned int result = count;
711 
712   assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
713 
714   part >>= (APFloatBase::integerPartWidth - 4 * count);
715   while (count--) {
716     dst[count] = hexDigitChars[part & 0xf];
717     part >>= 4;
718   }
719 
720   return result;
721 }
722 
723 /* Write out an unsigned decimal integer.  */
724 static char *
writeUnsignedDecimal(char * dst,unsigned int n)725 writeUnsignedDecimal (char *dst, unsigned int n)
726 {
727   char buff[40], *p;
728 
729   p = buff;
730   do
731     *p++ = '0' + n % 10;
732   while (n /= 10);
733 
734   do
735     *dst++ = *--p;
736   while (p != buff);
737 
738   return dst;
739 }
740 
741 /* Write out a signed decimal integer.  */
742 static char *
writeSignedDecimal(char * dst,int value)743 writeSignedDecimal (char *dst, int value)
744 {
745   if (value < 0) {
746     *dst++ = '-';
747     dst = writeUnsignedDecimal(dst, -(unsigned) value);
748   } else
749     dst = writeUnsignedDecimal(dst, value);
750 
751   return dst;
752 }
753 
754 namespace detail {
755 /* Constructors.  */
initialize(const fltSemantics * ourSemantics)756 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
757   unsigned int count;
758 
759   semantics = ourSemantics;
760   count = partCount();
761   if (count > 1)
762     significand.parts = new integerPart[count];
763 }
764 
freeSignificand()765 void IEEEFloat::freeSignificand() {
766   if (needsCleanup())
767     delete [] significand.parts;
768 }
769 
assign(const IEEEFloat & rhs)770 void IEEEFloat::assign(const IEEEFloat &rhs) {
771   assert(semantics == rhs.semantics);
772 
773   sign = rhs.sign;
774   category = rhs.category;
775   exponent = rhs.exponent;
776   if (isFiniteNonZero() || category == fcNaN)
777     copySignificand(rhs);
778 }
779 
copySignificand(const IEEEFloat & rhs)780 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
781   assert(isFiniteNonZero() || category == fcNaN);
782   assert(rhs.partCount() >= partCount());
783 
784   APInt::tcAssign(significandParts(), rhs.significandParts(),
785                   partCount());
786 }
787 
788 /* Make this number a NaN, with an arbitrary but deterministic value
789    for the significand.  If double or longer, this is a signalling NaN,
790    which may not be ideal.  If float, this is QNaN(0).  */
makeNaN(bool SNaN,bool Negative,const APInt * fill)791 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
792   category = fcNaN;
793   sign = Negative;
794   exponent = exponentNaN();
795 
796   integerPart *significand = significandParts();
797   unsigned numParts = partCount();
798 
799   APInt fill_storage;
800   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
801     // The only NaN representation is where the mantissa is all 1s, which is
802     // non-signalling.
803     SNaN = false;
804     fill_storage = APInt::getAllOnes(semantics->precision - 1);
805     fill = &fill_storage;
806   }
807 
808   // Set the significand bits to the fill.
809   if (!fill || fill->getNumWords() < numParts)
810     APInt::tcSet(significand, 0, numParts);
811   if (fill) {
812     APInt::tcAssign(significand, fill->getRawData(),
813                     std::min(fill->getNumWords(), numParts));
814 
815     // Zero out the excess bits of the significand.
816     unsigned bitsToPreserve = semantics->precision - 1;
817     unsigned part = bitsToPreserve / 64;
818     bitsToPreserve %= 64;
819     significand[part] &= ((1ULL << bitsToPreserve) - 1);
820     for (part++; part != numParts; ++part)
821       significand[part] = 0;
822   }
823 
824   unsigned QNaNBit = semantics->precision - 2;
825 
826   if (SNaN) {
827     // We always have to clear the QNaN bit to make it an SNaN.
828     APInt::tcClearBit(significand, QNaNBit);
829 
830     // If there are no bits set in the payload, we have to set
831     // *something* to make it a NaN instead of an infinity;
832     // conventionally, this is the next bit down from the QNaN bit.
833     if (APInt::tcIsZero(significand, numParts))
834       APInt::tcSetBit(significand, QNaNBit - 1);
835   } else {
836     // We always have to set the QNaN bit to make it a QNaN.
837     APInt::tcSetBit(significand, QNaNBit);
838   }
839 
840   // For x87 extended precision, we want to make a NaN, not a
841   // pseudo-NaN.  Maybe we should expose the ability to make
842   // pseudo-NaNs?
843   if (semantics == &semX87DoubleExtended)
844     APInt::tcSetBit(significand, QNaNBit + 1);
845 }
846 
operator =(const IEEEFloat & rhs)847 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
848   if (this != &rhs) {
849     if (semantics != rhs.semantics) {
850       freeSignificand();
851       initialize(rhs.semantics);
852     }
853     assign(rhs);
854   }
855 
856   return *this;
857 }
858 
operator =(IEEEFloat && rhs)859 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
860   freeSignificand();
861 
862   semantics = rhs.semantics;
863   significand = rhs.significand;
864   exponent = rhs.exponent;
865   category = rhs.category;
866   sign = rhs.sign;
867 
868   rhs.semantics = &semBogus;
869   return *this;
870 }
871 
isDenormal() const872 bool IEEEFloat::isDenormal() const {
873   return isFiniteNonZero() && (exponent == semantics->minExponent) &&
874          (APInt::tcExtractBit(significandParts(),
875                               semantics->precision - 1) == 0);
876 }
877 
isSmallest() const878 bool IEEEFloat::isSmallest() const {
879   // The smallest number by magnitude in our format will be the smallest
880   // denormal, i.e. the floating point number with exponent being minimum
881   // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
882   return isFiniteNonZero() && exponent == semantics->minExponent &&
883     significandMSB() == 0;
884 }
885 
isSmallestNormalized() const886 bool IEEEFloat::isSmallestNormalized() const {
887   return getCategory() == fcNormal && exponent == semantics->minExponent &&
888          isSignificandAllZerosExceptMSB();
889 }
890 
isSignificandAllOnes() const891 bool IEEEFloat::isSignificandAllOnes() const {
892   // Test if the significand excluding the integral bit is all ones. This allows
893   // us to test for binade boundaries.
894   const integerPart *Parts = significandParts();
895   const unsigned PartCount = partCountForBits(semantics->precision);
896   for (unsigned i = 0; i < PartCount - 1; i++)
897     if (~Parts[i])
898       return false;
899 
900   // Set the unused high bits to all ones when we compare.
901   const unsigned NumHighBits =
902     PartCount*integerPartWidth - semantics->precision + 1;
903   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
904          "Can not have more high bits to fill than integerPartWidth");
905   const integerPart HighBitFill =
906     ~integerPart(0) << (integerPartWidth - NumHighBits);
907   if (~(Parts[PartCount - 1] | HighBitFill))
908     return false;
909 
910   return true;
911 }
912 
isSignificandAllOnesExceptLSB() const913 bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
914   // Test if the significand excluding the integral bit is all ones except for
915   // the least significant bit.
916   const integerPart *Parts = significandParts();
917 
918   if (Parts[0] & 1)
919     return false;
920 
921   const unsigned PartCount = partCountForBits(semantics->precision);
922   for (unsigned i = 0; i < PartCount - 1; i++) {
923     if (~Parts[i] & ~unsigned{!i})
924       return false;
925   }
926 
927   // Set the unused high bits to all ones when we compare.
928   const unsigned NumHighBits =
929       PartCount * integerPartWidth - semantics->precision + 1;
930   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
931          "Can not have more high bits to fill than integerPartWidth");
932   const integerPart HighBitFill = ~integerPart(0)
933                                   << (integerPartWidth - NumHighBits);
934   if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
935     return false;
936 
937   return true;
938 }
939 
isSignificandAllZeros() const940 bool IEEEFloat::isSignificandAllZeros() const {
941   // Test if the significand excluding the integral bit is all zeros. This
942   // allows us to test for binade boundaries.
943   const integerPart *Parts = significandParts();
944   const unsigned PartCount = partCountForBits(semantics->precision);
945 
946   for (unsigned i = 0; i < PartCount - 1; i++)
947     if (Parts[i])
948       return false;
949 
950   // Compute how many bits are used in the final word.
951   const unsigned NumHighBits =
952     PartCount*integerPartWidth - semantics->precision + 1;
953   assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
954          "clear than integerPartWidth");
955   const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
956 
957   if (Parts[PartCount - 1] & HighBitMask)
958     return false;
959 
960   return true;
961 }
962 
isSignificandAllZerosExceptMSB() const963 bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
964   const integerPart *Parts = significandParts();
965   const unsigned PartCount = partCountForBits(semantics->precision);
966 
967   for (unsigned i = 0; i < PartCount - 1; i++) {
968     if (Parts[i])
969       return false;
970   }
971 
972   const unsigned NumHighBits =
973       PartCount * integerPartWidth - semantics->precision + 1;
974   return Parts[PartCount - 1] == integerPart(1)
975                                      << (integerPartWidth - NumHighBits);
976 }
977 
isLargest() const978 bool IEEEFloat::isLargest() const {
979   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
980     // The largest number by magnitude in our format will be the floating point
981     // number with maximum exponent and with significand that is all ones except
982     // the LSB.
983     return isFiniteNonZero() && exponent == semantics->maxExponent &&
984            isSignificandAllOnesExceptLSB();
985   } else {
986     // The largest number by magnitude in our format will be the floating point
987     // number with maximum exponent and with significand that is all ones.
988     return isFiniteNonZero() && exponent == semantics->maxExponent &&
989            isSignificandAllOnes();
990   }
991 }
992 
isInteger() const993 bool IEEEFloat::isInteger() const {
994   // This could be made more efficient; I'm going for obviously correct.
995   if (!isFinite()) return false;
996   IEEEFloat truncated = *this;
997   truncated.roundToIntegral(rmTowardZero);
998   return compare(truncated) == cmpEqual;
999 }
1000 
bitwiseIsEqual(const IEEEFloat & rhs) const1001 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1002   if (this == &rhs)
1003     return true;
1004   if (semantics != rhs.semantics ||
1005       category != rhs.category ||
1006       sign != rhs.sign)
1007     return false;
1008   if (category==fcZero || category==fcInfinity)
1009     return true;
1010 
1011   if (isFiniteNonZero() && exponent != rhs.exponent)
1012     return false;
1013 
1014   return std::equal(significandParts(), significandParts() + partCount(),
1015                     rhs.significandParts());
1016 }
1017 
IEEEFloat(const fltSemantics & ourSemantics,integerPart value)1018 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
1019   initialize(&ourSemantics);
1020   sign = 0;
1021   category = fcNormal;
1022   zeroSignificand();
1023   exponent = ourSemantics.precision - 1;
1024   significandParts()[0] = value;
1025   normalize(rmNearestTiesToEven, lfExactlyZero);
1026 }
1027 
IEEEFloat(const fltSemantics & ourSemantics)1028 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
1029   initialize(&ourSemantics);
1030   makeZero(false);
1031 }
1032 
1033 // Delegate to the previous constructor, because later copy constructor may
1034 // actually inspects category, which can't be garbage.
IEEEFloat(const fltSemantics & ourSemantics,uninitializedTag tag)1035 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
1036     : IEEEFloat(ourSemantics) {}
1037 
IEEEFloat(const IEEEFloat & rhs)1038 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
1039   initialize(rhs.semantics);
1040   assign(rhs);
1041 }
1042 
IEEEFloat(IEEEFloat && rhs)1043 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
1044   *this = std::move(rhs);
1045 }
1046 
~IEEEFloat()1047 IEEEFloat::~IEEEFloat() { freeSignificand(); }
1048 
partCount() const1049 unsigned int IEEEFloat::partCount() const {
1050   return partCountForBits(semantics->precision + 1);
1051 }
1052 
significandParts() const1053 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
1054   return const_cast<IEEEFloat *>(this)->significandParts();
1055 }
1056 
significandParts()1057 IEEEFloat::integerPart *IEEEFloat::significandParts() {
1058   if (partCount() > 1)
1059     return significand.parts;
1060   else
1061     return &significand.part;
1062 }
1063 
zeroSignificand()1064 void IEEEFloat::zeroSignificand() {
1065   APInt::tcSet(significandParts(), 0, partCount());
1066 }
1067 
1068 /* Increment an fcNormal floating point number's significand.  */
incrementSignificand()1069 void IEEEFloat::incrementSignificand() {
1070   integerPart carry;
1071 
1072   carry = APInt::tcIncrement(significandParts(), partCount());
1073 
1074   /* Our callers should never cause us to overflow.  */
1075   assert(carry == 0);
1076   (void)carry;
1077 }
1078 
1079 /* Add the significand of the RHS.  Returns the carry flag.  */
addSignificand(const IEEEFloat & rhs)1080 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1081   integerPart *parts;
1082 
1083   parts = significandParts();
1084 
1085   assert(semantics == rhs.semantics);
1086   assert(exponent == rhs.exponent);
1087 
1088   return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1089 }
1090 
1091 /* Subtract the significand of the RHS with a borrow flag.  Returns
1092    the borrow flag.  */
subtractSignificand(const IEEEFloat & rhs,integerPart borrow)1093 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1094                                                       integerPart borrow) {
1095   integerPart *parts;
1096 
1097   parts = significandParts();
1098 
1099   assert(semantics == rhs.semantics);
1100   assert(exponent == rhs.exponent);
1101 
1102   return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1103                            partCount());
1104 }
1105 
1106 /* Multiply the significand of the RHS.  If ADDEND is non-NULL, add it
1107    on to the full-precision result of the multiplication.  Returns the
1108    lost fraction.  */
multiplySignificand(const IEEEFloat & rhs,IEEEFloat addend)1109 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1110                                             IEEEFloat addend) {
1111   unsigned int omsb;        // One, not zero, based MSB.
1112   unsigned int partsCount, newPartsCount, precision;
1113   integerPart *lhsSignificand;
1114   integerPart scratch[4];
1115   integerPart *fullSignificand;
1116   lostFraction lost_fraction;
1117   bool ignored;
1118 
1119   assert(semantics == rhs.semantics);
1120 
1121   precision = semantics->precision;
1122 
1123   // Allocate space for twice as many bits as the original significand, plus one
1124   // extra bit for the addition to overflow into.
1125   newPartsCount = partCountForBits(precision * 2 + 1);
1126 
1127   if (newPartsCount > 4)
1128     fullSignificand = new integerPart[newPartsCount];
1129   else
1130     fullSignificand = scratch;
1131 
1132   lhsSignificand = significandParts();
1133   partsCount = partCount();
1134 
1135   APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1136                         rhs.significandParts(), partsCount, partsCount);
1137 
1138   lost_fraction = lfExactlyZero;
1139   omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1140   exponent += rhs.exponent;
1141 
1142   // Assume the operands involved in the multiplication are single-precision
1143   // FP, and the two multiplicants are:
1144   //   *this = a23 . a22 ... a0 * 2^e1
1145   //     rhs = b23 . b22 ... b0 * 2^e2
1146   // the result of multiplication is:
1147   //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1148   // Note that there are three significant bits at the left-hand side of the
1149   // radix point: two for the multiplication, and an overflow bit for the
1150   // addition (that will always be zero at this point). Move the radix point
1151   // toward left by two bits, and adjust exponent accordingly.
1152   exponent += 2;
1153 
1154   if (addend.isNonZero()) {
1155     // The intermediate result of the multiplication has "2 * precision"
1156     // signicant bit; adjust the addend to be consistent with mul result.
1157     //
1158     Significand savedSignificand = significand;
1159     const fltSemantics *savedSemantics = semantics;
1160     fltSemantics extendedSemantics;
1161     opStatus status;
1162     unsigned int extendedPrecision;
1163 
1164     // Normalize our MSB to one below the top bit to allow for overflow.
1165     extendedPrecision = 2 * precision + 1;
1166     if (omsb != extendedPrecision - 1) {
1167       assert(extendedPrecision > omsb);
1168       APInt::tcShiftLeft(fullSignificand, newPartsCount,
1169                          (extendedPrecision - 1) - omsb);
1170       exponent -= (extendedPrecision - 1) - omsb;
1171     }
1172 
1173     /* Create new semantics.  */
1174     extendedSemantics = *semantics;
1175     extendedSemantics.precision = extendedPrecision;
1176 
1177     if (newPartsCount == 1)
1178       significand.part = fullSignificand[0];
1179     else
1180       significand.parts = fullSignificand;
1181     semantics = &extendedSemantics;
1182 
1183     // Make a copy so we can convert it to the extended semantics.
1184     // Note that we cannot convert the addend directly, as the extendedSemantics
1185     // is a local variable (which we take a reference to).
1186     IEEEFloat extendedAddend(addend);
1187     status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1188     assert(status == opOK);
1189     (void)status;
1190 
1191     // Shift the significand of the addend right by one bit. This guarantees
1192     // that the high bit of the significand is zero (same as fullSignificand),
1193     // so the addition will overflow (if it does overflow at all) into the top bit.
1194     lost_fraction = extendedAddend.shiftSignificandRight(1);
1195     assert(lost_fraction == lfExactlyZero &&
1196            "Lost precision while shifting addend for fused-multiply-add.");
1197 
1198     lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1199 
1200     /* Restore our state.  */
1201     if (newPartsCount == 1)
1202       fullSignificand[0] = significand.part;
1203     significand = savedSignificand;
1204     semantics = savedSemantics;
1205 
1206     omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1207   }
1208 
1209   // Convert the result having "2 * precision" significant-bits back to the one
1210   // having "precision" significant-bits. First, move the radix point from
1211   // poision "2*precision - 1" to "precision - 1". The exponent need to be
1212   // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1213   exponent -= precision + 1;
1214 
1215   // In case MSB resides at the left-hand side of radix point, shift the
1216   // mantissa right by some amount to make sure the MSB reside right before
1217   // the radix point (i.e. "MSB . rest-significant-bits").
1218   //
1219   // Note that the result is not normalized when "omsb < precision". So, the
1220   // caller needs to call IEEEFloat::normalize() if normalized value is
1221   // expected.
1222   if (omsb > precision) {
1223     unsigned int bits, significantParts;
1224     lostFraction lf;
1225 
1226     bits = omsb - precision;
1227     significantParts = partCountForBits(omsb);
1228     lf = shiftRight(fullSignificand, significantParts, bits);
1229     lost_fraction = combineLostFractions(lf, lost_fraction);
1230     exponent += bits;
1231   }
1232 
1233   APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1234 
1235   if (newPartsCount > 4)
1236     delete [] fullSignificand;
1237 
1238   return lost_fraction;
1239 }
1240 
multiplySignificand(const IEEEFloat & rhs)1241 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1242   return multiplySignificand(rhs, IEEEFloat(*semantics));
1243 }
1244 
1245 /* Multiply the significands of LHS and RHS to DST.  */
divideSignificand(const IEEEFloat & rhs)1246 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1247   unsigned int bit, i, partsCount;
1248   const integerPart *rhsSignificand;
1249   integerPart *lhsSignificand, *dividend, *divisor;
1250   integerPart scratch[4];
1251   lostFraction lost_fraction;
1252 
1253   assert(semantics == rhs.semantics);
1254 
1255   lhsSignificand = significandParts();
1256   rhsSignificand = rhs.significandParts();
1257   partsCount = partCount();
1258 
1259   if (partsCount > 2)
1260     dividend = new integerPart[partsCount * 2];
1261   else
1262     dividend = scratch;
1263 
1264   divisor = dividend + partsCount;
1265 
1266   /* Copy the dividend and divisor as they will be modified in-place.  */
1267   for (i = 0; i < partsCount; i++) {
1268     dividend[i] = lhsSignificand[i];
1269     divisor[i] = rhsSignificand[i];
1270     lhsSignificand[i] = 0;
1271   }
1272 
1273   exponent -= rhs.exponent;
1274 
1275   unsigned int precision = semantics->precision;
1276 
1277   /* Normalize the divisor.  */
1278   bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1279   if (bit) {
1280     exponent += bit;
1281     APInt::tcShiftLeft(divisor, partsCount, bit);
1282   }
1283 
1284   /* Normalize the dividend.  */
1285   bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1286   if (bit) {
1287     exponent -= bit;
1288     APInt::tcShiftLeft(dividend, partsCount, bit);
1289   }
1290 
1291   /* Ensure the dividend >= divisor initially for the loop below.
1292      Incidentally, this means that the division loop below is
1293      guaranteed to set the integer bit to one.  */
1294   if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1295     exponent--;
1296     APInt::tcShiftLeft(dividend, partsCount, 1);
1297     assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1298   }
1299 
1300   /* Long division.  */
1301   for (bit = precision; bit; bit -= 1) {
1302     if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1303       APInt::tcSubtract(dividend, divisor, 0, partsCount);
1304       APInt::tcSetBit(lhsSignificand, bit - 1);
1305     }
1306 
1307     APInt::tcShiftLeft(dividend, partsCount, 1);
1308   }
1309 
1310   /* Figure out the lost fraction.  */
1311   int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1312 
1313   if (cmp > 0)
1314     lost_fraction = lfMoreThanHalf;
1315   else if (cmp == 0)
1316     lost_fraction = lfExactlyHalf;
1317   else if (APInt::tcIsZero(dividend, partsCount))
1318     lost_fraction = lfExactlyZero;
1319   else
1320     lost_fraction = lfLessThanHalf;
1321 
1322   if (partsCount > 2)
1323     delete [] dividend;
1324 
1325   return lost_fraction;
1326 }
1327 
significandMSB() const1328 unsigned int IEEEFloat::significandMSB() const {
1329   return APInt::tcMSB(significandParts(), partCount());
1330 }
1331 
significandLSB() const1332 unsigned int IEEEFloat::significandLSB() const {
1333   return APInt::tcLSB(significandParts(), partCount());
1334 }
1335 
1336 /* Note that a zero result is NOT normalized to fcZero.  */
shiftSignificandRight(unsigned int bits)1337 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1338   /* Our exponent should not overflow.  */
1339   assert((ExponentType) (exponent + bits) >= exponent);
1340 
1341   exponent += bits;
1342 
1343   return shiftRight(significandParts(), partCount(), bits);
1344 }
1345 
1346 /* Shift the significand left BITS bits, subtract BITS from its exponent.  */
shiftSignificandLeft(unsigned int bits)1347 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1348   assert(bits < semantics->precision);
1349 
1350   if (bits) {
1351     unsigned int partsCount = partCount();
1352 
1353     APInt::tcShiftLeft(significandParts(), partsCount, bits);
1354     exponent -= bits;
1355 
1356     assert(!APInt::tcIsZero(significandParts(), partsCount));
1357   }
1358 }
1359 
1360 IEEEFloat::cmpResult
compareAbsoluteValue(const IEEEFloat & rhs) const1361 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1362   int compare;
1363 
1364   assert(semantics == rhs.semantics);
1365   assert(isFiniteNonZero());
1366   assert(rhs.isFiniteNonZero());
1367 
1368   compare = exponent - rhs.exponent;
1369 
1370   /* If exponents are equal, do an unsigned bignum comparison of the
1371      significands.  */
1372   if (compare == 0)
1373     compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1374                                partCount());
1375 
1376   if (compare > 0)
1377     return cmpGreaterThan;
1378   else if (compare < 0)
1379     return cmpLessThan;
1380   else
1381     return cmpEqual;
1382 }
1383 
1384 /* Set the least significant BITS bits of a bignum, clear the
1385    rest.  */
tcSetLeastSignificantBits(APInt::WordType * dst,unsigned parts,unsigned bits)1386 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1387                                       unsigned bits) {
1388   unsigned i = 0;
1389   while (bits > APInt::APINT_BITS_PER_WORD) {
1390     dst[i++] = ~(APInt::WordType)0;
1391     bits -= APInt::APINT_BITS_PER_WORD;
1392   }
1393 
1394   if (bits)
1395     dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1396 
1397   while (i < parts)
1398     dst[i++] = 0;
1399 }
1400 
1401 /* Handle overflow.  Sign is preserved.  We either become infinity or
1402    the largest finite number.  */
handleOverflow(roundingMode rounding_mode)1403 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1404   /* Infinity?  */
1405   if (rounding_mode == rmNearestTiesToEven ||
1406       rounding_mode == rmNearestTiesToAway ||
1407       (rounding_mode == rmTowardPositive && !sign) ||
1408       (rounding_mode == rmTowardNegative && sign)) {
1409     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1410       makeNaN(false, sign);
1411     else
1412       category = fcInfinity;
1413     return (opStatus) (opOverflow | opInexact);
1414   }
1415 
1416   /* Otherwise we become the largest finite number.  */
1417   category = fcNormal;
1418   exponent = semantics->maxExponent;
1419   tcSetLeastSignificantBits(significandParts(), partCount(),
1420                             semantics->precision);
1421   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1422     APInt::tcClearBit(significandParts(), 0);
1423 
1424   return opInexact;
1425 }
1426 
1427 /* Returns TRUE if, when truncating the current number, with BIT the
1428    new LSB, with the given lost fraction and rounding mode, the result
1429    would need to be rounded away from zero (i.e., by increasing the
1430    signficand).  This routine must work for fcZero of both signs, and
1431    fcNormal numbers.  */
roundAwayFromZero(roundingMode rounding_mode,lostFraction lost_fraction,unsigned int bit) const1432 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1433                                   lostFraction lost_fraction,
1434                                   unsigned int bit) const {
1435   /* NaNs and infinities should not have lost fractions.  */
1436   assert(isFiniteNonZero() || category == fcZero);
1437 
1438   /* Current callers never pass this so we don't handle it.  */
1439   assert(lost_fraction != lfExactlyZero);
1440 
1441   switch (rounding_mode) {
1442   case rmNearestTiesToAway:
1443     return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1444 
1445   case rmNearestTiesToEven:
1446     if (lost_fraction == lfMoreThanHalf)
1447       return true;
1448 
1449     /* Our zeroes don't have a significand to test.  */
1450     if (lost_fraction == lfExactlyHalf && category != fcZero)
1451       return APInt::tcExtractBit(significandParts(), bit);
1452 
1453     return false;
1454 
1455   case rmTowardZero:
1456     return false;
1457 
1458   case rmTowardPositive:
1459     return !sign;
1460 
1461   case rmTowardNegative:
1462     return sign;
1463 
1464   default:
1465     break;
1466   }
1467   llvm_unreachable("Invalid rounding mode found");
1468 }
1469 
normalize(roundingMode rounding_mode,lostFraction lost_fraction)1470 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1471                                          lostFraction lost_fraction) {
1472   unsigned int omsb;                /* One, not zero, based MSB.  */
1473   int exponentChange;
1474 
1475   if (!isFiniteNonZero())
1476     return opOK;
1477 
1478   /* Before rounding normalize the exponent of fcNormal numbers.  */
1479   omsb = significandMSB() + 1;
1480 
1481   if (omsb) {
1482     /* OMSB is numbered from 1.  We want to place it in the integer
1483        bit numbered PRECISION if possible, with a compensating change in
1484        the exponent.  */
1485     exponentChange = omsb - semantics->precision;
1486 
1487     /* If the resulting exponent is too high, overflow according to
1488        the rounding mode.  */
1489     if (exponent + exponentChange > semantics->maxExponent)
1490       return handleOverflow(rounding_mode);
1491 
1492     /* Subnormal numbers have exponent minExponent, and their MSB
1493        is forced based on that.  */
1494     if (exponent + exponentChange < semantics->minExponent)
1495       exponentChange = semantics->minExponent - exponent;
1496 
1497     /* Shifting left is easy as we don't lose precision.  */
1498     if (exponentChange < 0) {
1499       assert(lost_fraction == lfExactlyZero);
1500 
1501       shiftSignificandLeft(-exponentChange);
1502 
1503       return opOK;
1504     }
1505 
1506     if (exponentChange > 0) {
1507       lostFraction lf;
1508 
1509       /* Shift right and capture any new lost fraction.  */
1510       lf = shiftSignificandRight(exponentChange);
1511 
1512       lost_fraction = combineLostFractions(lf, lost_fraction);
1513 
1514       /* Keep OMSB up-to-date.  */
1515       if (omsb > (unsigned) exponentChange)
1516         omsb -= exponentChange;
1517       else
1518         omsb = 0;
1519     }
1520   }
1521 
1522   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1523       exponent == semantics->maxExponent && isSignificandAllOnes())
1524     return handleOverflow(rounding_mode);
1525 
1526   /* Now round the number according to rounding_mode given the lost
1527      fraction.  */
1528 
1529   /* As specified in IEEE 754, since we do not trap we do not report
1530      underflow for exact results.  */
1531   if (lost_fraction == lfExactlyZero) {
1532     /* Canonicalize zeroes.  */
1533     if (omsb == 0)
1534       category = fcZero;
1535 
1536     return opOK;
1537   }
1538 
1539   /* Increment the significand if we're rounding away from zero.  */
1540   if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1541     if (omsb == 0)
1542       exponent = semantics->minExponent;
1543 
1544     incrementSignificand();
1545     omsb = significandMSB() + 1;
1546 
1547     /* Did the significand increment overflow?  */
1548     if (omsb == (unsigned) semantics->precision + 1) {
1549       /* Renormalize by incrementing the exponent and shifting our
1550          significand right one.  However if we already have the
1551          maximum exponent we overflow to infinity.  */
1552       if (exponent == semantics->maxExponent) {
1553         category = fcInfinity;
1554 
1555         return (opStatus) (opOverflow | opInexact);
1556       }
1557 
1558       shiftSignificandRight(1);
1559 
1560       return opInexact;
1561     }
1562 
1563     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1564         exponent == semantics->maxExponent && isSignificandAllOnes())
1565       return handleOverflow(rounding_mode);
1566   }
1567 
1568   /* The normal case - we were and are not denormal, and any
1569      significand increment above didn't overflow.  */
1570   if (omsb == semantics->precision)
1571     return opInexact;
1572 
1573   /* We have a non-zero denormal.  */
1574   assert(omsb < semantics->precision);
1575 
1576   /* Canonicalize zeroes.  */
1577   if (omsb == 0)
1578     category = fcZero;
1579 
1580   /* The fcZero case is a denormal that underflowed to zero.  */
1581   return (opStatus) (opUnderflow | opInexact);
1582 }
1583 
addOrSubtractSpecials(const IEEEFloat & rhs,bool subtract)1584 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1585                                                      bool subtract) {
1586   switch (PackCategoriesIntoKey(category, rhs.category)) {
1587   default:
1588     llvm_unreachable(nullptr);
1589 
1590   case PackCategoriesIntoKey(fcZero, fcNaN):
1591   case PackCategoriesIntoKey(fcNormal, fcNaN):
1592   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1593     assign(rhs);
1594     [[fallthrough]];
1595   case PackCategoriesIntoKey(fcNaN, fcZero):
1596   case PackCategoriesIntoKey(fcNaN, fcNormal):
1597   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1598   case PackCategoriesIntoKey(fcNaN, fcNaN):
1599     if (isSignaling()) {
1600       makeQuiet();
1601       return opInvalidOp;
1602     }
1603     return rhs.isSignaling() ? opInvalidOp : opOK;
1604 
1605   case PackCategoriesIntoKey(fcNormal, fcZero):
1606   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1607   case PackCategoriesIntoKey(fcInfinity, fcZero):
1608     return opOK;
1609 
1610   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1611   case PackCategoriesIntoKey(fcZero, fcInfinity):
1612     category = fcInfinity;
1613     sign = rhs.sign ^ subtract;
1614     return opOK;
1615 
1616   case PackCategoriesIntoKey(fcZero, fcNormal):
1617     assign(rhs);
1618     sign = rhs.sign ^ subtract;
1619     return opOK;
1620 
1621   case PackCategoriesIntoKey(fcZero, fcZero):
1622     /* Sign depends on rounding mode; handled by caller.  */
1623     return opOK;
1624 
1625   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1626     /* Differently signed infinities can only be validly
1627        subtracted.  */
1628     if (((sign ^ rhs.sign)!=0) != subtract) {
1629       makeNaN();
1630       return opInvalidOp;
1631     }
1632 
1633     return opOK;
1634 
1635   case PackCategoriesIntoKey(fcNormal, fcNormal):
1636     return opDivByZero;
1637   }
1638 }
1639 
1640 /* Add or subtract two normal numbers.  */
addOrSubtractSignificand(const IEEEFloat & rhs,bool subtract)1641 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1642                                                  bool subtract) {
1643   integerPart carry;
1644   lostFraction lost_fraction;
1645   int bits;
1646 
1647   /* Determine if the operation on the absolute values is effectively
1648      an addition or subtraction.  */
1649   subtract ^= static_cast<bool>(sign ^ rhs.sign);
1650 
1651   /* Are we bigger exponent-wise than the RHS?  */
1652   bits = exponent - rhs.exponent;
1653 
1654   /* Subtraction is more subtle than one might naively expect.  */
1655   if (subtract) {
1656     IEEEFloat temp_rhs(rhs);
1657 
1658     if (bits == 0)
1659       lost_fraction = lfExactlyZero;
1660     else if (bits > 0) {
1661       lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1662       shiftSignificandLeft(1);
1663     } else {
1664       lost_fraction = shiftSignificandRight(-bits - 1);
1665       temp_rhs.shiftSignificandLeft(1);
1666     }
1667 
1668     // Should we reverse the subtraction.
1669     if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1670       carry = temp_rhs.subtractSignificand
1671         (*this, lost_fraction != lfExactlyZero);
1672       copySignificand(temp_rhs);
1673       sign = !sign;
1674     } else {
1675       carry = subtractSignificand
1676         (temp_rhs, lost_fraction != lfExactlyZero);
1677     }
1678 
1679     /* Invert the lost fraction - it was on the RHS and
1680        subtracted.  */
1681     if (lost_fraction == lfLessThanHalf)
1682       lost_fraction = lfMoreThanHalf;
1683     else if (lost_fraction == lfMoreThanHalf)
1684       lost_fraction = lfLessThanHalf;
1685 
1686     /* The code above is intended to ensure that no borrow is
1687        necessary.  */
1688     assert(!carry);
1689     (void)carry;
1690   } else {
1691     if (bits > 0) {
1692       IEEEFloat temp_rhs(rhs);
1693 
1694       lost_fraction = temp_rhs.shiftSignificandRight(bits);
1695       carry = addSignificand(temp_rhs);
1696     } else {
1697       lost_fraction = shiftSignificandRight(-bits);
1698       carry = addSignificand(rhs);
1699     }
1700 
1701     /* We have a guard bit; generating a carry cannot happen.  */
1702     assert(!carry);
1703     (void)carry;
1704   }
1705 
1706   return lost_fraction;
1707 }
1708 
multiplySpecials(const IEEEFloat & rhs)1709 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1710   switch (PackCategoriesIntoKey(category, rhs.category)) {
1711   default:
1712     llvm_unreachable(nullptr);
1713 
1714   case PackCategoriesIntoKey(fcZero, fcNaN):
1715   case PackCategoriesIntoKey(fcNormal, fcNaN):
1716   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1717     assign(rhs);
1718     sign = false;
1719     [[fallthrough]];
1720   case PackCategoriesIntoKey(fcNaN, fcZero):
1721   case PackCategoriesIntoKey(fcNaN, fcNormal):
1722   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1723   case PackCategoriesIntoKey(fcNaN, fcNaN):
1724     sign ^= rhs.sign; // restore the original sign
1725     if (isSignaling()) {
1726       makeQuiet();
1727       return opInvalidOp;
1728     }
1729     return rhs.isSignaling() ? opInvalidOp : opOK;
1730 
1731   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1732   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1733   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1734     category = fcInfinity;
1735     return opOK;
1736 
1737   case PackCategoriesIntoKey(fcZero, fcNormal):
1738   case PackCategoriesIntoKey(fcNormal, fcZero):
1739   case PackCategoriesIntoKey(fcZero, fcZero):
1740     category = fcZero;
1741     return opOK;
1742 
1743   case PackCategoriesIntoKey(fcZero, fcInfinity):
1744   case PackCategoriesIntoKey(fcInfinity, fcZero):
1745     makeNaN();
1746     return opInvalidOp;
1747 
1748   case PackCategoriesIntoKey(fcNormal, fcNormal):
1749     return opOK;
1750   }
1751 }
1752 
divideSpecials(const IEEEFloat & rhs)1753 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1754   switch (PackCategoriesIntoKey(category, rhs.category)) {
1755   default:
1756     llvm_unreachable(nullptr);
1757 
1758   case PackCategoriesIntoKey(fcZero, fcNaN):
1759   case PackCategoriesIntoKey(fcNormal, fcNaN):
1760   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1761     assign(rhs);
1762     sign = false;
1763     [[fallthrough]];
1764   case PackCategoriesIntoKey(fcNaN, fcZero):
1765   case PackCategoriesIntoKey(fcNaN, fcNormal):
1766   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1767   case PackCategoriesIntoKey(fcNaN, fcNaN):
1768     sign ^= rhs.sign; // restore the original sign
1769     if (isSignaling()) {
1770       makeQuiet();
1771       return opInvalidOp;
1772     }
1773     return rhs.isSignaling() ? opInvalidOp : opOK;
1774 
1775   case PackCategoriesIntoKey(fcInfinity, fcZero):
1776   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1777   case PackCategoriesIntoKey(fcZero, fcInfinity):
1778   case PackCategoriesIntoKey(fcZero, fcNormal):
1779     return opOK;
1780 
1781   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1782     category = fcZero;
1783     return opOK;
1784 
1785   case PackCategoriesIntoKey(fcNormal, fcZero):
1786     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1787       makeNaN(false, sign);
1788     else
1789       category = fcInfinity;
1790     return opDivByZero;
1791 
1792   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1793   case PackCategoriesIntoKey(fcZero, fcZero):
1794     makeNaN();
1795     return opInvalidOp;
1796 
1797   case PackCategoriesIntoKey(fcNormal, fcNormal):
1798     return opOK;
1799   }
1800 }
1801 
modSpecials(const IEEEFloat & rhs)1802 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1803   switch (PackCategoriesIntoKey(category, rhs.category)) {
1804   default:
1805     llvm_unreachable(nullptr);
1806 
1807   case PackCategoriesIntoKey(fcZero, fcNaN):
1808   case PackCategoriesIntoKey(fcNormal, fcNaN):
1809   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1810     assign(rhs);
1811     [[fallthrough]];
1812   case PackCategoriesIntoKey(fcNaN, fcZero):
1813   case PackCategoriesIntoKey(fcNaN, fcNormal):
1814   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1815   case PackCategoriesIntoKey(fcNaN, fcNaN):
1816     if (isSignaling()) {
1817       makeQuiet();
1818       return opInvalidOp;
1819     }
1820     return rhs.isSignaling() ? opInvalidOp : opOK;
1821 
1822   case PackCategoriesIntoKey(fcZero, fcInfinity):
1823   case PackCategoriesIntoKey(fcZero, fcNormal):
1824   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1825     return opOK;
1826 
1827   case PackCategoriesIntoKey(fcNormal, fcZero):
1828   case PackCategoriesIntoKey(fcInfinity, fcZero):
1829   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1830   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1831   case PackCategoriesIntoKey(fcZero, fcZero):
1832     makeNaN();
1833     return opInvalidOp;
1834 
1835   case PackCategoriesIntoKey(fcNormal, fcNormal):
1836     return opOK;
1837   }
1838 }
1839 
remainderSpecials(const IEEEFloat & rhs)1840 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1841   switch (PackCategoriesIntoKey(category, rhs.category)) {
1842   default:
1843     llvm_unreachable(nullptr);
1844 
1845   case PackCategoriesIntoKey(fcZero, fcNaN):
1846   case PackCategoriesIntoKey(fcNormal, fcNaN):
1847   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1848     assign(rhs);
1849     [[fallthrough]];
1850   case PackCategoriesIntoKey(fcNaN, fcZero):
1851   case PackCategoriesIntoKey(fcNaN, fcNormal):
1852   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1853   case PackCategoriesIntoKey(fcNaN, fcNaN):
1854     if (isSignaling()) {
1855       makeQuiet();
1856       return opInvalidOp;
1857     }
1858     return rhs.isSignaling() ? opInvalidOp : opOK;
1859 
1860   case PackCategoriesIntoKey(fcZero, fcInfinity):
1861   case PackCategoriesIntoKey(fcZero, fcNormal):
1862   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1863     return opOK;
1864 
1865   case PackCategoriesIntoKey(fcNormal, fcZero):
1866   case PackCategoriesIntoKey(fcInfinity, fcZero):
1867   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1868   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1869   case PackCategoriesIntoKey(fcZero, fcZero):
1870     makeNaN();
1871     return opInvalidOp;
1872 
1873   case PackCategoriesIntoKey(fcNormal, fcNormal):
1874     return opDivByZero; // fake status, indicating this is not a special case
1875   }
1876 }
1877 
1878 /* Change sign.  */
changeSign()1879 void IEEEFloat::changeSign() {
1880   /* Look mummy, this one's easy.  */
1881   sign = !sign;
1882 }
1883 
1884 /* Normalized addition or subtraction.  */
addOrSubtract(const IEEEFloat & rhs,roundingMode rounding_mode,bool subtract)1885 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1886                                              roundingMode rounding_mode,
1887                                              bool subtract) {
1888   opStatus fs;
1889 
1890   fs = addOrSubtractSpecials(rhs, subtract);
1891 
1892   /* This return code means it was not a simple case.  */
1893   if (fs == opDivByZero) {
1894     lostFraction lost_fraction;
1895 
1896     lost_fraction = addOrSubtractSignificand(rhs, subtract);
1897     fs = normalize(rounding_mode, lost_fraction);
1898 
1899     /* Can only be zero if we lost no fraction.  */
1900     assert(category != fcZero || lost_fraction == lfExactlyZero);
1901   }
1902 
1903   /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1904      positive zero unless rounding to minus infinity, except that
1905      adding two like-signed zeroes gives that zero.  */
1906   if (category == fcZero) {
1907     if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1908       sign = (rounding_mode == rmTowardNegative);
1909   }
1910 
1911   return fs;
1912 }
1913 
1914 /* Normalized addition.  */
add(const IEEEFloat & rhs,roundingMode rounding_mode)1915 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
1916                                    roundingMode rounding_mode) {
1917   return addOrSubtract(rhs, rounding_mode, false);
1918 }
1919 
1920 /* Normalized subtraction.  */
subtract(const IEEEFloat & rhs,roundingMode rounding_mode)1921 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
1922                                         roundingMode rounding_mode) {
1923   return addOrSubtract(rhs, rounding_mode, true);
1924 }
1925 
1926 /* Normalized multiply.  */
multiply(const IEEEFloat & rhs,roundingMode rounding_mode)1927 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
1928                                         roundingMode rounding_mode) {
1929   opStatus fs;
1930 
1931   sign ^= rhs.sign;
1932   fs = multiplySpecials(rhs);
1933 
1934   if (isFiniteNonZero()) {
1935     lostFraction lost_fraction = multiplySignificand(rhs);
1936     fs = normalize(rounding_mode, lost_fraction);
1937     if (lost_fraction != lfExactlyZero)
1938       fs = (opStatus) (fs | opInexact);
1939   }
1940 
1941   return fs;
1942 }
1943 
1944 /* Normalized divide.  */
divide(const IEEEFloat & rhs,roundingMode rounding_mode)1945 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
1946                                       roundingMode rounding_mode) {
1947   opStatus fs;
1948 
1949   sign ^= rhs.sign;
1950   fs = divideSpecials(rhs);
1951 
1952   if (isFiniteNonZero()) {
1953     lostFraction lost_fraction = divideSignificand(rhs);
1954     fs = normalize(rounding_mode, lost_fraction);
1955     if (lost_fraction != lfExactlyZero)
1956       fs = (opStatus) (fs | opInexact);
1957   }
1958 
1959   return fs;
1960 }
1961 
1962 /* Normalized remainder.  */
remainder(const IEEEFloat & rhs)1963 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
1964   opStatus fs;
1965   unsigned int origSign = sign;
1966 
1967   // First handle the special cases.
1968   fs = remainderSpecials(rhs);
1969   if (fs != opDivByZero)
1970     return fs;
1971 
1972   fs = opOK;
1973 
1974   // Make sure the current value is less than twice the denom. If the addition
1975   // did not succeed (an overflow has happened), which means that the finite
1976   // value we currently posses must be less than twice the denom (as we are
1977   // using the same semantics).
1978   IEEEFloat P2 = rhs;
1979   if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
1980     fs = mod(P2);
1981     assert(fs == opOK);
1982   }
1983 
1984   // Lets work with absolute numbers.
1985   IEEEFloat P = rhs;
1986   P.sign = false;
1987   sign = false;
1988 
1989   //
1990   // To calculate the remainder we use the following scheme.
1991   //
1992   // The remainder is defained as follows:
1993   //
1994   // remainder = numer - rquot * denom = x - r * p
1995   //
1996   // Where r is the result of: x/p, rounded toward the nearest integral value
1997   // (with halfway cases rounded toward the even number).
1998   //
1999   // Currently, (after x mod 2p):
2000   // r is the number of 2p's present inside x, which is inherently, an even
2001   // number of p's.
2002   //
2003   // We may split the remaining calculation into 4 options:
2004   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2005   // - if x == 0.5p then we round to the nearest even number which is 0, and we
2006   //   are done as well.
2007   // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2008   //   to subtract 1p at least once.
2009   // - if x >= p then we must subtract p at least once, as x must be a
2010   //   remainder.
2011   //
2012   // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2013   //
2014   // We can now split the remaining calculation to the following 3 options:
2015   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2016   // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2017   //   must round up to the next even number. so we must subtract p once more.
2018   // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2019   //   integral, and subtract p once more.
2020   //
2021 
2022   // Extend the semantics to prevent an overflow/underflow or inexact result.
2023   bool losesInfo;
2024   fltSemantics extendedSemantics = *semantics;
2025   extendedSemantics.maxExponent++;
2026   extendedSemantics.minExponent--;
2027   extendedSemantics.precision += 2;
2028 
2029   IEEEFloat VEx = *this;
2030   fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2031   assert(fs == opOK && !losesInfo);
2032   IEEEFloat PEx = P;
2033   fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2034   assert(fs == opOK && !losesInfo);
2035 
2036   // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2037   // any fraction.
2038   fs = VEx.add(VEx, rmNearestTiesToEven);
2039   assert(fs == opOK);
2040 
2041   if (VEx.compare(PEx) == cmpGreaterThan) {
2042     fs = subtract(P, rmNearestTiesToEven);
2043     assert(fs == opOK);
2044 
2045     // Make VEx = this.add(this), but because we have different semantics, we do
2046     // not want to `convert` again, so we just subtract PEx twice (which equals
2047     // to the desired value).
2048     fs = VEx.subtract(PEx, rmNearestTiesToEven);
2049     assert(fs == opOK);
2050     fs = VEx.subtract(PEx, rmNearestTiesToEven);
2051     assert(fs == opOK);
2052 
2053     cmpResult result = VEx.compare(PEx);
2054     if (result == cmpGreaterThan || result == cmpEqual) {
2055       fs = subtract(P, rmNearestTiesToEven);
2056       assert(fs == opOK);
2057     }
2058   }
2059 
2060   if (isZero())
2061     sign = origSign;    // IEEE754 requires this
2062   else
2063     sign ^= origSign;
2064   return fs;
2065 }
2066 
2067 /* Normalized llvm frem (C fmod). */
mod(const IEEEFloat & rhs)2068 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
2069   opStatus fs;
2070   fs = modSpecials(rhs);
2071   unsigned int origSign = sign;
2072 
2073   while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2074          compareAbsoluteValue(rhs) != cmpLessThan) {
2075     int Exp = ilogb(*this) - ilogb(rhs);
2076     IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2077     // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2078     // check for it.
2079     if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2080       V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2081     V.sign = sign;
2082 
2083     fs = subtract(V, rmNearestTiesToEven);
2084     assert(fs==opOK);
2085   }
2086   if (isZero())
2087     sign = origSign; // fmod requires this
2088   return fs;
2089 }
2090 
2091 /* Normalized fused-multiply-add.  */
fusedMultiplyAdd(const IEEEFloat & multiplicand,const IEEEFloat & addend,roundingMode rounding_mode)2092 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
2093                                                 const IEEEFloat &addend,
2094                                                 roundingMode rounding_mode) {
2095   opStatus fs;
2096 
2097   /* Post-multiplication sign, before addition.  */
2098   sign ^= multiplicand.sign;
2099 
2100   /* If and only if all arguments are normal do we need to do an
2101      extended-precision calculation.  */
2102   if (isFiniteNonZero() &&
2103       multiplicand.isFiniteNonZero() &&
2104       addend.isFinite()) {
2105     lostFraction lost_fraction;
2106 
2107     lost_fraction = multiplySignificand(multiplicand, addend);
2108     fs = normalize(rounding_mode, lost_fraction);
2109     if (lost_fraction != lfExactlyZero)
2110       fs = (opStatus) (fs | opInexact);
2111 
2112     /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2113        positive zero unless rounding to minus infinity, except that
2114        adding two like-signed zeroes gives that zero.  */
2115     if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
2116       sign = (rounding_mode == rmTowardNegative);
2117   } else {
2118     fs = multiplySpecials(multiplicand);
2119 
2120     /* FS can only be opOK or opInvalidOp.  There is no more work
2121        to do in the latter case.  The IEEE-754R standard says it is
2122        implementation-defined in this case whether, if ADDEND is a
2123        quiet NaN, we raise invalid op; this implementation does so.
2124 
2125        If we need to do the addition we can do so with normal
2126        precision.  */
2127     if (fs == opOK)
2128       fs = addOrSubtract(addend, rounding_mode, false);
2129   }
2130 
2131   return fs;
2132 }
2133 
2134 /* Rounding-mode correct round to integral value.  */
roundToIntegral(roundingMode rounding_mode)2135 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
2136   opStatus fs;
2137 
2138   if (isInfinity())
2139     // [IEEE Std 754-2008 6.1]:
2140     // The behavior of infinity in floating-point arithmetic is derived from the
2141     // limiting cases of real arithmetic with operands of arbitrarily
2142     // large magnitude, when such a limit exists.
2143     // ...
2144     // Operations on infinite operands are usually exact and therefore signal no
2145     // exceptions ...
2146     return opOK;
2147 
2148   if (isNaN()) {
2149     if (isSignaling()) {
2150       // [IEEE Std 754-2008 6.2]:
2151       // Under default exception handling, any operation signaling an invalid
2152       // operation exception and for which a floating-point result is to be
2153       // delivered shall deliver a quiet NaN.
2154       makeQuiet();
2155       // [IEEE Std 754-2008 6.2]:
2156       // Signaling NaNs shall be reserved operands that, under default exception
2157       // handling, signal the invalid operation exception(see 7.2) for every
2158       // general-computational and signaling-computational operation except for
2159       // the conversions described in 5.12.
2160       return opInvalidOp;
2161     } else {
2162       // [IEEE Std 754-2008 6.2]:
2163       // For an operation with quiet NaN inputs, other than maximum and minimum
2164       // operations, if a floating-point result is to be delivered the result
2165       // shall be a quiet NaN which should be one of the input NaNs.
2166       // ...
2167       // Every general-computational and quiet-computational operation involving
2168       // one or more input NaNs, none of them signaling, shall signal no
2169       // exception, except fusedMultiplyAdd might signal the invalid operation
2170       // exception(see 7.2).
2171       return opOK;
2172     }
2173   }
2174 
2175   if (isZero()) {
2176     // [IEEE Std 754-2008 6.3]:
2177     // ... the sign of the result of conversions, the quantize operation, the
2178     // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2179     // the sign of the first or only operand.
2180     return opOK;
2181   }
2182 
2183   // If the exponent is large enough, we know that this value is already
2184   // integral, and the arithmetic below would potentially cause it to saturate
2185   // to +/-Inf.  Bail out early instead.
2186   if (exponent+1 >= (int)semanticsPrecision(*semantics))
2187     return opOK;
2188 
2189   // The algorithm here is quite simple: we add 2^(p-1), where p is the
2190   // precision of our format, and then subtract it back off again.  The choice
2191   // of rounding modes for the addition/subtraction determines the rounding mode
2192   // for our integral rounding as well.
2193   // NOTE: When the input value is negative, we do subtraction followed by
2194   // addition instead.
2195   APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2196   IntegerConstant <<= semanticsPrecision(*semantics)-1;
2197   IEEEFloat MagicConstant(*semantics);
2198   fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2199                                       rmNearestTiesToEven);
2200   assert(fs == opOK);
2201   MagicConstant.sign = sign;
2202 
2203   // Preserve the input sign so that we can handle the case of zero result
2204   // correctly.
2205   bool inputSign = isNegative();
2206 
2207   fs = add(MagicConstant, rounding_mode);
2208 
2209   // Current value and 'MagicConstant' are both integers, so the result of the
2210   // subtraction is always exact according to Sterbenz' lemma.
2211   subtract(MagicConstant, rounding_mode);
2212 
2213   // Restore the input sign.
2214   if (inputSign != isNegative())
2215     changeSign();
2216 
2217   return fs;
2218 }
2219 
2220 
2221 /* Comparison requires normalized numbers.  */
compare(const IEEEFloat & rhs) const2222 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2223   cmpResult result;
2224 
2225   assert(semantics == rhs.semantics);
2226 
2227   switch (PackCategoriesIntoKey(category, rhs.category)) {
2228   default:
2229     llvm_unreachable(nullptr);
2230 
2231   case PackCategoriesIntoKey(fcNaN, fcZero):
2232   case PackCategoriesIntoKey(fcNaN, fcNormal):
2233   case PackCategoriesIntoKey(fcNaN, fcInfinity):
2234   case PackCategoriesIntoKey(fcNaN, fcNaN):
2235   case PackCategoriesIntoKey(fcZero, fcNaN):
2236   case PackCategoriesIntoKey(fcNormal, fcNaN):
2237   case PackCategoriesIntoKey(fcInfinity, fcNaN):
2238     return cmpUnordered;
2239 
2240   case PackCategoriesIntoKey(fcInfinity, fcNormal):
2241   case PackCategoriesIntoKey(fcInfinity, fcZero):
2242   case PackCategoriesIntoKey(fcNormal, fcZero):
2243     if (sign)
2244       return cmpLessThan;
2245     else
2246       return cmpGreaterThan;
2247 
2248   case PackCategoriesIntoKey(fcNormal, fcInfinity):
2249   case PackCategoriesIntoKey(fcZero, fcInfinity):
2250   case PackCategoriesIntoKey(fcZero, fcNormal):
2251     if (rhs.sign)
2252       return cmpGreaterThan;
2253     else
2254       return cmpLessThan;
2255 
2256   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2257     if (sign == rhs.sign)
2258       return cmpEqual;
2259     else if (sign)
2260       return cmpLessThan;
2261     else
2262       return cmpGreaterThan;
2263 
2264   case PackCategoriesIntoKey(fcZero, fcZero):
2265     return cmpEqual;
2266 
2267   case PackCategoriesIntoKey(fcNormal, fcNormal):
2268     break;
2269   }
2270 
2271   /* Two normal numbers.  Do they have the same sign?  */
2272   if (sign != rhs.sign) {
2273     if (sign)
2274       result = cmpLessThan;
2275     else
2276       result = cmpGreaterThan;
2277   } else {
2278     /* Compare absolute values; invert result if negative.  */
2279     result = compareAbsoluteValue(rhs);
2280 
2281     if (sign) {
2282       if (result == cmpLessThan)
2283         result = cmpGreaterThan;
2284       else if (result == cmpGreaterThan)
2285         result = cmpLessThan;
2286     }
2287   }
2288 
2289   return result;
2290 }
2291 
2292 /// IEEEFloat::convert - convert a value of one floating point type to another.
2293 /// The return value corresponds to the IEEE754 exceptions.  *losesInfo
2294 /// records whether the transformation lost information, i.e. whether
2295 /// converting the result back to the original type will produce the
2296 /// original value (this is almost the same as return value==fsOK, but there
2297 /// are edge cases where this is not so).
2298 
convert(const fltSemantics & toSemantics,roundingMode rounding_mode,bool * losesInfo)2299 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2300                                        roundingMode rounding_mode,
2301                                        bool *losesInfo) {
2302   lostFraction lostFraction;
2303   unsigned int newPartCount, oldPartCount;
2304   opStatus fs;
2305   int shift;
2306   const fltSemantics &fromSemantics = *semantics;
2307   bool is_signaling = isSignaling();
2308 
2309   lostFraction = lfExactlyZero;
2310   newPartCount = partCountForBits(toSemantics.precision + 1);
2311   oldPartCount = partCount();
2312   shift = toSemantics.precision - fromSemantics.precision;
2313 
2314   bool X86SpecialNan = false;
2315   if (&fromSemantics == &semX87DoubleExtended &&
2316       &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2317       (!(*significandParts() & 0x8000000000000000ULL) ||
2318        !(*significandParts() & 0x4000000000000000ULL))) {
2319     // x86 has some unusual NaNs which cannot be represented in any other
2320     // format; note them here.
2321     X86SpecialNan = true;
2322   }
2323 
2324   // If this is a truncation of a denormal number, and the target semantics
2325   // has larger exponent range than the source semantics (this can happen
2326   // when truncating from PowerPC double-double to double format), the
2327   // right shift could lose result mantissa bits.  Adjust exponent instead
2328   // of performing excessive shift.
2329   // Also do a similar trick in case shifting denormal would produce zero
2330   // significand as this case isn't handled correctly by normalize.
2331   if (shift < 0 && isFiniteNonZero()) {
2332     int omsb = significandMSB() + 1;
2333     int exponentChange = omsb - fromSemantics.precision;
2334     if (exponent + exponentChange < toSemantics.minExponent)
2335       exponentChange = toSemantics.minExponent - exponent;
2336     if (exponentChange < shift)
2337       exponentChange = shift;
2338     if (exponentChange < 0) {
2339       shift -= exponentChange;
2340       exponent += exponentChange;
2341     } else if (omsb <= -shift) {
2342       exponentChange = omsb + shift - 1; // leave at least one bit set
2343       shift -= exponentChange;
2344       exponent += exponentChange;
2345     }
2346   }
2347 
2348   // If this is a truncation, perform the shift before we narrow the storage.
2349   if (shift < 0 && (isFiniteNonZero() ||
2350                     (category == fcNaN && semantics->nonFiniteBehavior !=
2351                                               fltNonfiniteBehavior::NanOnly)))
2352     lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2353 
2354   // Fix the storage so it can hold to new value.
2355   if (newPartCount > oldPartCount) {
2356     // The new type requires more storage; make it available.
2357     integerPart *newParts;
2358     newParts = new integerPart[newPartCount];
2359     APInt::tcSet(newParts, 0, newPartCount);
2360     if (isFiniteNonZero() || category==fcNaN)
2361       APInt::tcAssign(newParts, significandParts(), oldPartCount);
2362     freeSignificand();
2363     significand.parts = newParts;
2364   } else if (newPartCount == 1 && oldPartCount != 1) {
2365     // Switch to built-in storage for a single part.
2366     integerPart newPart = 0;
2367     if (isFiniteNonZero() || category==fcNaN)
2368       newPart = significandParts()[0];
2369     freeSignificand();
2370     significand.part = newPart;
2371   }
2372 
2373   // Now that we have the right storage, switch the semantics.
2374   semantics = &toSemantics;
2375 
2376   // If this is an extension, perform the shift now that the storage is
2377   // available.
2378   if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2379     APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2380 
2381   if (isFiniteNonZero()) {
2382     fs = normalize(rounding_mode, lostFraction);
2383     *losesInfo = (fs != opOK);
2384   } else if (category == fcNaN) {
2385     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2386       *losesInfo =
2387           fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
2388       makeNaN(false, sign);
2389       return is_signaling ? opInvalidOp : opOK;
2390     }
2391 
2392     *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2393 
2394     // For x87 extended precision, we want to make a NaN, not a special NaN if
2395     // the input wasn't special either.
2396     if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2397       APInt::tcSetBit(significandParts(), semantics->precision - 1);
2398 
2399     // Convert of sNaN creates qNaN and raises an exception (invalid op).
2400     // This also guarantees that a sNaN does not become Inf on a truncation
2401     // that loses all payload bits.
2402     if (is_signaling) {
2403       makeQuiet();
2404       fs = opInvalidOp;
2405     } else {
2406       fs = opOK;
2407     }
2408   } else if (category == fcInfinity &&
2409              semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2410     makeNaN(false, sign);
2411     *losesInfo = true;
2412     fs = opInexact;
2413   } else {
2414     *losesInfo = false;
2415     fs = opOK;
2416   }
2417 
2418   return fs;
2419 }
2420 
2421 /* Convert a floating point number to an integer according to the
2422    rounding mode.  If the rounded integer value is out of range this
2423    returns an invalid operation exception and the contents of the
2424    destination parts are unspecified.  If the rounded value is in
2425    range but the floating point number is not the exact integer, the C
2426    standard doesn't require an inexact exception to be raised.  IEEE
2427    854 does require it so we do that.
2428 
2429    Note that for conversions to integer type the C standard requires
2430    round-to-zero to always be used.  */
convertToSignExtendedInteger(MutableArrayRef<integerPart> parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2431 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2432     MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2433     roundingMode rounding_mode, bool *isExact) const {
2434   lostFraction lost_fraction;
2435   const integerPart *src;
2436   unsigned int dstPartsCount, truncatedBits;
2437 
2438   *isExact = false;
2439 
2440   /* Handle the three special cases first.  */
2441   if (category == fcInfinity || category == fcNaN)
2442     return opInvalidOp;
2443 
2444   dstPartsCount = partCountForBits(width);
2445   assert(dstPartsCount <= parts.size() && "Integer too big");
2446 
2447   if (category == fcZero) {
2448     APInt::tcSet(parts.data(), 0, dstPartsCount);
2449     // Negative zero can't be represented as an int.
2450     *isExact = !sign;
2451     return opOK;
2452   }
2453 
2454   src = significandParts();
2455 
2456   /* Step 1: place our absolute value, with any fraction truncated, in
2457      the destination.  */
2458   if (exponent < 0) {
2459     /* Our absolute value is less than one; truncate everything.  */
2460     APInt::tcSet(parts.data(), 0, dstPartsCount);
2461     /* For exponent -1 the integer bit represents .5, look at that.
2462        For smaller exponents leftmost truncated bit is 0. */
2463     truncatedBits = semantics->precision -1U - exponent;
2464   } else {
2465     /* We want the most significant (exponent + 1) bits; the rest are
2466        truncated.  */
2467     unsigned int bits = exponent + 1U;
2468 
2469     /* Hopelessly large in magnitude?  */
2470     if (bits > width)
2471       return opInvalidOp;
2472 
2473     if (bits < semantics->precision) {
2474       /* We truncate (semantics->precision - bits) bits.  */
2475       truncatedBits = semantics->precision - bits;
2476       APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2477     } else {
2478       /* We want at least as many bits as are available.  */
2479       APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2480                        0);
2481       APInt::tcShiftLeft(parts.data(), dstPartsCount,
2482                          bits - semantics->precision);
2483       truncatedBits = 0;
2484     }
2485   }
2486 
2487   /* Step 2: work out any lost fraction, and increment the absolute
2488      value if we would round away from zero.  */
2489   if (truncatedBits) {
2490     lost_fraction = lostFractionThroughTruncation(src, partCount(),
2491                                                   truncatedBits);
2492     if (lost_fraction != lfExactlyZero &&
2493         roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2494       if (APInt::tcIncrement(parts.data(), dstPartsCount))
2495         return opInvalidOp;     /* Overflow.  */
2496     }
2497   } else {
2498     lost_fraction = lfExactlyZero;
2499   }
2500 
2501   /* Step 3: check if we fit in the destination.  */
2502   unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2503 
2504   if (sign) {
2505     if (!isSigned) {
2506       /* Negative numbers cannot be represented as unsigned.  */
2507       if (omsb != 0)
2508         return opInvalidOp;
2509     } else {
2510       /* It takes omsb bits to represent the unsigned integer value.
2511          We lose a bit for the sign, but care is needed as the
2512          maximally negative integer is a special case.  */
2513       if (omsb == width &&
2514           APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2515         return opInvalidOp;
2516 
2517       /* This case can happen because of rounding.  */
2518       if (omsb > width)
2519         return opInvalidOp;
2520     }
2521 
2522     APInt::tcNegate (parts.data(), dstPartsCount);
2523   } else {
2524     if (omsb >= width + !isSigned)
2525       return opInvalidOp;
2526   }
2527 
2528   if (lost_fraction == lfExactlyZero) {
2529     *isExact = true;
2530     return opOK;
2531   } else
2532     return opInexact;
2533 }
2534 
2535 /* Same as convertToSignExtendedInteger, except we provide
2536    deterministic values in case of an invalid operation exception,
2537    namely zero for NaNs and the minimal or maximal value respectively
2538    for underflow or overflow.
2539    The *isExact output tells whether the result is exact, in the sense
2540    that converting it back to the original floating point type produces
2541    the original value.  This is almost equivalent to result==opOK,
2542    except for negative zeroes.
2543 */
2544 IEEEFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2545 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2546                             unsigned int width, bool isSigned,
2547                             roundingMode rounding_mode, bool *isExact) const {
2548   opStatus fs;
2549 
2550   fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2551                                     isExact);
2552 
2553   if (fs == opInvalidOp) {
2554     unsigned int bits, dstPartsCount;
2555 
2556     dstPartsCount = partCountForBits(width);
2557     assert(dstPartsCount <= parts.size() && "Integer too big");
2558 
2559     if (category == fcNaN)
2560       bits = 0;
2561     else if (sign)
2562       bits = isSigned;
2563     else
2564       bits = width - isSigned;
2565 
2566     tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2567     if (sign && isSigned)
2568       APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2569   }
2570 
2571   return fs;
2572 }
2573 
2574 /* Convert an unsigned integer SRC to a floating point number,
2575    rounding according to ROUNDING_MODE.  The sign of the floating
2576    point number is not modified.  */
convertFromUnsignedParts(const integerPart * src,unsigned int srcCount,roundingMode rounding_mode)2577 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2578     const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2579   unsigned int omsb, precision, dstCount;
2580   integerPart *dst;
2581   lostFraction lost_fraction;
2582 
2583   category = fcNormal;
2584   omsb = APInt::tcMSB(src, srcCount) + 1;
2585   dst = significandParts();
2586   dstCount = partCount();
2587   precision = semantics->precision;
2588 
2589   /* We want the most significant PRECISION bits of SRC.  There may not
2590      be that many; extract what we can.  */
2591   if (precision <= omsb) {
2592     exponent = omsb - 1;
2593     lost_fraction = lostFractionThroughTruncation(src, srcCount,
2594                                                   omsb - precision);
2595     APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2596   } else {
2597     exponent = precision - 1;
2598     lost_fraction = lfExactlyZero;
2599     APInt::tcExtract(dst, dstCount, src, omsb, 0);
2600   }
2601 
2602   return normalize(rounding_mode, lost_fraction);
2603 }
2604 
convertFromAPInt(const APInt & Val,bool isSigned,roundingMode rounding_mode)2605 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2606                                                 roundingMode rounding_mode) {
2607   unsigned int partCount = Val.getNumWords();
2608   APInt api = Val;
2609 
2610   sign = false;
2611   if (isSigned && api.isNegative()) {
2612     sign = true;
2613     api = -api;
2614   }
2615 
2616   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2617 }
2618 
2619 /* Convert a two's complement integer SRC to a floating point number,
2620    rounding according to ROUNDING_MODE.  ISSIGNED is true if the
2621    integer is signed, in which case it must be sign-extended.  */
2622 IEEEFloat::opStatus
convertFromSignExtendedInteger(const integerPart * src,unsigned int srcCount,bool isSigned,roundingMode rounding_mode)2623 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2624                                           unsigned int srcCount, bool isSigned,
2625                                           roundingMode rounding_mode) {
2626   opStatus status;
2627 
2628   if (isSigned &&
2629       APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2630     integerPart *copy;
2631 
2632     /* If we're signed and negative negate a copy.  */
2633     sign = true;
2634     copy = new integerPart[srcCount];
2635     APInt::tcAssign(copy, src, srcCount);
2636     APInt::tcNegate(copy, srcCount);
2637     status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2638     delete [] copy;
2639   } else {
2640     sign = false;
2641     status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2642   }
2643 
2644   return status;
2645 }
2646 
2647 /* FIXME: should this just take a const APInt reference?  */
2648 IEEEFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode)2649 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2650                                           unsigned int width, bool isSigned,
2651                                           roundingMode rounding_mode) {
2652   unsigned int partCount = partCountForBits(width);
2653   APInt api = APInt(width, ArrayRef(parts, partCount));
2654 
2655   sign = false;
2656   if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2657     sign = true;
2658     api = -api;
2659   }
2660 
2661   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2662 }
2663 
2664 Expected<IEEEFloat::opStatus>
convertFromHexadecimalString(StringRef s,roundingMode rounding_mode)2665 IEEEFloat::convertFromHexadecimalString(StringRef s,
2666                                         roundingMode rounding_mode) {
2667   lostFraction lost_fraction = lfExactlyZero;
2668 
2669   category = fcNormal;
2670   zeroSignificand();
2671   exponent = 0;
2672 
2673   integerPart *significand = significandParts();
2674   unsigned partsCount = partCount();
2675   unsigned bitPos = partsCount * integerPartWidth;
2676   bool computedTrailingFraction = false;
2677 
2678   // Skip leading zeroes and any (hexa)decimal point.
2679   StringRef::iterator begin = s.begin();
2680   StringRef::iterator end = s.end();
2681   StringRef::iterator dot;
2682   auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2683   if (!PtrOrErr)
2684     return PtrOrErr.takeError();
2685   StringRef::iterator p = *PtrOrErr;
2686   StringRef::iterator firstSignificantDigit = p;
2687 
2688   while (p != end) {
2689     integerPart hex_value;
2690 
2691     if (*p == '.') {
2692       if (dot != end)
2693         return createError("String contains multiple dots");
2694       dot = p++;
2695       continue;
2696     }
2697 
2698     hex_value = hexDigitValue(*p);
2699     if (hex_value == -1U)
2700       break;
2701 
2702     p++;
2703 
2704     // Store the number while we have space.
2705     if (bitPos) {
2706       bitPos -= 4;
2707       hex_value <<= bitPos % integerPartWidth;
2708       significand[bitPos / integerPartWidth] |= hex_value;
2709     } else if (!computedTrailingFraction) {
2710       auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2711       if (!FractOrErr)
2712         return FractOrErr.takeError();
2713       lost_fraction = *FractOrErr;
2714       computedTrailingFraction = true;
2715     }
2716   }
2717 
2718   /* Hex floats require an exponent but not a hexadecimal point.  */
2719   if (p == end)
2720     return createError("Hex strings require an exponent");
2721   if (*p != 'p' && *p != 'P')
2722     return createError("Invalid character in significand");
2723   if (p == begin)
2724     return createError("Significand has no digits");
2725   if (dot != end && p - begin == 1)
2726     return createError("Significand has no digits");
2727 
2728   /* Ignore the exponent if we are zero.  */
2729   if (p != firstSignificantDigit) {
2730     int expAdjustment;
2731 
2732     /* Implicit hexadecimal point?  */
2733     if (dot == end)
2734       dot = p;
2735 
2736     /* Calculate the exponent adjustment implicit in the number of
2737        significant digits.  */
2738     expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2739     if (expAdjustment < 0)
2740       expAdjustment++;
2741     expAdjustment = expAdjustment * 4 - 1;
2742 
2743     /* Adjust for writing the significand starting at the most
2744        significant nibble.  */
2745     expAdjustment += semantics->precision;
2746     expAdjustment -= partsCount * integerPartWidth;
2747 
2748     /* Adjust for the given exponent.  */
2749     auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2750     if (!ExpOrErr)
2751       return ExpOrErr.takeError();
2752     exponent = *ExpOrErr;
2753   }
2754 
2755   return normalize(rounding_mode, lost_fraction);
2756 }
2757 
2758 IEEEFloat::opStatus
roundSignificandWithExponent(const integerPart * decSigParts,unsigned sigPartCount,int exp,roundingMode rounding_mode)2759 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2760                                         unsigned sigPartCount, int exp,
2761                                         roundingMode rounding_mode) {
2762   unsigned int parts, pow5PartCount;
2763   fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2764   integerPart pow5Parts[maxPowerOfFiveParts];
2765   bool isNearest;
2766 
2767   isNearest = (rounding_mode == rmNearestTiesToEven ||
2768                rounding_mode == rmNearestTiesToAway);
2769 
2770   parts = partCountForBits(semantics->precision + 11);
2771 
2772   /* Calculate pow(5, abs(exp)).  */
2773   pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2774 
2775   for (;; parts *= 2) {
2776     opStatus sigStatus, powStatus;
2777     unsigned int excessPrecision, truncatedBits;
2778 
2779     calcSemantics.precision = parts * integerPartWidth - 1;
2780     excessPrecision = calcSemantics.precision - semantics->precision;
2781     truncatedBits = excessPrecision;
2782 
2783     IEEEFloat decSig(calcSemantics, uninitialized);
2784     decSig.makeZero(sign);
2785     IEEEFloat pow5(calcSemantics);
2786 
2787     sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2788                                                 rmNearestTiesToEven);
2789     powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2790                                               rmNearestTiesToEven);
2791     /* Add exp, as 10^n = 5^n * 2^n.  */
2792     decSig.exponent += exp;
2793 
2794     lostFraction calcLostFraction;
2795     integerPart HUerr, HUdistance;
2796     unsigned int powHUerr;
2797 
2798     if (exp >= 0) {
2799       /* multiplySignificand leaves the precision-th bit set to 1.  */
2800       calcLostFraction = decSig.multiplySignificand(pow5);
2801       powHUerr = powStatus != opOK;
2802     } else {
2803       calcLostFraction = decSig.divideSignificand(pow5);
2804       /* Denormal numbers have less precision.  */
2805       if (decSig.exponent < semantics->minExponent) {
2806         excessPrecision += (semantics->minExponent - decSig.exponent);
2807         truncatedBits = excessPrecision;
2808         if (excessPrecision > calcSemantics.precision)
2809           excessPrecision = calcSemantics.precision;
2810       }
2811       /* Extra half-ulp lost in reciprocal of exponent.  */
2812       powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2813     }
2814 
2815     /* Both multiplySignificand and divideSignificand return the
2816        result with the integer bit set.  */
2817     assert(APInt::tcExtractBit
2818            (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2819 
2820     HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2821                        powHUerr);
2822     HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2823                                       excessPrecision, isNearest);
2824 
2825     /* Are we guaranteed to round correctly if we truncate?  */
2826     if (HUdistance >= HUerr) {
2827       APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2828                        calcSemantics.precision - excessPrecision,
2829                        excessPrecision);
2830       /* Take the exponent of decSig.  If we tcExtract-ed less bits
2831          above we must adjust our exponent to compensate for the
2832          implicit right shift.  */
2833       exponent = (decSig.exponent + semantics->precision
2834                   - (calcSemantics.precision - excessPrecision));
2835       calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2836                                                        decSig.partCount(),
2837                                                        truncatedBits);
2838       return normalize(rounding_mode, calcLostFraction);
2839     }
2840   }
2841 }
2842 
2843 Expected<IEEEFloat::opStatus>
convertFromDecimalString(StringRef str,roundingMode rounding_mode)2844 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2845   decimalInfo D;
2846   opStatus fs;
2847 
2848   /* Scan the text.  */
2849   StringRef::iterator p = str.begin();
2850   if (Error Err = interpretDecimal(p, str.end(), &D))
2851     return std::move(Err);
2852 
2853   /* Handle the quick cases.  First the case of no significant digits,
2854      i.e. zero, and then exponents that are obviously too large or too
2855      small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
2856      definitely overflows if
2857 
2858            (exp - 1) * L >= maxExponent
2859 
2860      and definitely underflows to zero where
2861 
2862            (exp + 1) * L <= minExponent - precision
2863 
2864      With integer arithmetic the tightest bounds for L are
2865 
2866            93/28 < L < 196/59            [ numerator <= 256 ]
2867            42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
2868   */
2869 
2870   // Test if we have a zero number allowing for strings with no null terminators
2871   // and zero decimals with non-zero exponents.
2872   //
2873   // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2874   // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2875   // be at most one dot. On the other hand, if we have a zero with a non-zero
2876   // exponent, then we know that D.firstSigDigit will be non-numeric.
2877   if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2878     category = fcZero;
2879     fs = opOK;
2880 
2881   /* Check whether the normalized exponent is high enough to overflow
2882      max during the log-rebasing in the max-exponent check below. */
2883   } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2884     fs = handleOverflow(rounding_mode);
2885 
2886   /* If it wasn't, then it also wasn't high enough to overflow max
2887      during the log-rebasing in the min-exponent check.  Check that it
2888      won't overflow min in either check, then perform the min-exponent
2889      check. */
2890   } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2891              (D.normalizedExponent + 1) * 28738 <=
2892                8651 * (semantics->minExponent - (int) semantics->precision)) {
2893     /* Underflow to zero and round.  */
2894     category = fcNormal;
2895     zeroSignificand();
2896     fs = normalize(rounding_mode, lfLessThanHalf);
2897 
2898   /* We can finally safely perform the max-exponent check. */
2899   } else if ((D.normalizedExponent - 1) * 42039
2900              >= 12655 * semantics->maxExponent) {
2901     /* Overflow and round.  */
2902     fs = handleOverflow(rounding_mode);
2903   } else {
2904     integerPart *decSignificand;
2905     unsigned int partCount;
2906 
2907     /* A tight upper bound on number of bits required to hold an
2908        N-digit decimal integer is N * 196 / 59.  Allocate enough space
2909        to hold the full significand, and an extra part required by
2910        tcMultiplyPart.  */
2911     partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2912     partCount = partCountForBits(1 + 196 * partCount / 59);
2913     decSignificand = new integerPart[partCount + 1];
2914     partCount = 0;
2915 
2916     /* Convert to binary efficiently - we do almost all multiplication
2917        in an integerPart.  When this would overflow do we do a single
2918        bignum multiplication, and then revert again to multiplication
2919        in an integerPart.  */
2920     do {
2921       integerPart decValue, val, multiplier;
2922 
2923       val = 0;
2924       multiplier = 1;
2925 
2926       do {
2927         if (*p == '.') {
2928           p++;
2929           if (p == str.end()) {
2930             break;
2931           }
2932         }
2933         decValue = decDigitValue(*p++);
2934         if (decValue >= 10U) {
2935           delete[] decSignificand;
2936           return createError("Invalid character in significand");
2937         }
2938         multiplier *= 10;
2939         val = val * 10 + decValue;
2940         /* The maximum number that can be multiplied by ten with any
2941            digit added without overflowing an integerPart.  */
2942       } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2943 
2944       /* Multiply out the current part.  */
2945       APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2946                             partCount, partCount + 1, false);
2947 
2948       /* If we used another part (likely but not guaranteed), increase
2949          the count.  */
2950       if (decSignificand[partCount])
2951         partCount++;
2952     } while (p <= D.lastSigDigit);
2953 
2954     category = fcNormal;
2955     fs = roundSignificandWithExponent(decSignificand, partCount,
2956                                       D.exponent, rounding_mode);
2957 
2958     delete [] decSignificand;
2959   }
2960 
2961   return fs;
2962 }
2963 
convertFromStringSpecials(StringRef str)2964 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2965   const size_t MIN_NAME_SIZE = 3;
2966 
2967   if (str.size() < MIN_NAME_SIZE)
2968     return false;
2969 
2970   if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
2971     makeInf(false);
2972     return true;
2973   }
2974 
2975   bool IsNegative = str.front() == '-';
2976   if (IsNegative) {
2977     str = str.drop_front();
2978     if (str.size() < MIN_NAME_SIZE)
2979       return false;
2980 
2981     if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) {
2982       makeInf(true);
2983       return true;
2984     }
2985   }
2986 
2987   // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
2988   bool IsSignaling = str.front() == 's' || str.front() == 'S';
2989   if (IsSignaling) {
2990     str = str.drop_front();
2991     if (str.size() < MIN_NAME_SIZE)
2992       return false;
2993   }
2994 
2995   if (str.startswith("nan") || str.startswith("NaN")) {
2996     str = str.drop_front(3);
2997 
2998     // A NaN without payload.
2999     if (str.empty()) {
3000       makeNaN(IsSignaling, IsNegative);
3001       return true;
3002     }
3003 
3004     // Allow the payload to be inside parentheses.
3005     if (str.front() == '(') {
3006       // Parentheses should be balanced (and not empty).
3007       if (str.size() <= 2 || str.back() != ')')
3008         return false;
3009 
3010       str = str.slice(1, str.size() - 1);
3011     }
3012 
3013     // Determine the payload number's radix.
3014     unsigned Radix = 10;
3015     if (str[0] == '0') {
3016       if (str.size() > 1 && tolower(str[1]) == 'x') {
3017         str = str.drop_front(2);
3018         Radix = 16;
3019       } else
3020         Radix = 8;
3021     }
3022 
3023     // Parse the payload and make the NaN.
3024     APInt Payload;
3025     if (!str.getAsInteger(Radix, Payload)) {
3026       makeNaN(IsSignaling, IsNegative, &Payload);
3027       return true;
3028     }
3029   }
3030 
3031   return false;
3032 }
3033 
3034 Expected<IEEEFloat::opStatus>
convertFromString(StringRef str,roundingMode rounding_mode)3035 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
3036   if (str.empty())
3037     return createError("Invalid string length");
3038 
3039   // Handle special cases.
3040   if (convertFromStringSpecials(str))
3041     return opOK;
3042 
3043   /* Handle a leading minus sign.  */
3044   StringRef::iterator p = str.begin();
3045   size_t slen = str.size();
3046   sign = *p == '-' ? 1 : 0;
3047   if (*p == '-' || *p == '+') {
3048     p++;
3049     slen--;
3050     if (!slen)
3051       return createError("String has no digits");
3052   }
3053 
3054   if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3055     if (slen == 2)
3056       return createError("Invalid string");
3057     return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3058                                         rounding_mode);
3059   }
3060 
3061   return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3062 }
3063 
3064 /* Write out a hexadecimal representation of the floating point value
3065    to DST, which must be of sufficient size, in the C99 form
3066    [-]0xh.hhhhp[+-]d.  Return the number of characters written,
3067    excluding the terminating NUL.
3068 
3069    If UPPERCASE, the output is in upper case, otherwise in lower case.
3070 
3071    HEXDIGITS digits appear altogether, rounding the value if
3072    necessary.  If HEXDIGITS is 0, the minimal precision to display the
3073    number precisely is used instead.  If nothing would appear after
3074    the decimal point it is suppressed.
3075 
3076    The decimal exponent is always printed and has at least one digit.
3077    Zero values display an exponent of zero.  Infinities and NaNs
3078    appear as "infinity" or "nan" respectively.
3079 
3080    The above rules are as specified by C99.  There is ambiguity about
3081    what the leading hexadecimal digit should be.  This implementation
3082    uses whatever is necessary so that the exponent is displayed as
3083    stored.  This implies the exponent will fall within the IEEE format
3084    range, and the leading hexadecimal digit will be 0 (for denormals),
3085    1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3086    any other digits zero).
3087 */
convertToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const3088 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3089                                            bool upperCase,
3090                                            roundingMode rounding_mode) const {
3091   char *p;
3092 
3093   p = dst;
3094   if (sign)
3095     *dst++ = '-';
3096 
3097   switch (category) {
3098   case fcInfinity:
3099     memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3100     dst += sizeof infinityL - 1;
3101     break;
3102 
3103   case fcNaN:
3104     memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3105     dst += sizeof NaNU - 1;
3106     break;
3107 
3108   case fcZero:
3109     *dst++ = '0';
3110     *dst++ = upperCase ? 'X': 'x';
3111     *dst++ = '0';
3112     if (hexDigits > 1) {
3113       *dst++ = '.';
3114       memset (dst, '0', hexDigits - 1);
3115       dst += hexDigits - 1;
3116     }
3117     *dst++ = upperCase ? 'P': 'p';
3118     *dst++ = '0';
3119     break;
3120 
3121   case fcNormal:
3122     dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3123     break;
3124   }
3125 
3126   *dst = 0;
3127 
3128   return static_cast<unsigned int>(dst - p);
3129 }
3130 
3131 /* Does the hard work of outputting the correctly rounded hexadecimal
3132    form of a normal floating point number with the specified number of
3133    hexadecimal digits.  If HEXDIGITS is zero the minimum number of
3134    digits necessary to print the value precisely is output.  */
convertNormalToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const3135 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3136                                           bool upperCase,
3137                                           roundingMode rounding_mode) const {
3138   unsigned int count, valueBits, shift, partsCount, outputDigits;
3139   const char *hexDigitChars;
3140   const integerPart *significand;
3141   char *p;
3142   bool roundUp;
3143 
3144   *dst++ = '0';
3145   *dst++ = upperCase ? 'X': 'x';
3146 
3147   roundUp = false;
3148   hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3149 
3150   significand = significandParts();
3151   partsCount = partCount();
3152 
3153   /* +3 because the first digit only uses the single integer bit, so
3154      we have 3 virtual zero most-significant-bits.  */
3155   valueBits = semantics->precision + 3;
3156   shift = integerPartWidth - valueBits % integerPartWidth;
3157 
3158   /* The natural number of digits required ignoring trailing
3159      insignificant zeroes.  */
3160   outputDigits = (valueBits - significandLSB () + 3) / 4;
3161 
3162   /* hexDigits of zero means use the required number for the
3163      precision.  Otherwise, see if we are truncating.  If we are,
3164      find out if we need to round away from zero.  */
3165   if (hexDigits) {
3166     if (hexDigits < outputDigits) {
3167       /* We are dropping non-zero bits, so need to check how to round.
3168          "bits" is the number of dropped bits.  */
3169       unsigned int bits;
3170       lostFraction fraction;
3171 
3172       bits = valueBits - hexDigits * 4;
3173       fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3174       roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3175     }
3176     outputDigits = hexDigits;
3177   }
3178 
3179   /* Write the digits consecutively, and start writing in the location
3180      of the hexadecimal point.  We move the most significant digit
3181      left and add the hexadecimal point later.  */
3182   p = ++dst;
3183 
3184   count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3185 
3186   while (outputDigits && count) {
3187     integerPart part;
3188 
3189     /* Put the most significant integerPartWidth bits in "part".  */
3190     if (--count == partsCount)
3191       part = 0;  /* An imaginary higher zero part.  */
3192     else
3193       part = significand[count] << shift;
3194 
3195     if (count && shift)
3196       part |= significand[count - 1] >> (integerPartWidth - shift);
3197 
3198     /* Convert as much of "part" to hexdigits as we can.  */
3199     unsigned int curDigits = integerPartWidth / 4;
3200 
3201     if (curDigits > outputDigits)
3202       curDigits = outputDigits;
3203     dst += partAsHex (dst, part, curDigits, hexDigitChars);
3204     outputDigits -= curDigits;
3205   }
3206 
3207   if (roundUp) {
3208     char *q = dst;
3209 
3210     /* Note that hexDigitChars has a trailing '0'.  */
3211     do {
3212       q--;
3213       *q = hexDigitChars[hexDigitValue (*q) + 1];
3214     } while (*q == '0');
3215     assert(q >= p);
3216   } else {
3217     /* Add trailing zeroes.  */
3218     memset (dst, '0', outputDigits);
3219     dst += outputDigits;
3220   }
3221 
3222   /* Move the most significant digit to before the point, and if there
3223      is something after the decimal point add it.  This must come
3224      after rounding above.  */
3225   p[-1] = p[0];
3226   if (dst -1 == p)
3227     dst--;
3228   else
3229     p[0] = '.';
3230 
3231   /* Finally output the exponent.  */
3232   *dst++ = upperCase ? 'P': 'p';
3233 
3234   return writeSignedDecimal (dst, exponent);
3235 }
3236 
hash_value(const IEEEFloat & Arg)3237 hash_code hash_value(const IEEEFloat &Arg) {
3238   if (!Arg.isFiniteNonZero())
3239     return hash_combine((uint8_t)Arg.category,
3240                         // NaN has no sign, fix it at zero.
3241                         Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3242                         Arg.semantics->precision);
3243 
3244   // Normal floats need their exponent and significand hashed.
3245   return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3246                       Arg.semantics->precision, Arg.exponent,
3247                       hash_combine_range(
3248                         Arg.significandParts(),
3249                         Arg.significandParts() + Arg.partCount()));
3250 }
3251 
3252 // Conversion from APFloat to/from host float/double.  It may eventually be
3253 // possible to eliminate these and have everybody deal with APFloats, but that
3254 // will take a while.  This approach will not easily extend to long double.
3255 // Current implementation requires integerPartWidth==64, which is correct at
3256 // the moment but could be made more general.
3257 
3258 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
3259 // the actual IEEE respresentations.  We compensate for that here.
3260 
convertF80LongDoubleAPFloatToAPInt() const3261 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3262   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3263   assert(partCount()==2);
3264 
3265   uint64_t myexponent, mysignificand;
3266 
3267   if (isFiniteNonZero()) {
3268     myexponent = exponent+16383; //bias
3269     mysignificand = significandParts()[0];
3270     if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3271       myexponent = 0;   // denormal
3272   } else if (category==fcZero) {
3273     myexponent = 0;
3274     mysignificand = 0;
3275   } else if (category==fcInfinity) {
3276     myexponent = 0x7fff;
3277     mysignificand = 0x8000000000000000ULL;
3278   } else {
3279     assert(category == fcNaN && "Unknown category");
3280     myexponent = 0x7fff;
3281     mysignificand = significandParts()[0];
3282   }
3283 
3284   uint64_t words[2];
3285   words[0] = mysignificand;
3286   words[1] =  ((uint64_t)(sign & 1) << 15) |
3287               (myexponent & 0x7fffLL);
3288   return APInt(80, words);
3289 }
3290 
convertPPCDoubleDoubleAPFloatToAPInt() const3291 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3292   assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3293   assert(partCount()==2);
3294 
3295   uint64_t words[2];
3296   opStatus fs;
3297   bool losesInfo;
3298 
3299   // Convert number to double.  To avoid spurious underflows, we re-
3300   // normalize against the "double" minExponent first, and only *then*
3301   // truncate the mantissa.  The result of that second conversion
3302   // may be inexact, but should never underflow.
3303   // Declare fltSemantics before APFloat that uses it (and
3304   // saves pointer to it) to ensure correct destruction order.
3305   fltSemantics extendedSemantics = *semantics;
3306   extendedSemantics.minExponent = semIEEEdouble.minExponent;
3307   IEEEFloat extended(*this);
3308   fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3309   assert(fs == opOK && !losesInfo);
3310   (void)fs;
3311 
3312   IEEEFloat u(extended);
3313   fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3314   assert(fs == opOK || fs == opInexact);
3315   (void)fs;
3316   words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3317 
3318   // If conversion was exact or resulted in a special case, we're done;
3319   // just set the second double to zero.  Otherwise, re-convert back to
3320   // the extended format and compute the difference.  This now should
3321   // convert exactly to double.
3322   if (u.isFiniteNonZero() && losesInfo) {
3323     fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3324     assert(fs == opOK && !losesInfo);
3325     (void)fs;
3326 
3327     IEEEFloat v(extended);
3328     v.subtract(u, rmNearestTiesToEven);
3329     fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3330     assert(fs == opOK && !losesInfo);
3331     (void)fs;
3332     words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3333   } else {
3334     words[1] = 0;
3335   }
3336 
3337   return APInt(128, words);
3338 }
3339 
convertQuadrupleAPFloatToAPInt() const3340 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3341   assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
3342   assert(partCount()==2);
3343 
3344   uint64_t myexponent, mysignificand, mysignificand2;
3345 
3346   if (isFiniteNonZero()) {
3347     myexponent = exponent+16383; //bias
3348     mysignificand = significandParts()[0];
3349     mysignificand2 = significandParts()[1];
3350     if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
3351       myexponent = 0;   // denormal
3352   } else if (category==fcZero) {
3353     myexponent = 0;
3354     mysignificand = mysignificand2 = 0;
3355   } else if (category==fcInfinity) {
3356     myexponent = 0x7fff;
3357     mysignificand = mysignificand2 = 0;
3358   } else {
3359     assert(category == fcNaN && "Unknown category!");
3360     myexponent = 0x7fff;
3361     mysignificand = significandParts()[0];
3362     mysignificand2 = significandParts()[1];
3363   }
3364 
3365   uint64_t words[2];
3366   words[0] = mysignificand;
3367   words[1] = ((uint64_t)(sign & 1) << 63) |
3368              ((myexponent & 0x7fff) << 48) |
3369              (mysignificand2 & 0xffffffffffffLL);
3370 
3371   return APInt(128, words);
3372 }
3373 
convertDoubleAPFloatToAPInt() const3374 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3375   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
3376   assert(partCount()==1);
3377 
3378   uint64_t myexponent, mysignificand;
3379 
3380   if (isFiniteNonZero()) {
3381     myexponent = exponent+1023; //bias
3382     mysignificand = *significandParts();
3383     if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
3384       myexponent = 0;   // denormal
3385   } else if (category==fcZero) {
3386     myexponent = 0;
3387     mysignificand = 0;
3388   } else if (category==fcInfinity) {
3389     myexponent = 0x7ff;
3390     mysignificand = 0;
3391   } else {
3392     assert(category == fcNaN && "Unknown category!");
3393     myexponent = 0x7ff;
3394     mysignificand = *significandParts();
3395   }
3396 
3397   return APInt(64, ((((uint64_t)(sign & 1) << 63) |
3398                      ((myexponent & 0x7ff) <<  52) |
3399                      (mysignificand & 0xfffffffffffffLL))));
3400 }
3401 
convertFloatAPFloatToAPInt() const3402 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3403   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
3404   assert(partCount()==1);
3405 
3406   uint32_t myexponent, mysignificand;
3407 
3408   if (isFiniteNonZero()) {
3409     myexponent = exponent+127; //bias
3410     mysignificand = (uint32_t)*significandParts();
3411     if (myexponent == 1 && !(mysignificand & 0x800000))
3412       myexponent = 0;   // denormal
3413   } else if (category==fcZero) {
3414     myexponent = 0;
3415     mysignificand = 0;
3416   } else if (category==fcInfinity) {
3417     myexponent = 0xff;
3418     mysignificand = 0;
3419   } else {
3420     assert(category == fcNaN && "Unknown category!");
3421     myexponent = 0xff;
3422     mysignificand = (uint32_t)*significandParts();
3423   }
3424 
3425   return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3426                     (mysignificand & 0x7fffff)));
3427 }
3428 
convertBFloatAPFloatToAPInt() const3429 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3430   assert(semantics == (const llvm::fltSemantics *)&semBFloat);
3431   assert(partCount() == 1);
3432 
3433   uint32_t myexponent, mysignificand;
3434 
3435   if (isFiniteNonZero()) {
3436     myexponent = exponent + 127; // bias
3437     mysignificand = (uint32_t)*significandParts();
3438     if (myexponent == 1 && !(mysignificand & 0x80))
3439       myexponent = 0; // denormal
3440   } else if (category == fcZero) {
3441     myexponent = 0;
3442     mysignificand = 0;
3443   } else if (category == fcInfinity) {
3444     myexponent = 0xff;
3445     mysignificand = 0;
3446   } else {
3447     assert(category == fcNaN && "Unknown category!");
3448     myexponent = 0xff;
3449     mysignificand = (uint32_t)*significandParts();
3450   }
3451 
3452   return APInt(16, (((sign & 1) << 15) | ((myexponent & 0xff) << 7) |
3453                     (mysignificand & 0x7f)));
3454 }
3455 
convertHalfAPFloatToAPInt() const3456 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3457   assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
3458   assert(partCount()==1);
3459 
3460   uint32_t myexponent, mysignificand;
3461 
3462   if (isFiniteNonZero()) {
3463     myexponent = exponent+15; //bias
3464     mysignificand = (uint32_t)*significandParts();
3465     if (myexponent == 1 && !(mysignificand & 0x400))
3466       myexponent = 0;   // denormal
3467   } else if (category==fcZero) {
3468     myexponent = 0;
3469     mysignificand = 0;
3470   } else if (category==fcInfinity) {
3471     myexponent = 0x1f;
3472     mysignificand = 0;
3473   } else {
3474     assert(category == fcNaN && "Unknown category!");
3475     myexponent = 0x1f;
3476     mysignificand = (uint32_t)*significandParts();
3477   }
3478 
3479   return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3480                     (mysignificand & 0x3ff)));
3481 }
3482 
convertFloat8E5M2APFloatToAPInt() const3483 APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3484   assert(semantics == (const llvm::fltSemantics *)&semFloat8E5M2);
3485   assert(partCount() == 1);
3486 
3487   uint32_t myexponent, mysignificand;
3488 
3489   if (isFiniteNonZero()) {
3490     myexponent = exponent + 15; // bias
3491     mysignificand = (uint32_t)*significandParts();
3492     if (myexponent == 1 && !(mysignificand & 0x4))
3493       myexponent = 0; // denormal
3494   } else if (category == fcZero) {
3495     myexponent = 0;
3496     mysignificand = 0;
3497   } else if (category == fcInfinity) {
3498     myexponent = 0x1f;
3499     mysignificand = 0;
3500   } else {
3501     assert(category == fcNaN && "Unknown category!");
3502     myexponent = 0x1f;
3503     mysignificand = (uint32_t)*significandParts();
3504   }
3505 
3506   return APInt(8, (((sign & 1) << 7) | ((myexponent & 0x1f) << 2) |
3507                    (mysignificand & 0x3)));
3508 }
3509 
convertFloat8E4M3FNAPFloatToAPInt() const3510 APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3511   assert(semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN);
3512   assert(partCount() == 1);
3513 
3514   uint32_t myexponent, mysignificand;
3515 
3516   if (isFiniteNonZero()) {
3517     myexponent = exponent + 7; // bias
3518     mysignificand = (uint32_t)*significandParts();
3519     if (myexponent == 1 && !(mysignificand & 0x8))
3520       myexponent = 0; // denormal
3521   } else if (category == fcZero) {
3522     myexponent = 0;
3523     mysignificand = 0;
3524   } else if (category == fcInfinity) {
3525     myexponent = 0xf;
3526     mysignificand = 0;
3527   } else {
3528     assert(category == fcNaN && "Unknown category!");
3529     myexponent = 0xf;
3530     mysignificand = (uint32_t)*significandParts();
3531   }
3532 
3533   return APInt(8, (((sign & 1) << 7) | ((myexponent & 0xf) << 3) |
3534                    (mysignificand & 0x7)));
3535 }
3536 
3537 // This function creates an APInt that is just a bit map of the floating
3538 // point constant as it would appear in memory.  It is not a conversion,
3539 // and treating the result as a normal integer is unlikely to be useful.
3540 
bitcastToAPInt() const3541 APInt IEEEFloat::bitcastToAPInt() const {
3542   if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3543     return convertHalfAPFloatToAPInt();
3544 
3545   if (semantics == (const llvm::fltSemantics *)&semBFloat)
3546     return convertBFloatAPFloatToAPInt();
3547 
3548   if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3549     return convertFloatAPFloatToAPInt();
3550 
3551   if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3552     return convertDoubleAPFloatToAPInt();
3553 
3554   if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3555     return convertQuadrupleAPFloatToAPInt();
3556 
3557   if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3558     return convertPPCDoubleDoubleAPFloatToAPInt();
3559 
3560   if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3561     return convertFloat8E5M2APFloatToAPInt();
3562 
3563   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3564     return convertFloat8E4M3FNAPFloatToAPInt();
3565 
3566   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3567          "unknown format!");
3568   return convertF80LongDoubleAPFloatToAPInt();
3569 }
3570 
convertToFloat() const3571 float IEEEFloat::convertToFloat() const {
3572   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3573          "Float semantics are not IEEEsingle");
3574   APInt api = bitcastToAPInt();
3575   return api.bitsToFloat();
3576 }
3577 
convertToDouble() const3578 double IEEEFloat::convertToDouble() const {
3579   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3580          "Float semantics are not IEEEdouble");
3581   APInt api = bitcastToAPInt();
3582   return api.bitsToDouble();
3583 }
3584 
3585 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
3586 /// does not support these bit patterns:
3587 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3588 ///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3589 ///  exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3590 ///  exponent = 0, integer bit 1 ("pseudodenormal")
3591 /// At the moment, the first three are treated as NaNs, the last one as Normal.
initFromF80LongDoubleAPInt(const APInt & api)3592 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3593   uint64_t i1 = api.getRawData()[0];
3594   uint64_t i2 = api.getRawData()[1];
3595   uint64_t myexponent = (i2 & 0x7fff);
3596   uint64_t mysignificand = i1;
3597   uint8_t myintegerbit = mysignificand >> 63;
3598 
3599   initialize(&semX87DoubleExtended);
3600   assert(partCount()==2);
3601 
3602   sign = static_cast<unsigned int>(i2>>15);
3603   if (myexponent == 0 && mysignificand == 0) {
3604     makeZero(sign);
3605   } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3606     makeInf(sign);
3607   } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3608              (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3609     category = fcNaN;
3610     exponent = exponentNaN();
3611     significandParts()[0] = mysignificand;
3612     significandParts()[1] = 0;
3613   } else {
3614     category = fcNormal;
3615     exponent = myexponent - 16383;
3616     significandParts()[0] = mysignificand;
3617     significandParts()[1] = 0;
3618     if (myexponent==0)          // denormal
3619       exponent = -16382;
3620   }
3621 }
3622 
initFromPPCDoubleDoubleAPInt(const APInt & api)3623 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3624   uint64_t i1 = api.getRawData()[0];
3625   uint64_t i2 = api.getRawData()[1];
3626   opStatus fs;
3627   bool losesInfo;
3628 
3629   // Get the first double and convert to our format.
3630   initFromDoubleAPInt(APInt(64, i1));
3631   fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3632   assert(fs == opOK && !losesInfo);
3633   (void)fs;
3634 
3635   // Unless we have a special case, add in second double.
3636   if (isFiniteNonZero()) {
3637     IEEEFloat v(semIEEEdouble, APInt(64, i2));
3638     fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3639     assert(fs == opOK && !losesInfo);
3640     (void)fs;
3641 
3642     add(v, rmNearestTiesToEven);
3643   }
3644 }
3645 
initFromQuadrupleAPInt(const APInt & api)3646 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3647   uint64_t i1 = api.getRawData()[0];
3648   uint64_t i2 = api.getRawData()[1];
3649   uint64_t myexponent = (i2 >> 48) & 0x7fff;
3650   uint64_t mysignificand  = i1;
3651   uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3652 
3653   initialize(&semIEEEquad);
3654   assert(partCount()==2);
3655 
3656   sign = static_cast<unsigned int>(i2>>63);
3657   if (myexponent==0 &&
3658       (mysignificand==0 && mysignificand2==0)) {
3659     makeZero(sign);
3660   } else if (myexponent==0x7fff &&
3661              (mysignificand==0 && mysignificand2==0)) {
3662     makeInf(sign);
3663   } else if (myexponent==0x7fff &&
3664              (mysignificand!=0 || mysignificand2 !=0)) {
3665     category = fcNaN;
3666     exponent = exponentNaN();
3667     significandParts()[0] = mysignificand;
3668     significandParts()[1] = mysignificand2;
3669   } else {
3670     category = fcNormal;
3671     exponent = myexponent - 16383;
3672     significandParts()[0] = mysignificand;
3673     significandParts()[1] = mysignificand2;
3674     if (myexponent==0)          // denormal
3675       exponent = -16382;
3676     else
3677       significandParts()[1] |= 0x1000000000000LL;  // integer bit
3678   }
3679 }
3680 
initFromDoubleAPInt(const APInt & api)3681 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3682   uint64_t i = *api.getRawData();
3683   uint64_t myexponent = (i >> 52) & 0x7ff;
3684   uint64_t mysignificand = i & 0xfffffffffffffLL;
3685 
3686   initialize(&semIEEEdouble);
3687   assert(partCount()==1);
3688 
3689   sign = static_cast<unsigned int>(i>>63);
3690   if (myexponent==0 && mysignificand==0) {
3691     makeZero(sign);
3692   } else if (myexponent==0x7ff && mysignificand==0) {
3693     makeInf(sign);
3694   } else if (myexponent==0x7ff && mysignificand!=0) {
3695     category = fcNaN;
3696     exponent = exponentNaN();
3697     *significandParts() = mysignificand;
3698   } else {
3699     category = fcNormal;
3700     exponent = myexponent - 1023;
3701     *significandParts() = mysignificand;
3702     if (myexponent==0)          // denormal
3703       exponent = -1022;
3704     else
3705       *significandParts() |= 0x10000000000000LL;  // integer bit
3706   }
3707 }
3708 
initFromFloatAPInt(const APInt & api)3709 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3710   uint32_t i = (uint32_t)*api.getRawData();
3711   uint32_t myexponent = (i >> 23) & 0xff;
3712   uint32_t mysignificand = i & 0x7fffff;
3713 
3714   initialize(&semIEEEsingle);
3715   assert(partCount()==1);
3716 
3717   sign = i >> 31;
3718   if (myexponent==0 && mysignificand==0) {
3719     makeZero(sign);
3720   } else if (myexponent==0xff && mysignificand==0) {
3721     makeInf(sign);
3722   } else if (myexponent==0xff && mysignificand!=0) {
3723     category = fcNaN;
3724     exponent = exponentNaN();
3725     *significandParts() = mysignificand;
3726   } else {
3727     category = fcNormal;
3728     exponent = myexponent - 127;  //bias
3729     *significandParts() = mysignificand;
3730     if (myexponent==0)    // denormal
3731       exponent = -126;
3732     else
3733       *significandParts() |= 0x800000; // integer bit
3734   }
3735 }
3736 
initFromBFloatAPInt(const APInt & api)3737 void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3738   uint32_t i = (uint32_t)*api.getRawData();
3739   uint32_t myexponent = (i >> 7) & 0xff;
3740   uint32_t mysignificand = i & 0x7f;
3741 
3742   initialize(&semBFloat);
3743   assert(partCount() == 1);
3744 
3745   sign = i >> 15;
3746   if (myexponent == 0 && mysignificand == 0) {
3747     makeZero(sign);
3748   } else if (myexponent == 0xff && mysignificand == 0) {
3749     makeInf(sign);
3750   } else if (myexponent == 0xff && mysignificand != 0) {
3751     category = fcNaN;
3752     exponent = exponentNaN();
3753     *significandParts() = mysignificand;
3754   } else {
3755     category = fcNormal;
3756     exponent = myexponent - 127; // bias
3757     *significandParts() = mysignificand;
3758     if (myexponent == 0) // denormal
3759       exponent = -126;
3760     else
3761       *significandParts() |= 0x80; // integer bit
3762   }
3763 }
3764 
initFromHalfAPInt(const APInt & api)3765 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3766   uint32_t i = (uint32_t)*api.getRawData();
3767   uint32_t myexponent = (i >> 10) & 0x1f;
3768   uint32_t mysignificand = i & 0x3ff;
3769 
3770   initialize(&semIEEEhalf);
3771   assert(partCount()==1);
3772 
3773   sign = i >> 15;
3774   if (myexponent==0 && mysignificand==0) {
3775     makeZero(sign);
3776   } else if (myexponent==0x1f && mysignificand==0) {
3777     makeInf(sign);
3778   } else if (myexponent==0x1f && mysignificand!=0) {
3779     category = fcNaN;
3780     exponent = exponentNaN();
3781     *significandParts() = mysignificand;
3782   } else {
3783     category = fcNormal;
3784     exponent = myexponent - 15;  //bias
3785     *significandParts() = mysignificand;
3786     if (myexponent==0)    // denormal
3787       exponent = -14;
3788     else
3789       *significandParts() |= 0x400; // integer bit
3790   }
3791 }
3792 
initFromFloat8E5M2APInt(const APInt & api)3793 void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3794   uint32_t i = (uint32_t)*api.getRawData();
3795   uint32_t myexponent = (i >> 2) & 0x1f;
3796   uint32_t mysignificand = i & 0x3;
3797 
3798   initialize(&semFloat8E5M2);
3799   assert(partCount() == 1);
3800 
3801   sign = i >> 7;
3802   if (myexponent == 0 && mysignificand == 0) {
3803     makeZero(sign);
3804   } else if (myexponent == 0x1f && mysignificand == 0) {
3805     makeInf(sign);
3806   } else if (myexponent == 0x1f && mysignificand != 0) {
3807     category = fcNaN;
3808     exponent = exponentNaN();
3809     *significandParts() = mysignificand;
3810   } else {
3811     category = fcNormal;
3812     exponent = myexponent - 15; // bias
3813     *significandParts() = mysignificand;
3814     if (myexponent == 0) // denormal
3815       exponent = -14;
3816     else
3817       *significandParts() |= 0x4; // integer bit
3818   }
3819 }
3820 
initFromFloat8E4M3FNAPInt(const APInt & api)3821 void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3822   uint32_t i = (uint32_t)*api.getRawData();
3823   uint32_t myexponent = (i >> 3) & 0xf;
3824   uint32_t mysignificand = i & 0x7;
3825 
3826   initialize(&semFloat8E4M3FN);
3827   assert(partCount() == 1);
3828 
3829   sign = i >> 7;
3830   if (myexponent == 0 && mysignificand == 0) {
3831     makeZero(sign);
3832   } else if (myexponent == 0xf && mysignificand == 7) {
3833     category = fcNaN;
3834     exponent = exponentNaN();
3835     *significandParts() = mysignificand;
3836   } else {
3837     category = fcNormal;
3838     exponent = myexponent - 7; // bias
3839     *significandParts() = mysignificand;
3840     if (myexponent == 0) // denormal
3841       exponent = -6;
3842     else
3843       *significandParts() |= 0x8; // integer bit
3844   }
3845 }
3846 
3847 /// Treat api as containing the bits of a floating point number.
initFromAPInt(const fltSemantics * Sem,const APInt & api)3848 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3849   assert(api.getBitWidth() == Sem->sizeInBits);
3850   if (Sem == &semIEEEhalf)
3851     return initFromHalfAPInt(api);
3852   if (Sem == &semBFloat)
3853     return initFromBFloatAPInt(api);
3854   if (Sem == &semIEEEsingle)
3855     return initFromFloatAPInt(api);
3856   if (Sem == &semIEEEdouble)
3857     return initFromDoubleAPInt(api);
3858   if (Sem == &semX87DoubleExtended)
3859     return initFromF80LongDoubleAPInt(api);
3860   if (Sem == &semIEEEquad)
3861     return initFromQuadrupleAPInt(api);
3862   if (Sem == &semPPCDoubleDoubleLegacy)
3863     return initFromPPCDoubleDoubleAPInt(api);
3864   if (Sem == &semFloat8E5M2)
3865     return initFromFloat8E5M2APInt(api);
3866   if (Sem == &semFloat8E4M3FN)
3867     return initFromFloat8E4M3FNAPInt(api);
3868 
3869   llvm_unreachable(nullptr);
3870 }
3871 
3872 /// Make this number the largest magnitude normal number in the given
3873 /// semantics.
makeLargest(bool Negative)3874 void IEEEFloat::makeLargest(bool Negative) {
3875   // We want (in interchange format):
3876   //   sign = {Negative}
3877   //   exponent = 1..10
3878   //   significand = 1..1
3879   category = fcNormal;
3880   sign = Negative;
3881   exponent = semantics->maxExponent;
3882 
3883   // Use memset to set all but the highest integerPart to all ones.
3884   integerPart *significand = significandParts();
3885   unsigned PartCount = partCount();
3886   memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3887 
3888   // Set the high integerPart especially setting all unused top bits for
3889   // internal consistency.
3890   const unsigned NumUnusedHighBits =
3891     PartCount*integerPartWidth - semantics->precision;
3892   significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3893                                    ? (~integerPart(0) >> NumUnusedHighBits)
3894                                    : 0;
3895 
3896   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
3897     significand[0] &= ~integerPart(1);
3898 }
3899 
3900 /// Make this number the smallest magnitude denormal number in the given
3901 /// semantics.
makeSmallest(bool Negative)3902 void IEEEFloat::makeSmallest(bool Negative) {
3903   // We want (in interchange format):
3904   //   sign = {Negative}
3905   //   exponent = 0..0
3906   //   significand = 0..01
3907   category = fcNormal;
3908   sign = Negative;
3909   exponent = semantics->minExponent;
3910   APInt::tcSet(significandParts(), 1, partCount());
3911 }
3912 
makeSmallestNormalized(bool Negative)3913 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3914   // We want (in interchange format):
3915   //   sign = {Negative}
3916   //   exponent = 0..0
3917   //   significand = 10..0
3918 
3919   category = fcNormal;
3920   zeroSignificand();
3921   sign = Negative;
3922   exponent = semantics->minExponent;
3923   APInt::tcSetBit(significandParts(), semantics->precision - 1);
3924 }
3925 
IEEEFloat(const fltSemantics & Sem,const APInt & API)3926 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3927   initFromAPInt(&Sem, API);
3928 }
3929 
IEEEFloat(float f)3930 IEEEFloat::IEEEFloat(float f) {
3931   initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3932 }
3933 
IEEEFloat(double d)3934 IEEEFloat::IEEEFloat(double d) {
3935   initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3936 }
3937 
3938 namespace {
append(SmallVectorImpl<char> & Buffer,StringRef Str)3939   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3940     Buffer.append(Str.begin(), Str.end());
3941   }
3942 
3943   /// Removes data from the given significand until it is no more
3944   /// precise than is required for the desired precision.
AdjustToPrecision(APInt & significand,int & exp,unsigned FormatPrecision)3945   void AdjustToPrecision(APInt &significand,
3946                          int &exp, unsigned FormatPrecision) {
3947     unsigned bits = significand.getActiveBits();
3948 
3949     // 196/59 is a very slight overestimate of lg_2(10).
3950     unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3951 
3952     if (bits <= bitsRequired) return;
3953 
3954     unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3955     if (!tensRemovable) return;
3956 
3957     exp += tensRemovable;
3958 
3959     APInt divisor(significand.getBitWidth(), 1);
3960     APInt powten(significand.getBitWidth(), 10);
3961     while (true) {
3962       if (tensRemovable & 1)
3963         divisor *= powten;
3964       tensRemovable >>= 1;
3965       if (!tensRemovable) break;
3966       powten *= powten;
3967     }
3968 
3969     significand = significand.udiv(divisor);
3970 
3971     // Truncate the significand down to its active bit count.
3972     significand = significand.trunc(significand.getActiveBits());
3973   }
3974 
3975 
AdjustToPrecision(SmallVectorImpl<char> & buffer,int & exp,unsigned FormatPrecision)3976   void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3977                          int &exp, unsigned FormatPrecision) {
3978     unsigned N = buffer.size();
3979     if (N <= FormatPrecision) return;
3980 
3981     // The most significant figures are the last ones in the buffer.
3982     unsigned FirstSignificant = N - FormatPrecision;
3983 
3984     // Round.
3985     // FIXME: this probably shouldn't use 'round half up'.
3986 
3987     // Rounding down is just a truncation, except we also want to drop
3988     // trailing zeros from the new result.
3989     if (buffer[FirstSignificant - 1] < '5') {
3990       while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3991         FirstSignificant++;
3992 
3993       exp += FirstSignificant;
3994       buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3995       return;
3996     }
3997 
3998     // Rounding up requires a decimal add-with-carry.  If we continue
3999     // the carry, the newly-introduced zeros will just be truncated.
4000     for (unsigned I = FirstSignificant; I != N; ++I) {
4001       if (buffer[I] == '9') {
4002         FirstSignificant++;
4003       } else {
4004         buffer[I]++;
4005         break;
4006       }
4007     }
4008 
4009     // If we carried through, we have exactly one digit of precision.
4010     if (FirstSignificant == N) {
4011       exp += FirstSignificant;
4012       buffer.clear();
4013       buffer.push_back('1');
4014       return;
4015     }
4016 
4017     exp += FirstSignificant;
4018     buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4019   }
4020 } // namespace
4021 
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const4022 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4023                          unsigned FormatMaxPadding, bool TruncateZero) const {
4024   switch (category) {
4025   case fcInfinity:
4026     if (isNegative())
4027       return append(Str, "-Inf");
4028     else
4029       return append(Str, "+Inf");
4030 
4031   case fcNaN: return append(Str, "NaN");
4032 
4033   case fcZero:
4034     if (isNegative())
4035       Str.push_back('-');
4036 
4037     if (!FormatMaxPadding) {
4038       if (TruncateZero)
4039         append(Str, "0.0E+0");
4040       else {
4041         append(Str, "0.0");
4042         if (FormatPrecision > 1)
4043           Str.append(FormatPrecision - 1, '0');
4044         append(Str, "e+00");
4045       }
4046     } else
4047       Str.push_back('0');
4048     return;
4049 
4050   case fcNormal:
4051     break;
4052   }
4053 
4054   if (isNegative())
4055     Str.push_back('-');
4056 
4057   // Decompose the number into an APInt and an exponent.
4058   int exp = exponent - ((int) semantics->precision - 1);
4059   APInt significand(
4060       semantics->precision,
4061       ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4062 
4063   // Set FormatPrecision if zero.  We want to do this before we
4064   // truncate trailing zeros, as those are part of the precision.
4065   if (!FormatPrecision) {
4066     // We use enough digits so the number can be round-tripped back to an
4067     // APFloat. The formula comes from "How to Print Floating-Point Numbers
4068     // Accurately" by Steele and White.
4069     // FIXME: Using a formula based purely on the precision is conservative;
4070     // we can print fewer digits depending on the actual value being printed.
4071 
4072     // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4073     FormatPrecision = 2 + semantics->precision * 59 / 196;
4074   }
4075 
4076   // Ignore trailing binary zeros.
4077   int trailingZeros = significand.countTrailingZeros();
4078   exp += trailingZeros;
4079   significand.lshrInPlace(trailingZeros);
4080 
4081   // Change the exponent from 2^e to 10^e.
4082   if (exp == 0) {
4083     // Nothing to do.
4084   } else if (exp > 0) {
4085     // Just shift left.
4086     significand = significand.zext(semantics->precision + exp);
4087     significand <<= exp;
4088     exp = 0;
4089   } else { /* exp < 0 */
4090     int texp = -exp;
4091 
4092     // We transform this using the identity:
4093     //   (N)(2^-e) == (N)(5^e)(10^-e)
4094     // This means we have to multiply N (the significand) by 5^e.
4095     // To avoid overflow, we have to operate on numbers large
4096     // enough to store N * 5^e:
4097     //   log2(N * 5^e) == log2(N) + e * log2(5)
4098     //                 <= semantics->precision + e * 137 / 59
4099     //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4100 
4101     unsigned precision = semantics->precision + (137 * texp + 136) / 59;
4102 
4103     // Multiply significand by 5^e.
4104     //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4105     significand = significand.zext(precision);
4106     APInt five_to_the_i(precision, 5);
4107     while (true) {
4108       if (texp & 1) significand *= five_to_the_i;
4109 
4110       texp >>= 1;
4111       if (!texp) break;
4112       five_to_the_i *= five_to_the_i;
4113     }
4114   }
4115 
4116   AdjustToPrecision(significand, exp, FormatPrecision);
4117 
4118   SmallVector<char, 256> buffer;
4119 
4120   // Fill the buffer.
4121   unsigned precision = significand.getBitWidth();
4122   if (precision < 4) {
4123     // We need enough precision to store the value 10.
4124     precision = 4;
4125     significand = significand.zext(precision);
4126   }
4127   APInt ten(precision, 10);
4128   APInt digit(precision, 0);
4129 
4130   bool inTrail = true;
4131   while (significand != 0) {
4132     // digit <- significand % 10
4133     // significand <- significand / 10
4134     APInt::udivrem(significand, ten, significand, digit);
4135 
4136     unsigned d = digit.getZExtValue();
4137 
4138     // Drop trailing zeros.
4139     if (inTrail && !d) exp++;
4140     else {
4141       buffer.push_back((char) ('0' + d));
4142       inTrail = false;
4143     }
4144   }
4145 
4146   assert(!buffer.empty() && "no characters in buffer!");
4147 
4148   // Drop down to FormatPrecision.
4149   // TODO: don't do more precise calculations above than are required.
4150   AdjustToPrecision(buffer, exp, FormatPrecision);
4151 
4152   unsigned NDigits = buffer.size();
4153 
4154   // Check whether we should use scientific notation.
4155   bool FormatScientific;
4156   if (!FormatMaxPadding)
4157     FormatScientific = true;
4158   else {
4159     if (exp >= 0) {
4160       // 765e3 --> 765000
4161       //              ^^^
4162       // But we shouldn't make the number look more precise than it is.
4163       FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4164                           NDigits + (unsigned) exp > FormatPrecision);
4165     } else {
4166       // Power of the most significant digit.
4167       int MSD = exp + (int) (NDigits - 1);
4168       if (MSD >= 0) {
4169         // 765e-2 == 7.65
4170         FormatScientific = false;
4171       } else {
4172         // 765e-5 == 0.00765
4173         //           ^ ^^
4174         FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4175       }
4176     }
4177   }
4178 
4179   // Scientific formatting is pretty straightforward.
4180   if (FormatScientific) {
4181     exp += (NDigits - 1);
4182 
4183     Str.push_back(buffer[NDigits-1]);
4184     Str.push_back('.');
4185     if (NDigits == 1 && TruncateZero)
4186       Str.push_back('0');
4187     else
4188       for (unsigned I = 1; I != NDigits; ++I)
4189         Str.push_back(buffer[NDigits-1-I]);
4190     // Fill with zeros up to FormatPrecision.
4191     if (!TruncateZero && FormatPrecision > NDigits - 1)
4192       Str.append(FormatPrecision - NDigits + 1, '0');
4193     // For !TruncateZero we use lower 'e'.
4194     Str.push_back(TruncateZero ? 'E' : 'e');
4195 
4196     Str.push_back(exp >= 0 ? '+' : '-');
4197     if (exp < 0) exp = -exp;
4198     SmallVector<char, 6> expbuf;
4199     do {
4200       expbuf.push_back((char) ('0' + (exp % 10)));
4201       exp /= 10;
4202     } while (exp);
4203     // Exponent always at least two digits if we do not truncate zeros.
4204     if (!TruncateZero && expbuf.size() < 2)
4205       expbuf.push_back('0');
4206     for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4207       Str.push_back(expbuf[E-1-I]);
4208     return;
4209   }
4210 
4211   // Non-scientific, positive exponents.
4212   if (exp >= 0) {
4213     for (unsigned I = 0; I != NDigits; ++I)
4214       Str.push_back(buffer[NDigits-1-I]);
4215     for (unsigned I = 0; I != (unsigned) exp; ++I)
4216       Str.push_back('0');
4217     return;
4218   }
4219 
4220   // Non-scientific, negative exponents.
4221 
4222   // The number of digits to the left of the decimal point.
4223   int NWholeDigits = exp + (int) NDigits;
4224 
4225   unsigned I = 0;
4226   if (NWholeDigits > 0) {
4227     for (; I != (unsigned) NWholeDigits; ++I)
4228       Str.push_back(buffer[NDigits-I-1]);
4229     Str.push_back('.');
4230   } else {
4231     unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4232 
4233     Str.push_back('0');
4234     Str.push_back('.');
4235     for (unsigned Z = 1; Z != NZeros; ++Z)
4236       Str.push_back('0');
4237   }
4238 
4239   for (; I != NDigits; ++I)
4240     Str.push_back(buffer[NDigits-I-1]);
4241 }
4242 
getExactInverse(APFloat * inv) const4243 bool IEEEFloat::getExactInverse(APFloat *inv) const {
4244   // Special floats and denormals have no exact inverse.
4245   if (!isFiniteNonZero())
4246     return false;
4247 
4248   // Check that the number is a power of two by making sure that only the
4249   // integer bit is set in the significand.
4250   if (significandLSB() != semantics->precision - 1)
4251     return false;
4252 
4253   // Get the inverse.
4254   IEEEFloat reciprocal(*semantics, 1ULL);
4255   if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4256     return false;
4257 
4258   // Avoid multiplication with a denormal, it is not safe on all platforms and
4259   // may be slower than a normal division.
4260   if (reciprocal.isDenormal())
4261     return false;
4262 
4263   assert(reciprocal.isFiniteNonZero() &&
4264          reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4265 
4266   if (inv)
4267     *inv = APFloat(reciprocal, *semantics);
4268 
4269   return true;
4270 }
4271 
isSignaling() const4272 bool IEEEFloat::isSignaling() const {
4273   if (!isNaN())
4274     return false;
4275   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
4276     return false;
4277 
4278   // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4279   // first bit of the trailing significand being 0.
4280   return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4281 }
4282 
4283 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4284 ///
4285 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4286 /// appropriate sign switching before/after the computation.
next(bool nextDown)4287 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
4288   // If we are performing nextDown, swap sign so we have -x.
4289   if (nextDown)
4290     changeSign();
4291 
4292   // Compute nextUp(x)
4293   opStatus result = opOK;
4294 
4295   // Handle each float category separately.
4296   switch (category) {
4297   case fcInfinity:
4298     // nextUp(+inf) = +inf
4299     if (!isNegative())
4300       break;
4301     // nextUp(-inf) = -getLargest()
4302     makeLargest(true);
4303     break;
4304   case fcNaN:
4305     // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4306     // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4307     //                     change the payload.
4308     if (isSignaling()) {
4309       result = opInvalidOp;
4310       // For consistency, propagate the sign of the sNaN to the qNaN.
4311       makeNaN(false, isNegative(), nullptr);
4312     }
4313     break;
4314   case fcZero:
4315     // nextUp(pm 0) = +getSmallest()
4316     makeSmallest(false);
4317     break;
4318   case fcNormal:
4319     // nextUp(-getSmallest()) = -0
4320     if (isSmallest() && isNegative()) {
4321       APInt::tcSet(significandParts(), 0, partCount());
4322       category = fcZero;
4323       exponent = 0;
4324       break;
4325     }
4326 
4327     if (isLargest() && !isNegative()) {
4328       if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4329         // nextUp(getLargest()) == NAN
4330         makeNaN();
4331         break;
4332       } else {
4333         // nextUp(getLargest()) == INFINITY
4334         APInt::tcSet(significandParts(), 0, partCount());
4335         category = fcInfinity;
4336         exponent = semantics->maxExponent + 1;
4337         break;
4338       }
4339     }
4340 
4341     // nextUp(normal) == normal + inc.
4342     if (isNegative()) {
4343       // If we are negative, we need to decrement the significand.
4344 
4345       // We only cross a binade boundary that requires adjusting the exponent
4346       // if:
4347       //   1. exponent != semantics->minExponent. This implies we are not in the
4348       //   smallest binade or are dealing with denormals.
4349       //   2. Our significand excluding the integral bit is all zeros.
4350       bool WillCrossBinadeBoundary =
4351         exponent != semantics->minExponent && isSignificandAllZeros();
4352 
4353       // Decrement the significand.
4354       //
4355       // We always do this since:
4356       //   1. If we are dealing with a non-binade decrement, by definition we
4357       //   just decrement the significand.
4358       //   2. If we are dealing with a normal -> normal binade decrement, since
4359       //   we have an explicit integral bit the fact that all bits but the
4360       //   integral bit are zero implies that subtracting one will yield a
4361       //   significand with 0 integral bit and 1 in all other spots. Thus we
4362       //   must just adjust the exponent and set the integral bit to 1.
4363       //   3. If we are dealing with a normal -> denormal binade decrement,
4364       //   since we set the integral bit to 0 when we represent denormals, we
4365       //   just decrement the significand.
4366       integerPart *Parts = significandParts();
4367       APInt::tcDecrement(Parts, partCount());
4368 
4369       if (WillCrossBinadeBoundary) {
4370         // Our result is a normal number. Do the following:
4371         // 1. Set the integral bit to 1.
4372         // 2. Decrement the exponent.
4373         APInt::tcSetBit(Parts, semantics->precision - 1);
4374         exponent--;
4375       }
4376     } else {
4377       // If we are positive, we need to increment the significand.
4378 
4379       // We only cross a binade boundary that requires adjusting the exponent if
4380       // the input is not a denormal and all of said input's significand bits
4381       // are set. If all of said conditions are true: clear the significand, set
4382       // the integral bit to 1, and increment the exponent. If we have a
4383       // denormal always increment since moving denormals and the numbers in the
4384       // smallest normal binade have the same exponent in our representation.
4385       bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
4386 
4387       if (WillCrossBinadeBoundary) {
4388         integerPart *Parts = significandParts();
4389         APInt::tcSet(Parts, 0, partCount());
4390         APInt::tcSetBit(Parts, semantics->precision - 1);
4391         assert(exponent != semantics->maxExponent &&
4392                "We can not increment an exponent beyond the maxExponent allowed"
4393                " by the given floating point semantics.");
4394         exponent++;
4395       } else {
4396         incrementSignificand();
4397       }
4398     }
4399     break;
4400   }
4401 
4402   // If we are performing nextDown, swap sign so we have -nextUp(-x)
4403   if (nextDown)
4404     changeSign();
4405 
4406   return result;
4407 }
4408 
exponentNaN() const4409 APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4410   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
4411     return semantics->maxExponent;
4412   return semantics->maxExponent + 1;
4413 }
4414 
exponentInf() const4415 APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4416   return semantics->maxExponent + 1;
4417 }
4418 
exponentZero() const4419 APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4420   return semantics->minExponent - 1;
4421 }
4422 
makeInf(bool Negative)4423 void IEEEFloat::makeInf(bool Negative) {
4424   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4425     // There is no Inf, so make NaN instead.
4426     makeNaN(false, Negative);
4427     return;
4428   }
4429   category = fcInfinity;
4430   sign = Negative;
4431   exponent = exponentInf();
4432   APInt::tcSet(significandParts(), 0, partCount());
4433 }
4434 
makeZero(bool Negative)4435 void IEEEFloat::makeZero(bool Negative) {
4436   category = fcZero;
4437   sign = Negative;
4438   exponent = exponentZero();
4439   APInt::tcSet(significandParts(), 0, partCount());
4440 }
4441 
makeQuiet()4442 void IEEEFloat::makeQuiet() {
4443   assert(isNaN());
4444   if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4445     APInt::tcSetBit(significandParts(), semantics->precision - 2);
4446 }
4447 
ilogb(const IEEEFloat & Arg)4448 int ilogb(const IEEEFloat &Arg) {
4449   if (Arg.isNaN())
4450     return IEEEFloat::IEK_NaN;
4451   if (Arg.isZero())
4452     return IEEEFloat::IEK_Zero;
4453   if (Arg.isInfinity())
4454     return IEEEFloat::IEK_Inf;
4455   if (!Arg.isDenormal())
4456     return Arg.exponent;
4457 
4458   IEEEFloat Normalized(Arg);
4459   int SignificandBits = Arg.getSemantics().precision - 1;
4460 
4461   Normalized.exponent += SignificandBits;
4462   Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4463   return Normalized.exponent - SignificandBits;
4464 }
4465 
scalbn(IEEEFloat X,int Exp,IEEEFloat::roundingMode RoundingMode)4466 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
4467   auto MaxExp = X.getSemantics().maxExponent;
4468   auto MinExp = X.getSemantics().minExponent;
4469 
4470   // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4471   // overflow; clamp it to a safe range before adding, but ensure that the range
4472   // is large enough that the clamp does not change the result. The range we
4473   // need to support is the difference between the largest possible exponent and
4474   // the normalized exponent of half the smallest denormal.
4475 
4476   int SignificandBits = X.getSemantics().precision - 1;
4477   int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4478 
4479   // Clamp to one past the range ends to let normalize handle overlflow.
4480   X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
4481   X.normalize(RoundingMode, lfExactlyZero);
4482   if (X.isNaN())
4483     X.makeQuiet();
4484   return X;
4485 }
4486 
frexp(const IEEEFloat & Val,int & Exp,IEEEFloat::roundingMode RM)4487 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
4488   Exp = ilogb(Val);
4489 
4490   // Quiet signalling nans.
4491   if (Exp == IEEEFloat::IEK_NaN) {
4492     IEEEFloat Quiet(Val);
4493     Quiet.makeQuiet();
4494     return Quiet;
4495   }
4496 
4497   if (Exp == IEEEFloat::IEK_Inf)
4498     return Val;
4499 
4500   // 1 is added because frexp is defined to return a normalized fraction in
4501   // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4502   Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
4503   return scalbn(Val, -Exp, RM);
4504 }
4505 
DoubleAPFloat(const fltSemantics & S)4506 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4507     : Semantics(&S),
4508       Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
4509   assert(Semantics == &semPPCDoubleDouble);
4510 }
4511 
DoubleAPFloat(const fltSemantics & S,uninitializedTag)4512 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
4513     : Semantics(&S),
4514       Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
4515                             APFloat(semIEEEdouble, uninitialized)}) {
4516   assert(Semantics == &semPPCDoubleDouble);
4517 }
4518 
DoubleAPFloat(const fltSemantics & S,integerPart I)4519 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
4520     : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4521                                            APFloat(semIEEEdouble)}) {
4522   assert(Semantics == &semPPCDoubleDouble);
4523 }
4524 
DoubleAPFloat(const fltSemantics & S,const APInt & I)4525 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
4526     : Semantics(&S),
4527       Floats(new APFloat[2]{
4528           APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4529           APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4530   assert(Semantics == &semPPCDoubleDouble);
4531 }
4532 
DoubleAPFloat(const fltSemantics & S,APFloat && First,APFloat && Second)4533 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
4534                              APFloat &&Second)
4535     : Semantics(&S),
4536       Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4537   assert(Semantics == &semPPCDoubleDouble);
4538   assert(&Floats[0].getSemantics() == &semIEEEdouble);
4539   assert(&Floats[1].getSemantics() == &semIEEEdouble);
4540 }
4541 
DoubleAPFloat(const DoubleAPFloat & RHS)4542 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
4543     : Semantics(RHS.Semantics),
4544       Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4545                                          APFloat(RHS.Floats[1])}
4546                         : nullptr) {
4547   assert(Semantics == &semPPCDoubleDouble);
4548 }
4549 
DoubleAPFloat(DoubleAPFloat && RHS)4550 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
4551     : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
4552   RHS.Semantics = &semBogus;
4553   assert(Semantics == &semPPCDoubleDouble);
4554 }
4555 
operator =(const DoubleAPFloat & RHS)4556 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
4557   if (Semantics == RHS.Semantics && RHS.Floats) {
4558     Floats[0] = RHS.Floats[0];
4559     Floats[1] = RHS.Floats[1];
4560   } else if (this != &RHS) {
4561     this->~DoubleAPFloat();
4562     new (this) DoubleAPFloat(RHS);
4563   }
4564   return *this;
4565 }
4566 
4567 // Implement addition, subtraction, multiplication and division based on:
4568 // "Software for Doubled-Precision Floating-Point Computations",
4569 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
addImpl(const APFloat & a,const APFloat & aa,const APFloat & c,const APFloat & cc,roundingMode RM)4570 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4571                                          const APFloat &c, const APFloat &cc,
4572                                          roundingMode RM) {
4573   int Status = opOK;
4574   APFloat z = a;
4575   Status |= z.add(c, RM);
4576   if (!z.isFinite()) {
4577     if (!z.isInfinity()) {
4578       Floats[0] = std::move(z);
4579       Floats[1].makeZero(/* Neg = */ false);
4580       return (opStatus)Status;
4581     }
4582     Status = opOK;
4583     auto AComparedToC = a.compareAbsoluteValue(c);
4584     z = cc;
4585     Status |= z.add(aa, RM);
4586     if (AComparedToC == APFloat::cmpGreaterThan) {
4587       // z = cc + aa + c + a;
4588       Status |= z.add(c, RM);
4589       Status |= z.add(a, RM);
4590     } else {
4591       // z = cc + aa + a + c;
4592       Status |= z.add(a, RM);
4593       Status |= z.add(c, RM);
4594     }
4595     if (!z.isFinite()) {
4596       Floats[0] = std::move(z);
4597       Floats[1].makeZero(/* Neg = */ false);
4598       return (opStatus)Status;
4599     }
4600     Floats[0] = z;
4601     APFloat zz = aa;
4602     Status |= zz.add(cc, RM);
4603     if (AComparedToC == APFloat::cmpGreaterThan) {
4604       // Floats[1] = a - z + c + zz;
4605       Floats[1] = a;
4606       Status |= Floats[1].subtract(z, RM);
4607       Status |= Floats[1].add(c, RM);
4608       Status |= Floats[1].add(zz, RM);
4609     } else {
4610       // Floats[1] = c - z + a + zz;
4611       Floats[1] = c;
4612       Status |= Floats[1].subtract(z, RM);
4613       Status |= Floats[1].add(a, RM);
4614       Status |= Floats[1].add(zz, RM);
4615     }
4616   } else {
4617     // q = a - z;
4618     APFloat q = a;
4619     Status |= q.subtract(z, RM);
4620 
4621     // zz = q + c + (a - (q + z)) + aa + cc;
4622     // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4623     auto zz = q;
4624     Status |= zz.add(c, RM);
4625     Status |= q.add(z, RM);
4626     Status |= q.subtract(a, RM);
4627     q.changeSign();
4628     Status |= zz.add(q, RM);
4629     Status |= zz.add(aa, RM);
4630     Status |= zz.add(cc, RM);
4631     if (zz.isZero() && !zz.isNegative()) {
4632       Floats[0] = std::move(z);
4633       Floats[1].makeZero(/* Neg = */ false);
4634       return opOK;
4635     }
4636     Floats[0] = z;
4637     Status |= Floats[0].add(zz, RM);
4638     if (!Floats[0].isFinite()) {
4639       Floats[1].makeZero(/* Neg = */ false);
4640       return (opStatus)Status;
4641     }
4642     Floats[1] = std::move(z);
4643     Status |= Floats[1].subtract(Floats[0], RM);
4644     Status |= Floats[1].add(zz, RM);
4645   }
4646   return (opStatus)Status;
4647 }
4648 
addWithSpecial(const DoubleAPFloat & LHS,const DoubleAPFloat & RHS,DoubleAPFloat & Out,roundingMode RM)4649 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4650                                                 const DoubleAPFloat &RHS,
4651                                                 DoubleAPFloat &Out,
4652                                                 roundingMode RM) {
4653   if (LHS.getCategory() == fcNaN) {
4654     Out = LHS;
4655     return opOK;
4656   }
4657   if (RHS.getCategory() == fcNaN) {
4658     Out = RHS;
4659     return opOK;
4660   }
4661   if (LHS.getCategory() == fcZero) {
4662     Out = RHS;
4663     return opOK;
4664   }
4665   if (RHS.getCategory() == fcZero) {
4666     Out = LHS;
4667     return opOK;
4668   }
4669   if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4670       LHS.isNegative() != RHS.isNegative()) {
4671     Out.makeNaN(false, Out.isNegative(), nullptr);
4672     return opInvalidOp;
4673   }
4674   if (LHS.getCategory() == fcInfinity) {
4675     Out = LHS;
4676     return opOK;
4677   }
4678   if (RHS.getCategory() == fcInfinity) {
4679     Out = RHS;
4680     return opOK;
4681   }
4682   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4683 
4684   APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4685       CC(RHS.Floats[1]);
4686   assert(&A.getSemantics() == &semIEEEdouble);
4687   assert(&AA.getSemantics() == &semIEEEdouble);
4688   assert(&C.getSemantics() == &semIEEEdouble);
4689   assert(&CC.getSemantics() == &semIEEEdouble);
4690   assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4691   assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4692   return Out.addImpl(A, AA, C, CC, RM);
4693 }
4694 
add(const DoubleAPFloat & RHS,roundingMode RM)4695 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4696                                      roundingMode RM) {
4697   return addWithSpecial(*this, RHS, *this, RM);
4698 }
4699 
subtract(const DoubleAPFloat & RHS,roundingMode RM)4700 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4701                                           roundingMode RM) {
4702   changeSign();
4703   auto Ret = add(RHS, RM);
4704   changeSign();
4705   return Ret;
4706 }
4707 
multiply(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4708 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4709                                           APFloat::roundingMode RM) {
4710   const auto &LHS = *this;
4711   auto &Out = *this;
4712   /* Interesting observation: For special categories, finding the lowest
4713      common ancestor of the following layered graph gives the correct
4714      return category:
4715 
4716         NaN
4717        /   \
4718      Zero  Inf
4719        \   /
4720        Normal
4721 
4722      e.g. NaN * NaN = NaN
4723           Zero * Inf = NaN
4724           Normal * Zero = Zero
4725           Normal * Inf = Inf
4726   */
4727   if (LHS.getCategory() == fcNaN) {
4728     Out = LHS;
4729     return opOK;
4730   }
4731   if (RHS.getCategory() == fcNaN) {
4732     Out = RHS;
4733     return opOK;
4734   }
4735   if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4736       (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4737     Out.makeNaN(false, false, nullptr);
4738     return opOK;
4739   }
4740   if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4741     Out = LHS;
4742     return opOK;
4743   }
4744   if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4745     Out = RHS;
4746     return opOK;
4747   }
4748   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4749          "Special cases not handled exhaustively");
4750 
4751   int Status = opOK;
4752   APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4753   // t = a * c
4754   APFloat T = A;
4755   Status |= T.multiply(C, RM);
4756   if (!T.isFiniteNonZero()) {
4757     Floats[0] = T;
4758     Floats[1].makeZero(/* Neg = */ false);
4759     return (opStatus)Status;
4760   }
4761 
4762   // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4763   APFloat Tau = A;
4764   T.changeSign();
4765   Status |= Tau.fusedMultiplyAdd(C, T, RM);
4766   T.changeSign();
4767   {
4768     // v = a * d
4769     APFloat V = A;
4770     Status |= V.multiply(D, RM);
4771     // w = b * c
4772     APFloat W = B;
4773     Status |= W.multiply(C, RM);
4774     Status |= V.add(W, RM);
4775     // tau += v + w
4776     Status |= Tau.add(V, RM);
4777   }
4778   // u = t + tau
4779   APFloat U = T;
4780   Status |= U.add(Tau, RM);
4781 
4782   Floats[0] = U;
4783   if (!U.isFinite()) {
4784     Floats[1].makeZero(/* Neg = */ false);
4785   } else {
4786     // Floats[1] = (t - u) + tau
4787     Status |= T.subtract(U, RM);
4788     Status |= T.add(Tau, RM);
4789     Floats[1] = T;
4790   }
4791   return (opStatus)Status;
4792 }
4793 
divide(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4794 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4795                                         APFloat::roundingMode RM) {
4796   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4797   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4798   auto Ret =
4799       Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4800   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4801   return Ret;
4802 }
4803 
remainder(const DoubleAPFloat & RHS)4804 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4805   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4806   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4807   auto Ret =
4808       Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4809   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4810   return Ret;
4811 }
4812 
mod(const DoubleAPFloat & RHS)4813 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4814   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4815   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4816   auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4817   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4818   return Ret;
4819 }
4820 
4821 APFloat::opStatus
fusedMultiplyAdd(const DoubleAPFloat & Multiplicand,const DoubleAPFloat & Addend,APFloat::roundingMode RM)4822 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4823                                 const DoubleAPFloat &Addend,
4824                                 APFloat::roundingMode RM) {
4825   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4826   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4827   auto Ret = Tmp.fusedMultiplyAdd(
4828       APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4829       APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4830   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4831   return Ret;
4832 }
4833 
roundToIntegral(APFloat::roundingMode RM)4834 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4835   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4836   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4837   auto Ret = Tmp.roundToIntegral(RM);
4838   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4839   return Ret;
4840 }
4841 
changeSign()4842 void DoubleAPFloat::changeSign() {
4843   Floats[0].changeSign();
4844   Floats[1].changeSign();
4845 }
4846 
4847 APFloat::cmpResult
compareAbsoluteValue(const DoubleAPFloat & RHS) const4848 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4849   auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4850   if (Result != cmpEqual)
4851     return Result;
4852   Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4853   if (Result == cmpLessThan || Result == cmpGreaterThan) {
4854     auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4855     auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4856     if (Against && !RHSAgainst)
4857       return cmpLessThan;
4858     if (!Against && RHSAgainst)
4859       return cmpGreaterThan;
4860     if (!Against && !RHSAgainst)
4861       return Result;
4862     if (Against && RHSAgainst)
4863       return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4864   }
4865   return Result;
4866 }
4867 
getCategory() const4868 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4869   return Floats[0].getCategory();
4870 }
4871 
isNegative() const4872 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4873 
makeInf(bool Neg)4874 void DoubleAPFloat::makeInf(bool Neg) {
4875   Floats[0].makeInf(Neg);
4876   Floats[1].makeZero(/* Neg = */ false);
4877 }
4878 
makeZero(bool Neg)4879 void DoubleAPFloat::makeZero(bool Neg) {
4880   Floats[0].makeZero(Neg);
4881   Floats[1].makeZero(/* Neg = */ false);
4882 }
4883 
makeLargest(bool Neg)4884 void DoubleAPFloat::makeLargest(bool Neg) {
4885   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4886   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4887   Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4888   if (Neg)
4889     changeSign();
4890 }
4891 
makeSmallest(bool Neg)4892 void DoubleAPFloat::makeSmallest(bool Neg) {
4893   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4894   Floats[0].makeSmallest(Neg);
4895   Floats[1].makeZero(/* Neg = */ false);
4896 }
4897 
makeSmallestNormalized(bool Neg)4898 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4899   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4900   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4901   if (Neg)
4902     Floats[0].changeSign();
4903   Floats[1].makeZero(/* Neg = */ false);
4904 }
4905 
makeNaN(bool SNaN,bool Neg,const APInt * fill)4906 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4907   Floats[0].makeNaN(SNaN, Neg, fill);
4908   Floats[1].makeZero(/* Neg = */ false);
4909 }
4910 
compare(const DoubleAPFloat & RHS) const4911 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4912   auto Result = Floats[0].compare(RHS.Floats[0]);
4913   // |Float[0]| > |Float[1]|
4914   if (Result == APFloat::cmpEqual)
4915     return Floats[1].compare(RHS.Floats[1]);
4916   return Result;
4917 }
4918 
bitwiseIsEqual(const DoubleAPFloat & RHS) const4919 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4920   return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4921          Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4922 }
4923 
hash_value(const DoubleAPFloat & Arg)4924 hash_code hash_value(const DoubleAPFloat &Arg) {
4925   if (Arg.Floats)
4926     return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4927   return hash_combine(Arg.Semantics);
4928 }
4929 
bitcastToAPInt() const4930 APInt DoubleAPFloat::bitcastToAPInt() const {
4931   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4932   uint64_t Data[] = {
4933       Floats[0].bitcastToAPInt().getRawData()[0],
4934       Floats[1].bitcastToAPInt().getRawData()[0],
4935   };
4936   return APInt(128, 2, Data);
4937 }
4938 
convertFromString(StringRef S,roundingMode RM)4939 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
4940                                                              roundingMode RM) {
4941   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4942   APFloat Tmp(semPPCDoubleDoubleLegacy);
4943   auto Ret = Tmp.convertFromString(S, RM);
4944   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4945   return Ret;
4946 }
4947 
next(bool nextDown)4948 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
4949   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4950   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4951   auto Ret = Tmp.next(nextDown);
4952   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4953   return Ret;
4954 }
4955 
4956 APFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> Input,unsigned int Width,bool IsSigned,roundingMode RM,bool * IsExact) const4957 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
4958                                 unsigned int Width, bool IsSigned,
4959                                 roundingMode RM, bool *IsExact) const {
4960   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4961   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4962       .convertToInteger(Input, Width, IsSigned, RM, IsExact);
4963 }
4964 
convertFromAPInt(const APInt & Input,bool IsSigned,roundingMode RM)4965 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
4966                                                   bool IsSigned,
4967                                                   roundingMode RM) {
4968   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4969   APFloat Tmp(semPPCDoubleDoubleLegacy);
4970   auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
4971   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4972   return Ret;
4973 }
4974 
4975 APFloat::opStatus
convertFromSignExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)4976 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
4977                                               unsigned int InputSize,
4978                                               bool IsSigned, roundingMode RM) {
4979   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4980   APFloat Tmp(semPPCDoubleDoubleLegacy);
4981   auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
4982   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4983   return Ret;
4984 }
4985 
4986 APFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)4987 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
4988                                               unsigned int InputSize,
4989                                               bool IsSigned, roundingMode RM) {
4990   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4991   APFloat Tmp(semPPCDoubleDoubleLegacy);
4992   auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
4993   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4994   return Ret;
4995 }
4996 
convertToHexString(char * DST,unsigned int HexDigits,bool UpperCase,roundingMode RM) const4997 unsigned int DoubleAPFloat::convertToHexString(char *DST,
4998                                                unsigned int HexDigits,
4999                                                bool UpperCase,
5000                                                roundingMode RM) const {
5001   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5002   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5003       .convertToHexString(DST, HexDigits, UpperCase, RM);
5004 }
5005 
isDenormal() const5006 bool DoubleAPFloat::isDenormal() const {
5007   return getCategory() == fcNormal &&
5008          (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5009           // (double)(Hi + Lo) == Hi defines a normal number.
5010           Floats[0] != Floats[0] + Floats[1]);
5011 }
5012 
isSmallest() const5013 bool DoubleAPFloat::isSmallest() const {
5014   if (getCategory() != fcNormal)
5015     return false;
5016   DoubleAPFloat Tmp(*this);
5017   Tmp.makeSmallest(this->isNegative());
5018   return Tmp.compare(*this) == cmpEqual;
5019 }
5020 
isSmallestNormalized() const5021 bool DoubleAPFloat::isSmallestNormalized() const {
5022   if (getCategory() != fcNormal)
5023     return false;
5024 
5025   DoubleAPFloat Tmp(*this);
5026   Tmp.makeSmallestNormalized(this->isNegative());
5027   return Tmp.compare(*this) == cmpEqual;
5028 }
5029 
isLargest() const5030 bool DoubleAPFloat::isLargest() const {
5031   if (getCategory() != fcNormal)
5032     return false;
5033   DoubleAPFloat Tmp(*this);
5034   Tmp.makeLargest(this->isNegative());
5035   return Tmp.compare(*this) == cmpEqual;
5036 }
5037 
isInteger() const5038 bool DoubleAPFloat::isInteger() const {
5039   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5040   return Floats[0].isInteger() && Floats[1].isInteger();
5041 }
5042 
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const5043 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
5044                              unsigned FormatPrecision,
5045                              unsigned FormatMaxPadding,
5046                              bool TruncateZero) const {
5047   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5048   APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5049       .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5050 }
5051 
getExactInverse(APFloat * inv) const5052 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
5053   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5054   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5055   if (!inv)
5056     return Tmp.getExactInverse(nullptr);
5057   APFloat Inv(semPPCDoubleDoubleLegacy);
5058   auto Ret = Tmp.getExactInverse(&Inv);
5059   *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
5060   return Ret;
5061 }
5062 
scalbn(const DoubleAPFloat & Arg,int Exp,APFloat::roundingMode RM)5063 DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
5064                      APFloat::roundingMode RM) {
5065   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5066   return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
5067                        scalbn(Arg.Floats[1], Exp, RM));
5068 }
5069 
frexp(const DoubleAPFloat & Arg,int & Exp,APFloat::roundingMode RM)5070 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5071                     APFloat::roundingMode RM) {
5072   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5073   APFloat First = frexp(Arg.Floats[0], Exp, RM);
5074   APFloat Second = Arg.Floats[1];
5075   if (Arg.getCategory() == APFloat::fcNormal)
5076     Second = scalbn(Second, -Exp, RM);
5077   return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
5078 }
5079 
5080 } // namespace detail
5081 
Storage(IEEEFloat F,const fltSemantics & Semantics)5082 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5083   if (usesLayout<IEEEFloat>(Semantics)) {
5084     new (&IEEE) IEEEFloat(std::move(F));
5085     return;
5086   }
5087   if (usesLayout<DoubleAPFloat>(Semantics)) {
5088     const fltSemantics& S = F.getSemantics();
5089     new (&Double)
5090         DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5091                       APFloat(semIEEEdouble));
5092     return;
5093   }
5094   llvm_unreachable("Unexpected semantics");
5095 }
5096 
convertFromString(StringRef Str,roundingMode RM)5097 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
5098                                                        roundingMode RM) {
5099   APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
5100 }
5101 
hash_value(const APFloat & Arg)5102 hash_code hash_value(const APFloat &Arg) {
5103   if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5104     return hash_value(Arg.U.IEEE);
5105   if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5106     return hash_value(Arg.U.Double);
5107   llvm_unreachable("Unexpected semantics");
5108 }
5109 
APFloat(const fltSemantics & Semantics,StringRef S)5110 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
5111     : APFloat(Semantics) {
5112   auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5113   assert(StatusOrErr && "Invalid floating point representation");
5114   consumeError(StatusOrErr.takeError());
5115 }
5116 
convert(const fltSemantics & ToSemantics,roundingMode RM,bool * losesInfo)5117 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
5118                                    roundingMode RM, bool *losesInfo) {
5119   if (&getSemantics() == &ToSemantics) {
5120     *losesInfo = false;
5121     return opOK;
5122   }
5123   if (usesLayout<IEEEFloat>(getSemantics()) &&
5124       usesLayout<IEEEFloat>(ToSemantics))
5125     return U.IEEE.convert(ToSemantics, RM, losesInfo);
5126   if (usesLayout<IEEEFloat>(getSemantics()) &&
5127       usesLayout<DoubleAPFloat>(ToSemantics)) {
5128     assert(&ToSemantics == &semPPCDoubleDouble);
5129     auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
5130     *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5131     return Ret;
5132   }
5133   if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5134       usesLayout<IEEEFloat>(ToSemantics)) {
5135     auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5136     *this = APFloat(std::move(getIEEE()), ToSemantics);
5137     return Ret;
5138   }
5139   llvm_unreachable("Unexpected semantics");
5140 }
5141 
getAllOnesValue(const fltSemantics & Semantics)5142 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
5143   return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));
5144 }
5145 
print(raw_ostream & OS) const5146 void APFloat::print(raw_ostream &OS) const {
5147   SmallVector<char, 16> Buffer;
5148   toString(Buffer);
5149   OS << Buffer << "\n";
5150 }
5151 
5152 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump() const5153 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
5154 #endif
5155 
Profile(FoldingSetNodeID & NID) const5156 void APFloat::Profile(FoldingSetNodeID &NID) const {
5157   NID.Add(bitcastToAPInt());
5158 }
5159 
5160 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
5161    an APSInt, whose initial bit-width and signed-ness are used to determine the
5162    precision of the conversion.
5163  */
convertToInteger(APSInt & result,roundingMode rounding_mode,bool * isExact) const5164 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
5165                                             roundingMode rounding_mode,
5166                                             bool *isExact) const {
5167   unsigned bitWidth = result.getBitWidth();
5168   SmallVector<uint64_t, 4> parts(result.getNumWords());
5169   opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5170                                      rounding_mode, isExact);
5171   // Keeps the original signed-ness.
5172   result = APInt(bitWidth, parts);
5173   return status;
5174 }
5175 
convertToDouble() const5176 double APFloat::convertToDouble() const {
5177   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
5178     return getIEEE().convertToDouble();
5179   assert(getSemantics().isRepresentableBy(semIEEEdouble) &&
5180          "Float semantics is not representable by IEEEdouble");
5181   APFloat Temp = *this;
5182   bool LosesInfo;
5183   opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5184   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5185   (void)St;
5186   return Temp.getIEEE().convertToDouble();
5187 }
5188 
convertToFloat() const5189 float APFloat::convertToFloat() const {
5190   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
5191     return getIEEE().convertToFloat();
5192   assert(getSemantics().isRepresentableBy(semIEEEsingle) &&
5193          "Float semantics is not representable by IEEEsingle");
5194   APFloat Temp = *this;
5195   bool LosesInfo;
5196   opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
5197   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5198   (void)St;
5199   return Temp.getIEEE().convertToFloat();
5200 }
5201 
5202 } // namespace llvm
5203 
5204 #undef APFLOAT_DISPATCH_ON_SEMANTICS
5205