1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/llvm-config.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <cstring>
27 #include <limits.h>
28 
29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
30   do {                                                                         \
31     if (usesLayout<IEEEFloat>(getSemantics()))                                 \
32       return U.IEEE.METHOD_CALL;                                               \
33     if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
34       return U.Double.METHOD_CALL;                                             \
35     llvm_unreachable("Unexpected semantics");                                  \
36   } while (false)
37 
38 using namespace llvm;
39 
40 /// A macro used to combine two fcCategory enums into one key which can be used
41 /// in a switch statement to classify how the interaction of two APFloat's
42 /// categories affects an operation.
43 ///
44 /// TODO: If clang source code is ever allowed to use constexpr in its own
45 /// codebase, change this into a static inline function.
46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
47 
48 /* Assumed in hexadecimal significand parsing, and conversion to
49    hexadecimal strings.  */
50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
51 
52 namespace llvm {
53   /* Represents floating point arithmetic semantics.  */
54   struct fltSemantics {
55     /* The largest E such that 2^E is representable; this matches the
56        definition of IEEE 754.  */
57     APFloatBase::ExponentType maxExponent;
58 
59     /* The smallest E such that 2^E is a normalized number; this
60        matches the definition of IEEE 754.  */
61     APFloatBase::ExponentType minExponent;
62 
63     /* Number of bits in the significand.  This includes the integer
64        bit.  */
65     unsigned int precision;
66 
67     /* Number of bits actually used in the semantics. */
68     unsigned int sizeInBits;
69   };
70 
71   static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
72   static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
73   static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
74   static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
75   static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
76   static const fltSemantics semBogus = {0, 0, 0, 0};
77 
78   /* The IBM double-double semantics. Such a number consists of a pair of IEEE
79      64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
80      (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
81      Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
82      to each other, and two 11-bit exponents.
83 
84      Note: we need to make the value different from semBogus as otherwise
85      an unsafe optimization may collapse both values to a single address,
86      and we heavily rely on them having distinct addresses.             */
87   static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
88 
89   /* These are legacy semantics for the fallback, inaccrurate implementation of
90      IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
91      operation. It's equivalent to having an IEEE number with consecutive 106
92      bits of mantissa and 11 bits of exponent.
93 
94      It's not equivalent to IBM double-double. For example, a legit IBM
95      double-double, 1 + epsilon:
96 
97        1 + epsilon = 1 + (1 >> 1076)
98 
99      is not representable by a consecutive 106 bits of mantissa.
100 
101      Currently, these semantics are used in the following way:
102 
103        semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
104        (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
105        semPPCDoubleDoubleLegacy -> IEEE operations
106 
107      We use bitcastToAPInt() to get the bit representation (in APInt) of the
108      underlying IEEEdouble, then use the APInt constructor to construct the
109      legacy IEEE float.
110 
111      TODO: Implement all operations in semPPCDoubleDouble, and delete these
112      semantics.  */
113   static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
114                                                         53 + 53, 128};
115 
EnumToSemantics(Semantics S)116   const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
117     switch (S) {
118     case S_IEEEhalf:
119       return IEEEhalf();
120     case S_IEEEsingle:
121       return IEEEsingle();
122     case S_IEEEdouble:
123       return IEEEdouble();
124     case S_x87DoubleExtended:
125       return x87DoubleExtended();
126     case S_IEEEquad:
127       return IEEEquad();
128     case S_PPCDoubleDouble:
129       return PPCDoubleDouble();
130     }
131     llvm_unreachable("Unrecognised floating semantics");
132   }
133 
134   APFloatBase::Semantics
SemanticsToEnum(const llvm::fltSemantics & Sem)135   APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
136     if (&Sem == &llvm::APFloat::IEEEhalf())
137       return S_IEEEhalf;
138     else if (&Sem == &llvm::APFloat::IEEEsingle())
139       return S_IEEEsingle;
140     else if (&Sem == &llvm::APFloat::IEEEdouble())
141       return S_IEEEdouble;
142     else if (&Sem == &llvm::APFloat::x87DoubleExtended())
143       return S_x87DoubleExtended;
144     else if (&Sem == &llvm::APFloat::IEEEquad())
145       return S_IEEEquad;
146     else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
147       return S_PPCDoubleDouble;
148     else
149       llvm_unreachable("Unknown floating semantics");
150   }
151 
IEEEhalf()152   const fltSemantics &APFloatBase::IEEEhalf() {
153     return semIEEEhalf;
154   }
IEEEsingle()155   const fltSemantics &APFloatBase::IEEEsingle() {
156     return semIEEEsingle;
157   }
IEEEdouble()158   const fltSemantics &APFloatBase::IEEEdouble() {
159     return semIEEEdouble;
160   }
IEEEquad()161   const fltSemantics &APFloatBase::IEEEquad() {
162     return semIEEEquad;
163   }
x87DoubleExtended()164   const fltSemantics &APFloatBase::x87DoubleExtended() {
165     return semX87DoubleExtended;
166   }
Bogus()167   const fltSemantics &APFloatBase::Bogus() {
168     return semBogus;
169   }
PPCDoubleDouble()170   const fltSemantics &APFloatBase::PPCDoubleDouble() {
171     return semPPCDoubleDouble;
172   }
173 
174   /* A tight upper bound on number of parts required to hold the value
175      pow(5, power) is
176 
177        power * 815 / (351 * integerPartWidth) + 1
178 
179      However, whilst the result may require only this many parts,
180      because we are multiplying two values to get it, the
181      multiplication may require an extra part with the excess part
182      being zero (consider the trivial case of 1 * 1, tcFullMultiply
183      requires two parts to hold the single-part result).  So we add an
184      extra one to guarantee enough space whilst multiplying.  */
185   const unsigned int maxExponent = 16383;
186   const unsigned int maxPrecision = 113;
187   const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
188   const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
189 
semanticsPrecision(const fltSemantics & semantics)190   unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
191     return semantics.precision;
192   }
193   APFloatBase::ExponentType
semanticsMaxExponent(const fltSemantics & semantics)194   APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
195     return semantics.maxExponent;
196   }
197   APFloatBase::ExponentType
semanticsMinExponent(const fltSemantics & semantics)198   APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
199     return semantics.minExponent;
200   }
semanticsSizeInBits(const fltSemantics & semantics)201   unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
202     return semantics.sizeInBits;
203   }
204 
getSizeInBits(const fltSemantics & Sem)205   unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
206     return Sem.sizeInBits;
207 }
208 
209 /* A bunch of private, handy routines.  */
210 
211 static inline unsigned int
partCountForBits(unsigned int bits)212 partCountForBits(unsigned int bits)
213 {
214   return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
215 }
216 
217 /* Returns 0U-9U.  Return values >= 10U are not digits.  */
218 static inline unsigned int
decDigitValue(unsigned int c)219 decDigitValue(unsigned int c)
220 {
221   return c - '0';
222 }
223 
224 /* Return the value of a decimal exponent of the form
225    [+-]ddddddd.
226 
227    If the exponent overflows, returns a large exponent with the
228    appropriate sign.  */
229 static int
readExponent(StringRef::iterator begin,StringRef::iterator end)230 readExponent(StringRef::iterator begin, StringRef::iterator end)
231 {
232   bool isNegative;
233   unsigned int absExponent;
234   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
235   StringRef::iterator p = begin;
236 
237   // Treat no exponent as 0 to match binutils
238   if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
239     return 0;
240   }
241 
242   isNegative = (*p == '-');
243   if (*p == '-' || *p == '+') {
244     p++;
245     assert(p != end && "Exponent has no digits");
246   }
247 
248   absExponent = decDigitValue(*p++);
249   assert(absExponent < 10U && "Invalid character in exponent");
250 
251   for (; p != end; ++p) {
252     unsigned int value;
253 
254     value = decDigitValue(*p);
255     assert(value < 10U && "Invalid character in exponent");
256 
257     value += absExponent * 10;
258     if (absExponent >= overlargeExponent) {
259       absExponent = overlargeExponent;
260       p = end;  /* outwit assert below */
261       break;
262     }
263     absExponent = value;
264   }
265 
266   assert(p == end && "Invalid exponent in exponent");
267 
268   if (isNegative)
269     return -(int) absExponent;
270   else
271     return (int) absExponent;
272 }
273 
274 /* This is ugly and needs cleaning up, but I don't immediately see
275    how whilst remaining safe.  */
276 static int
totalExponent(StringRef::iterator p,StringRef::iterator end,int exponentAdjustment)277 totalExponent(StringRef::iterator p, StringRef::iterator end,
278               int exponentAdjustment)
279 {
280   int unsignedExponent;
281   bool negative, overflow;
282   int exponent = 0;
283 
284   assert(p != end && "Exponent has no digits");
285 
286   negative = *p == '-';
287   if (*p == '-' || *p == '+') {
288     p++;
289     assert(p != end && "Exponent has no digits");
290   }
291 
292   unsignedExponent = 0;
293   overflow = false;
294   for (; p != end; ++p) {
295     unsigned int value;
296 
297     value = decDigitValue(*p);
298     assert(value < 10U && "Invalid character in exponent");
299 
300     unsignedExponent = unsignedExponent * 10 + value;
301     if (unsignedExponent > 32767) {
302       overflow = true;
303       break;
304     }
305   }
306 
307   if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
308     overflow = true;
309 
310   if (!overflow) {
311     exponent = unsignedExponent;
312     if (negative)
313       exponent = -exponent;
314     exponent += exponentAdjustment;
315     if (exponent > 32767 || exponent < -32768)
316       overflow = true;
317   }
318 
319   if (overflow)
320     exponent = negative ? -32768: 32767;
321 
322   return exponent;
323 }
324 
325 static StringRef::iterator
skipLeadingZeroesAndAnyDot(StringRef::iterator begin,StringRef::iterator end,StringRef::iterator * dot)326 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
327                            StringRef::iterator *dot)
328 {
329   StringRef::iterator p = begin;
330   *dot = end;
331   while (p != end && *p == '0')
332     p++;
333 
334   if (p != end && *p == '.') {
335     *dot = p++;
336 
337     assert(end - begin != 1 && "Significand has no digits");
338 
339     while (p != end && *p == '0')
340       p++;
341   }
342 
343   return p;
344 }
345 
346 /* Given a normal decimal floating point number of the form
347 
348      dddd.dddd[eE][+-]ddd
349 
350    where the decimal point and exponent are optional, fill out the
351    structure D.  Exponent is appropriate if the significand is
352    treated as an integer, and normalizedExponent if the significand
353    is taken to have the decimal point after a single leading
354    non-zero digit.
355 
356    If the value is zero, V->firstSigDigit points to a non-digit, and
357    the return exponent is zero.
358 */
359 struct decimalInfo {
360   const char *firstSigDigit;
361   const char *lastSigDigit;
362   int exponent;
363   int normalizedExponent;
364 };
365 
366 static void
interpretDecimal(StringRef::iterator begin,StringRef::iterator end,decimalInfo * D)367 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
368                  decimalInfo *D)
369 {
370   StringRef::iterator dot = end;
371   StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
372 
373   D->firstSigDigit = p;
374   D->exponent = 0;
375   D->normalizedExponent = 0;
376 
377   for (; p != end; ++p) {
378     if (*p == '.') {
379       assert(dot == end && "String contains multiple dots");
380       dot = p++;
381       if (p == end)
382         break;
383     }
384     if (decDigitValue(*p) >= 10U)
385       break;
386   }
387 
388   if (p != end) {
389     assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
390     assert(p != begin && "Significand has no digits");
391     assert((dot == end || p - begin != 1) && "Significand has no digits");
392 
393     /* p points to the first non-digit in the string */
394     D->exponent = readExponent(p + 1, end);
395 
396     /* Implied decimal point?  */
397     if (dot == end)
398       dot = p;
399   }
400 
401   /* If number is all zeroes accept any exponent.  */
402   if (p != D->firstSigDigit) {
403     /* Drop insignificant trailing zeroes.  */
404     if (p != begin) {
405       do
406         do
407           p--;
408         while (p != begin && *p == '0');
409       while (p != begin && *p == '.');
410     }
411 
412     /* Adjust the exponents for any decimal point.  */
413     D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
414     D->normalizedExponent = (D->exponent +
415               static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
416                                       - (dot > D->firstSigDigit && dot < p)));
417   }
418 
419   D->lastSigDigit = p;
420 }
421 
422 /* Return the trailing fraction of a hexadecimal number.
423    DIGITVALUE is the first hex digit of the fraction, P points to
424    the next digit.  */
425 static lostFraction
trailingHexadecimalFraction(StringRef::iterator p,StringRef::iterator end,unsigned int digitValue)426 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
427                             unsigned int digitValue)
428 {
429   unsigned int hexDigit;
430 
431   /* If the first trailing digit isn't 0 or 8 we can work out the
432      fraction immediately.  */
433   if (digitValue > 8)
434     return lfMoreThanHalf;
435   else if (digitValue < 8 && digitValue > 0)
436     return lfLessThanHalf;
437 
438   // Otherwise we need to find the first non-zero digit.
439   while (p != end && (*p == '0' || *p == '.'))
440     p++;
441 
442   assert(p != end && "Invalid trailing hexadecimal fraction!");
443 
444   hexDigit = hexDigitValue(*p);
445 
446   /* If we ran off the end it is exactly zero or one-half, otherwise
447      a little more.  */
448   if (hexDigit == -1U)
449     return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
450   else
451     return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
452 }
453 
454 /* Return the fraction lost were a bignum truncated losing the least
455    significant BITS bits.  */
456 static lostFraction
lostFractionThroughTruncation(const APFloatBase::integerPart * parts,unsigned int partCount,unsigned int bits)457 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
458                               unsigned int partCount,
459                               unsigned int bits)
460 {
461   unsigned int lsb;
462 
463   lsb = APInt::tcLSB(parts, partCount);
464 
465   /* Note this is guaranteed true if bits == 0, or LSB == -1U.  */
466   if (bits <= lsb)
467     return lfExactlyZero;
468   if (bits == lsb + 1)
469     return lfExactlyHalf;
470   if (bits <= partCount * APFloatBase::integerPartWidth &&
471       APInt::tcExtractBit(parts, bits - 1))
472     return lfMoreThanHalf;
473 
474   return lfLessThanHalf;
475 }
476 
477 /* Shift DST right BITS bits noting lost fraction.  */
478 static lostFraction
shiftRight(APFloatBase::integerPart * dst,unsigned int parts,unsigned int bits)479 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
480 {
481   lostFraction lost_fraction;
482 
483   lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
484 
485   APInt::tcShiftRight(dst, parts, bits);
486 
487   return lost_fraction;
488 }
489 
490 /* Combine the effect of two lost fractions.  */
491 static lostFraction
combineLostFractions(lostFraction moreSignificant,lostFraction lessSignificant)492 combineLostFractions(lostFraction moreSignificant,
493                      lostFraction lessSignificant)
494 {
495   if (lessSignificant != lfExactlyZero) {
496     if (moreSignificant == lfExactlyZero)
497       moreSignificant = lfLessThanHalf;
498     else if (moreSignificant == lfExactlyHalf)
499       moreSignificant = lfMoreThanHalf;
500   }
501 
502   return moreSignificant;
503 }
504 
505 /* The error from the true value, in half-ulps, on multiplying two
506    floating point numbers, which differ from the value they
507    approximate by at most HUE1 and HUE2 half-ulps, is strictly less
508    than the returned value.
509 
510    See "How to Read Floating Point Numbers Accurately" by William D
511    Clinger.  */
512 static unsigned int
HUerrBound(bool inexactMultiply,unsigned int HUerr1,unsigned int HUerr2)513 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
514 {
515   assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
516 
517   if (HUerr1 + HUerr2 == 0)
518     return inexactMultiply * 2;  /* <= inexactMultiply half-ulps.  */
519   else
520     return inexactMultiply + 2 * (HUerr1 + HUerr2);
521 }
522 
523 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
524    when the least significant BITS are truncated.  BITS cannot be
525    zero.  */
526 static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart * parts,unsigned int bits,bool isNearest)527 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
528                  bool isNearest) {
529   unsigned int count, partBits;
530   APFloatBase::integerPart part, boundary;
531 
532   assert(bits != 0);
533 
534   bits--;
535   count = bits / APFloatBase::integerPartWidth;
536   partBits = bits % APFloatBase::integerPartWidth + 1;
537 
538   part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
539 
540   if (isNearest)
541     boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
542   else
543     boundary = 0;
544 
545   if (count == 0) {
546     if (part - boundary <= boundary - part)
547       return part - boundary;
548     else
549       return boundary - part;
550   }
551 
552   if (part == boundary) {
553     while (--count)
554       if (parts[count])
555         return ~(APFloatBase::integerPart) 0; /* A lot.  */
556 
557     return parts[0];
558   } else if (part == boundary - 1) {
559     while (--count)
560       if (~parts[count])
561         return ~(APFloatBase::integerPart) 0; /* A lot.  */
562 
563     return -parts[0];
564   }
565 
566   return ~(APFloatBase::integerPart) 0; /* A lot.  */
567 }
568 
569 /* Place pow(5, power) in DST, and return the number of parts used.
570    DST must be at least one part larger than size of the answer.  */
571 static unsigned int
powerOf5(APFloatBase::integerPart * dst,unsigned int power)572 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
573   static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
574   APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
575   pow5s[0] = 78125 * 5;
576 
577   unsigned int partsCount[16] = { 1 };
578   APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
579   unsigned int result;
580   assert(power <= maxExponent);
581 
582   p1 = dst;
583   p2 = scratch;
584 
585   *p1 = firstEightPowers[power & 7];
586   power >>= 3;
587 
588   result = 1;
589   pow5 = pow5s;
590 
591   for (unsigned int n = 0; power; power >>= 1, n++) {
592     unsigned int pc;
593 
594     pc = partsCount[n];
595 
596     /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
597     if (pc == 0) {
598       pc = partsCount[n - 1];
599       APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
600       pc *= 2;
601       if (pow5[pc - 1] == 0)
602         pc--;
603       partsCount[n] = pc;
604     }
605 
606     if (power & 1) {
607       APFloatBase::integerPart *tmp;
608 
609       APInt::tcFullMultiply(p2, p1, pow5, result, pc);
610       result += pc;
611       if (p2[result - 1] == 0)
612         result--;
613 
614       /* Now result is in p1 with partsCount parts and p2 is scratch
615          space.  */
616       tmp = p1;
617       p1 = p2;
618       p2 = tmp;
619     }
620 
621     pow5 += pc;
622   }
623 
624   if (p1 != dst)
625     APInt::tcAssign(dst, p1, result);
626 
627   return result;
628 }
629 
630 /* Zero at the end to avoid modular arithmetic when adding one; used
631    when rounding up during hexadecimal output.  */
632 static const char hexDigitsLower[] = "0123456789abcdef0";
633 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
634 static const char infinityL[] = "infinity";
635 static const char infinityU[] = "INFINITY";
636 static const char NaNL[] = "nan";
637 static const char NaNU[] = "NAN";
638 
639 /* Write out an integerPart in hexadecimal, starting with the most
640    significant nibble.  Write out exactly COUNT hexdigits, return
641    COUNT.  */
642 static unsigned int
partAsHex(char * dst,APFloatBase::integerPart part,unsigned int count,const char * hexDigitChars)643 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
644            const char *hexDigitChars)
645 {
646   unsigned int result = count;
647 
648   assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
649 
650   part >>= (APFloatBase::integerPartWidth - 4 * count);
651   while (count--) {
652     dst[count] = hexDigitChars[part & 0xf];
653     part >>= 4;
654   }
655 
656   return result;
657 }
658 
659 /* Write out an unsigned decimal integer.  */
660 static char *
writeUnsignedDecimal(char * dst,unsigned int n)661 writeUnsignedDecimal (char *dst, unsigned int n)
662 {
663   char buff[40], *p;
664 
665   p = buff;
666   do
667     *p++ = '0' + n % 10;
668   while (n /= 10);
669 
670   do
671     *dst++ = *--p;
672   while (p != buff);
673 
674   return dst;
675 }
676 
677 /* Write out a signed decimal integer.  */
678 static char *
writeSignedDecimal(char * dst,int value)679 writeSignedDecimal (char *dst, int value)
680 {
681   if (value < 0) {
682     *dst++ = '-';
683     dst = writeUnsignedDecimal(dst, -(unsigned) value);
684   } else
685     dst = writeUnsignedDecimal(dst, value);
686 
687   return dst;
688 }
689 
690 namespace detail {
691 /* Constructors.  */
initialize(const fltSemantics * ourSemantics)692 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
693   unsigned int count;
694 
695   semantics = ourSemantics;
696   count = partCount();
697   if (count > 1)
698     significand.parts = new integerPart[count];
699 }
700 
freeSignificand()701 void IEEEFloat::freeSignificand() {
702   if (needsCleanup())
703     delete [] significand.parts;
704 }
705 
assign(const IEEEFloat & rhs)706 void IEEEFloat::assign(const IEEEFloat &rhs) {
707   assert(semantics == rhs.semantics);
708 
709   sign = rhs.sign;
710   category = rhs.category;
711   exponent = rhs.exponent;
712   if (isFiniteNonZero() || category == fcNaN)
713     copySignificand(rhs);
714 }
715 
copySignificand(const IEEEFloat & rhs)716 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
717   assert(isFiniteNonZero() || category == fcNaN);
718   assert(rhs.partCount() >= partCount());
719 
720   APInt::tcAssign(significandParts(), rhs.significandParts(),
721                   partCount());
722 }
723 
724 /* Make this number a NaN, with an arbitrary but deterministic value
725    for the significand.  If double or longer, this is a signalling NaN,
726    which may not be ideal.  If float, this is QNaN(0).  */
makeNaN(bool SNaN,bool Negative,const APInt * fill)727 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
728   category = fcNaN;
729   sign = Negative;
730 
731   integerPart *significand = significandParts();
732   unsigned numParts = partCount();
733 
734   // Set the significand bits to the fill.
735   if (!fill || fill->getNumWords() < numParts)
736     APInt::tcSet(significand, 0, numParts);
737   if (fill) {
738     APInt::tcAssign(significand, fill->getRawData(),
739                     std::min(fill->getNumWords(), numParts));
740 
741     // Zero out the excess bits of the significand.
742     unsigned bitsToPreserve = semantics->precision - 1;
743     unsigned part = bitsToPreserve / 64;
744     bitsToPreserve %= 64;
745     significand[part] &= ((1ULL << bitsToPreserve) - 1);
746     for (part++; part != numParts; ++part)
747       significand[part] = 0;
748   }
749 
750   unsigned QNaNBit = semantics->precision - 2;
751 
752   if (SNaN) {
753     // We always have to clear the QNaN bit to make it an SNaN.
754     APInt::tcClearBit(significand, QNaNBit);
755 
756     // If there are no bits set in the payload, we have to set
757     // *something* to make it a NaN instead of an infinity;
758     // conventionally, this is the next bit down from the QNaN bit.
759     if (APInt::tcIsZero(significand, numParts))
760       APInt::tcSetBit(significand, QNaNBit - 1);
761   } else {
762     // We always have to set the QNaN bit to make it a QNaN.
763     APInt::tcSetBit(significand, QNaNBit);
764   }
765 
766   // For x87 extended precision, we want to make a NaN, not a
767   // pseudo-NaN.  Maybe we should expose the ability to make
768   // pseudo-NaNs?
769   if (semantics == &semX87DoubleExtended)
770     APInt::tcSetBit(significand, QNaNBit + 1);
771 }
772 
operator =(const IEEEFloat & rhs)773 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
774   if (this != &rhs) {
775     if (semantics != rhs.semantics) {
776       freeSignificand();
777       initialize(rhs.semantics);
778     }
779     assign(rhs);
780   }
781 
782   return *this;
783 }
784 
operator =(IEEEFloat && rhs)785 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
786   freeSignificand();
787 
788   semantics = rhs.semantics;
789   significand = rhs.significand;
790   exponent = rhs.exponent;
791   category = rhs.category;
792   sign = rhs.sign;
793 
794   rhs.semantics = &semBogus;
795   return *this;
796 }
797 
isDenormal() const798 bool IEEEFloat::isDenormal() const {
799   return isFiniteNonZero() && (exponent == semantics->minExponent) &&
800          (APInt::tcExtractBit(significandParts(),
801                               semantics->precision - 1) == 0);
802 }
803 
isSmallest() const804 bool IEEEFloat::isSmallest() const {
805   // The smallest number by magnitude in our format will be the smallest
806   // denormal, i.e. the floating point number with exponent being minimum
807   // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
808   return isFiniteNonZero() && exponent == semantics->minExponent &&
809     significandMSB() == 0;
810 }
811 
isSignificandAllOnes() const812 bool IEEEFloat::isSignificandAllOnes() const {
813   // Test if the significand excluding the integral bit is all ones. This allows
814   // us to test for binade boundaries.
815   const integerPart *Parts = significandParts();
816   const unsigned PartCount = partCount();
817   for (unsigned i = 0; i < PartCount - 1; i++)
818     if (~Parts[i])
819       return false;
820 
821   // Set the unused high bits to all ones when we compare.
822   const unsigned NumHighBits =
823     PartCount*integerPartWidth - semantics->precision + 1;
824   assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
825          "fill than integerPartWidth");
826   const integerPart HighBitFill =
827     ~integerPart(0) << (integerPartWidth - NumHighBits);
828   if (~(Parts[PartCount - 1] | HighBitFill))
829     return false;
830 
831   return true;
832 }
833 
isSignificandAllZeros() const834 bool IEEEFloat::isSignificandAllZeros() const {
835   // Test if the significand excluding the integral bit is all zeros. This
836   // allows us to test for binade boundaries.
837   const integerPart *Parts = significandParts();
838   const unsigned PartCount = partCount();
839 
840   for (unsigned i = 0; i < PartCount - 1; i++)
841     if (Parts[i])
842       return false;
843 
844   const unsigned NumHighBits =
845     PartCount*integerPartWidth - semantics->precision + 1;
846   assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
847          "clear than integerPartWidth");
848   const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
849 
850   if (Parts[PartCount - 1] & HighBitMask)
851     return false;
852 
853   return true;
854 }
855 
isLargest() const856 bool IEEEFloat::isLargest() const {
857   // The largest number by magnitude in our format will be the floating point
858   // number with maximum exponent and with significand that is all ones.
859   return isFiniteNonZero() && exponent == semantics->maxExponent
860     && isSignificandAllOnes();
861 }
862 
isInteger() const863 bool IEEEFloat::isInteger() const {
864   // This could be made more efficient; I'm going for obviously correct.
865   if (!isFinite()) return false;
866   IEEEFloat truncated = *this;
867   truncated.roundToIntegral(rmTowardZero);
868   return compare(truncated) == cmpEqual;
869 }
870 
bitwiseIsEqual(const IEEEFloat & rhs) const871 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
872   if (this == &rhs)
873     return true;
874   if (semantics != rhs.semantics ||
875       category != rhs.category ||
876       sign != rhs.sign)
877     return false;
878   if (category==fcZero || category==fcInfinity)
879     return true;
880 
881   if (isFiniteNonZero() && exponent != rhs.exponent)
882     return false;
883 
884   return std::equal(significandParts(), significandParts() + partCount(),
885                     rhs.significandParts());
886 }
887 
IEEEFloat(const fltSemantics & ourSemantics,integerPart value)888 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
889   initialize(&ourSemantics);
890   sign = 0;
891   category = fcNormal;
892   zeroSignificand();
893   exponent = ourSemantics.precision - 1;
894   significandParts()[0] = value;
895   normalize(rmNearestTiesToEven, lfExactlyZero);
896 }
897 
IEEEFloat(const fltSemantics & ourSemantics)898 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
899   initialize(&ourSemantics);
900   category = fcZero;
901   sign = false;
902 }
903 
904 // Delegate to the previous constructor, because later copy constructor may
905 // actually inspects category, which can't be garbage.
IEEEFloat(const fltSemantics & ourSemantics,uninitializedTag tag)906 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
907     : IEEEFloat(ourSemantics) {}
908 
IEEEFloat(const IEEEFloat & rhs)909 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
910   initialize(rhs.semantics);
911   assign(rhs);
912 }
913 
IEEEFloat(IEEEFloat && rhs)914 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
915   *this = std::move(rhs);
916 }
917 
~IEEEFloat()918 IEEEFloat::~IEEEFloat() { freeSignificand(); }
919 
partCount() const920 unsigned int IEEEFloat::partCount() const {
921   return partCountForBits(semantics->precision + 1);
922 }
923 
significandParts() const924 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
925   return const_cast<IEEEFloat *>(this)->significandParts();
926 }
927 
significandParts()928 IEEEFloat::integerPart *IEEEFloat::significandParts() {
929   if (partCount() > 1)
930     return significand.parts;
931   else
932     return &significand.part;
933 }
934 
zeroSignificand()935 void IEEEFloat::zeroSignificand() {
936   APInt::tcSet(significandParts(), 0, partCount());
937 }
938 
939 /* Increment an fcNormal floating point number's significand.  */
incrementSignificand()940 void IEEEFloat::incrementSignificand() {
941   integerPart carry;
942 
943   carry = APInt::tcIncrement(significandParts(), partCount());
944 
945   /* Our callers should never cause us to overflow.  */
946   assert(carry == 0);
947   (void)carry;
948 }
949 
950 /* Add the significand of the RHS.  Returns the carry flag.  */
addSignificand(const IEEEFloat & rhs)951 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
952   integerPart *parts;
953 
954   parts = significandParts();
955 
956   assert(semantics == rhs.semantics);
957   assert(exponent == rhs.exponent);
958 
959   return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
960 }
961 
962 /* Subtract the significand of the RHS with a borrow flag.  Returns
963    the borrow flag.  */
subtractSignificand(const IEEEFloat & rhs,integerPart borrow)964 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
965                                                       integerPart borrow) {
966   integerPart *parts;
967 
968   parts = significandParts();
969 
970   assert(semantics == rhs.semantics);
971   assert(exponent == rhs.exponent);
972 
973   return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
974                            partCount());
975 }
976 
977 /* Multiply the significand of the RHS.  If ADDEND is non-NULL, add it
978    on to the full-precision result of the multiplication.  Returns the
979    lost fraction.  */
multiplySignificand(const IEEEFloat & rhs,const IEEEFloat * addend)980 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
981                                             const IEEEFloat *addend) {
982   unsigned int omsb;        // One, not zero, based MSB.
983   unsigned int partsCount, newPartsCount, precision;
984   integerPart *lhsSignificand;
985   integerPart scratch[4];
986   integerPart *fullSignificand;
987   lostFraction lost_fraction;
988   bool ignored;
989 
990   assert(semantics == rhs.semantics);
991 
992   precision = semantics->precision;
993 
994   // Allocate space for twice as many bits as the original significand, plus one
995   // extra bit for the addition to overflow into.
996   newPartsCount = partCountForBits(precision * 2 + 1);
997 
998   if (newPartsCount > 4)
999     fullSignificand = new integerPart[newPartsCount];
1000   else
1001     fullSignificand = scratch;
1002 
1003   lhsSignificand = significandParts();
1004   partsCount = partCount();
1005 
1006   APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1007                         rhs.significandParts(), partsCount, partsCount);
1008 
1009   lost_fraction = lfExactlyZero;
1010   omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1011   exponent += rhs.exponent;
1012 
1013   // Assume the operands involved in the multiplication are single-precision
1014   // FP, and the two multiplicants are:
1015   //   *this = a23 . a22 ... a0 * 2^e1
1016   //     rhs = b23 . b22 ... b0 * 2^e2
1017   // the result of multiplication is:
1018   //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1019   // Note that there are three significant bits at the left-hand side of the
1020   // radix point: two for the multiplication, and an overflow bit for the
1021   // addition (that will always be zero at this point). Move the radix point
1022   // toward left by two bits, and adjust exponent accordingly.
1023   exponent += 2;
1024 
1025   if (addend && addend->isNonZero()) {
1026     // The intermediate result of the multiplication has "2 * precision"
1027     // signicant bit; adjust the addend to be consistent with mul result.
1028     //
1029     Significand savedSignificand = significand;
1030     const fltSemantics *savedSemantics = semantics;
1031     fltSemantics extendedSemantics;
1032     opStatus status;
1033     unsigned int extendedPrecision;
1034 
1035     // Normalize our MSB to one below the top bit to allow for overflow.
1036     extendedPrecision = 2 * precision + 1;
1037     if (omsb != extendedPrecision - 1) {
1038       assert(extendedPrecision > omsb);
1039       APInt::tcShiftLeft(fullSignificand, newPartsCount,
1040                          (extendedPrecision - 1) - omsb);
1041       exponent -= (extendedPrecision - 1) - omsb;
1042     }
1043 
1044     /* Create new semantics.  */
1045     extendedSemantics = *semantics;
1046     extendedSemantics.precision = extendedPrecision;
1047 
1048     if (newPartsCount == 1)
1049       significand.part = fullSignificand[0];
1050     else
1051       significand.parts = fullSignificand;
1052     semantics = &extendedSemantics;
1053 
1054     IEEEFloat extendedAddend(*addend);
1055     status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1056     assert(status == opOK);
1057     (void)status;
1058 
1059     // Shift the significand of the addend right by one bit. This guarantees
1060     // that the high bit of the significand is zero (same as fullSignificand),
1061     // so the addition will overflow (if it does overflow at all) into the top bit.
1062     lost_fraction = extendedAddend.shiftSignificandRight(1);
1063     assert(lost_fraction == lfExactlyZero &&
1064            "Lost precision while shifting addend for fused-multiply-add.");
1065 
1066     lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1067 
1068     /* Restore our state.  */
1069     if (newPartsCount == 1)
1070       fullSignificand[0] = significand.part;
1071     significand = savedSignificand;
1072     semantics = savedSemantics;
1073 
1074     omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1075   }
1076 
1077   // Convert the result having "2 * precision" significant-bits back to the one
1078   // having "precision" significant-bits. First, move the radix point from
1079   // poision "2*precision - 1" to "precision - 1". The exponent need to be
1080   // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1081   exponent -= precision + 1;
1082 
1083   // In case MSB resides at the left-hand side of radix point, shift the
1084   // mantissa right by some amount to make sure the MSB reside right before
1085   // the radix point (i.e. "MSB . rest-significant-bits").
1086   //
1087   // Note that the result is not normalized when "omsb < precision". So, the
1088   // caller needs to call IEEEFloat::normalize() if normalized value is
1089   // expected.
1090   if (omsb > precision) {
1091     unsigned int bits, significantParts;
1092     lostFraction lf;
1093 
1094     bits = omsb - precision;
1095     significantParts = partCountForBits(omsb);
1096     lf = shiftRight(fullSignificand, significantParts, bits);
1097     lost_fraction = combineLostFractions(lf, lost_fraction);
1098     exponent += bits;
1099   }
1100 
1101   APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1102 
1103   if (newPartsCount > 4)
1104     delete [] fullSignificand;
1105 
1106   return lost_fraction;
1107 }
1108 
1109 /* Multiply the significands of LHS and RHS to DST.  */
divideSignificand(const IEEEFloat & rhs)1110 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1111   unsigned int bit, i, partsCount;
1112   const integerPart *rhsSignificand;
1113   integerPart *lhsSignificand, *dividend, *divisor;
1114   integerPart scratch[4];
1115   lostFraction lost_fraction;
1116 
1117   assert(semantics == rhs.semantics);
1118 
1119   lhsSignificand = significandParts();
1120   rhsSignificand = rhs.significandParts();
1121   partsCount = partCount();
1122 
1123   if (partsCount > 2)
1124     dividend = new integerPart[partsCount * 2];
1125   else
1126     dividend = scratch;
1127 
1128   divisor = dividend + partsCount;
1129 
1130   /* Copy the dividend and divisor as they will be modified in-place.  */
1131   for (i = 0; i < partsCount; i++) {
1132     dividend[i] = lhsSignificand[i];
1133     divisor[i] = rhsSignificand[i];
1134     lhsSignificand[i] = 0;
1135   }
1136 
1137   exponent -= rhs.exponent;
1138 
1139   unsigned int precision = semantics->precision;
1140 
1141   /* Normalize the divisor.  */
1142   bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1143   if (bit) {
1144     exponent += bit;
1145     APInt::tcShiftLeft(divisor, partsCount, bit);
1146   }
1147 
1148   /* Normalize the dividend.  */
1149   bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1150   if (bit) {
1151     exponent -= bit;
1152     APInt::tcShiftLeft(dividend, partsCount, bit);
1153   }
1154 
1155   /* Ensure the dividend >= divisor initially for the loop below.
1156      Incidentally, this means that the division loop below is
1157      guaranteed to set the integer bit to one.  */
1158   if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1159     exponent--;
1160     APInt::tcShiftLeft(dividend, partsCount, 1);
1161     assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1162   }
1163 
1164   /* Long division.  */
1165   for (bit = precision; bit; bit -= 1) {
1166     if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1167       APInt::tcSubtract(dividend, divisor, 0, partsCount);
1168       APInt::tcSetBit(lhsSignificand, bit - 1);
1169     }
1170 
1171     APInt::tcShiftLeft(dividend, partsCount, 1);
1172   }
1173 
1174   /* Figure out the lost fraction.  */
1175   int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1176 
1177   if (cmp > 0)
1178     lost_fraction = lfMoreThanHalf;
1179   else if (cmp == 0)
1180     lost_fraction = lfExactlyHalf;
1181   else if (APInt::tcIsZero(dividend, partsCount))
1182     lost_fraction = lfExactlyZero;
1183   else
1184     lost_fraction = lfLessThanHalf;
1185 
1186   if (partsCount > 2)
1187     delete [] dividend;
1188 
1189   return lost_fraction;
1190 }
1191 
significandMSB() const1192 unsigned int IEEEFloat::significandMSB() const {
1193   return APInt::tcMSB(significandParts(), partCount());
1194 }
1195 
significandLSB() const1196 unsigned int IEEEFloat::significandLSB() const {
1197   return APInt::tcLSB(significandParts(), partCount());
1198 }
1199 
1200 /* Note that a zero result is NOT normalized to fcZero.  */
shiftSignificandRight(unsigned int bits)1201 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1202   /* Our exponent should not overflow.  */
1203   assert((ExponentType) (exponent + bits) >= exponent);
1204 
1205   exponent += bits;
1206 
1207   return shiftRight(significandParts(), partCount(), bits);
1208 }
1209 
1210 /* Shift the significand left BITS bits, subtract BITS from its exponent.  */
shiftSignificandLeft(unsigned int bits)1211 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1212   assert(bits < semantics->precision);
1213 
1214   if (bits) {
1215     unsigned int partsCount = partCount();
1216 
1217     APInt::tcShiftLeft(significandParts(), partsCount, bits);
1218     exponent -= bits;
1219 
1220     assert(!APInt::tcIsZero(significandParts(), partsCount));
1221   }
1222 }
1223 
1224 IEEEFloat::cmpResult
compareAbsoluteValue(const IEEEFloat & rhs) const1225 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1226   int compare;
1227 
1228   assert(semantics == rhs.semantics);
1229   assert(isFiniteNonZero());
1230   assert(rhs.isFiniteNonZero());
1231 
1232   compare = exponent - rhs.exponent;
1233 
1234   /* If exponents are equal, do an unsigned bignum comparison of the
1235      significands.  */
1236   if (compare == 0)
1237     compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1238                                partCount());
1239 
1240   if (compare > 0)
1241     return cmpGreaterThan;
1242   else if (compare < 0)
1243     return cmpLessThan;
1244   else
1245     return cmpEqual;
1246 }
1247 
1248 /* Handle overflow.  Sign is preserved.  We either become infinity or
1249    the largest finite number.  */
handleOverflow(roundingMode rounding_mode)1250 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1251   /* Infinity?  */
1252   if (rounding_mode == rmNearestTiesToEven ||
1253       rounding_mode == rmNearestTiesToAway ||
1254       (rounding_mode == rmTowardPositive && !sign) ||
1255       (rounding_mode == rmTowardNegative && sign)) {
1256     category = fcInfinity;
1257     return (opStatus) (opOverflow | opInexact);
1258   }
1259 
1260   /* Otherwise we become the largest finite number.  */
1261   category = fcNormal;
1262   exponent = semantics->maxExponent;
1263   APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1264                                    semantics->precision);
1265 
1266   return opInexact;
1267 }
1268 
1269 /* Returns TRUE if, when truncating the current number, with BIT the
1270    new LSB, with the given lost fraction and rounding mode, the result
1271    would need to be rounded away from zero (i.e., by increasing the
1272    signficand).  This routine must work for fcZero of both signs, and
1273    fcNormal numbers.  */
roundAwayFromZero(roundingMode rounding_mode,lostFraction lost_fraction,unsigned int bit) const1274 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1275                                   lostFraction lost_fraction,
1276                                   unsigned int bit) const {
1277   /* NaNs and infinities should not have lost fractions.  */
1278   assert(isFiniteNonZero() || category == fcZero);
1279 
1280   /* Current callers never pass this so we don't handle it.  */
1281   assert(lost_fraction != lfExactlyZero);
1282 
1283   switch (rounding_mode) {
1284   case rmNearestTiesToAway:
1285     return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1286 
1287   case rmNearestTiesToEven:
1288     if (lost_fraction == lfMoreThanHalf)
1289       return true;
1290 
1291     /* Our zeroes don't have a significand to test.  */
1292     if (lost_fraction == lfExactlyHalf && category != fcZero)
1293       return APInt::tcExtractBit(significandParts(), bit);
1294 
1295     return false;
1296 
1297   case rmTowardZero:
1298     return false;
1299 
1300   case rmTowardPositive:
1301     return !sign;
1302 
1303   case rmTowardNegative:
1304     return sign;
1305   }
1306   llvm_unreachable("Invalid rounding mode found");
1307 }
1308 
normalize(roundingMode rounding_mode,lostFraction lost_fraction)1309 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1310                                          lostFraction lost_fraction) {
1311   unsigned int omsb;                /* One, not zero, based MSB.  */
1312   int exponentChange;
1313 
1314   if (!isFiniteNonZero())
1315     return opOK;
1316 
1317   /* Before rounding normalize the exponent of fcNormal numbers.  */
1318   omsb = significandMSB() + 1;
1319 
1320   if (omsb) {
1321     /* OMSB is numbered from 1.  We want to place it in the integer
1322        bit numbered PRECISION if possible, with a compensating change in
1323        the exponent.  */
1324     exponentChange = omsb - semantics->precision;
1325 
1326     /* If the resulting exponent is too high, overflow according to
1327        the rounding mode.  */
1328     if (exponent + exponentChange > semantics->maxExponent)
1329       return handleOverflow(rounding_mode);
1330 
1331     /* Subnormal numbers have exponent minExponent, and their MSB
1332        is forced based on that.  */
1333     if (exponent + exponentChange < semantics->minExponent)
1334       exponentChange = semantics->minExponent - exponent;
1335 
1336     /* Shifting left is easy as we don't lose precision.  */
1337     if (exponentChange < 0) {
1338       assert(lost_fraction == lfExactlyZero);
1339 
1340       shiftSignificandLeft(-exponentChange);
1341 
1342       return opOK;
1343     }
1344 
1345     if (exponentChange > 0) {
1346       lostFraction lf;
1347 
1348       /* Shift right and capture any new lost fraction.  */
1349       lf = shiftSignificandRight(exponentChange);
1350 
1351       lost_fraction = combineLostFractions(lf, lost_fraction);
1352 
1353       /* Keep OMSB up-to-date.  */
1354       if (omsb > (unsigned) exponentChange)
1355         omsb -= exponentChange;
1356       else
1357         omsb = 0;
1358     }
1359   }
1360 
1361   /* Now round the number according to rounding_mode given the lost
1362      fraction.  */
1363 
1364   /* As specified in IEEE 754, since we do not trap we do not report
1365      underflow for exact results.  */
1366   if (lost_fraction == lfExactlyZero) {
1367     /* Canonicalize zeroes.  */
1368     if (omsb == 0)
1369       category = fcZero;
1370 
1371     return opOK;
1372   }
1373 
1374   /* Increment the significand if we're rounding away from zero.  */
1375   if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1376     if (omsb == 0)
1377       exponent = semantics->minExponent;
1378 
1379     incrementSignificand();
1380     omsb = significandMSB() + 1;
1381 
1382     /* Did the significand increment overflow?  */
1383     if (omsb == (unsigned) semantics->precision + 1) {
1384       /* Renormalize by incrementing the exponent and shifting our
1385          significand right one.  However if we already have the
1386          maximum exponent we overflow to infinity.  */
1387       if (exponent == semantics->maxExponent) {
1388         category = fcInfinity;
1389 
1390         return (opStatus) (opOverflow | opInexact);
1391       }
1392 
1393       shiftSignificandRight(1);
1394 
1395       return opInexact;
1396     }
1397   }
1398 
1399   /* The normal case - we were and are not denormal, and any
1400      significand increment above didn't overflow.  */
1401   if (omsb == semantics->precision)
1402     return opInexact;
1403 
1404   /* We have a non-zero denormal.  */
1405   assert(omsb < semantics->precision);
1406 
1407   /* Canonicalize zeroes.  */
1408   if (omsb == 0)
1409     category = fcZero;
1410 
1411   /* The fcZero case is a denormal that underflowed to zero.  */
1412   return (opStatus) (opUnderflow | opInexact);
1413 }
1414 
addOrSubtractSpecials(const IEEEFloat & rhs,bool subtract)1415 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1416                                                      bool subtract) {
1417   switch (PackCategoriesIntoKey(category, rhs.category)) {
1418   default:
1419     llvm_unreachable(nullptr);
1420 
1421   case PackCategoriesIntoKey(fcNaN, fcZero):
1422   case PackCategoriesIntoKey(fcNaN, fcNormal):
1423   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1424   case PackCategoriesIntoKey(fcNaN, fcNaN):
1425   case PackCategoriesIntoKey(fcNormal, fcZero):
1426   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1427   case PackCategoriesIntoKey(fcInfinity, fcZero):
1428     return opOK;
1429 
1430   case PackCategoriesIntoKey(fcZero, fcNaN):
1431   case PackCategoriesIntoKey(fcNormal, fcNaN):
1432   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1433     // We need to be sure to flip the sign here for subtraction because we
1434     // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1435     sign = rhs.sign ^ subtract;
1436     category = fcNaN;
1437     copySignificand(rhs);
1438     return opOK;
1439 
1440   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1441   case PackCategoriesIntoKey(fcZero, fcInfinity):
1442     category = fcInfinity;
1443     sign = rhs.sign ^ subtract;
1444     return opOK;
1445 
1446   case PackCategoriesIntoKey(fcZero, fcNormal):
1447     assign(rhs);
1448     sign = rhs.sign ^ subtract;
1449     return opOK;
1450 
1451   case PackCategoriesIntoKey(fcZero, fcZero):
1452     /* Sign depends on rounding mode; handled by caller.  */
1453     return opOK;
1454 
1455   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1456     /* Differently signed infinities can only be validly
1457        subtracted.  */
1458     if (((sign ^ rhs.sign)!=0) != subtract) {
1459       makeNaN();
1460       return opInvalidOp;
1461     }
1462 
1463     return opOK;
1464 
1465   case PackCategoriesIntoKey(fcNormal, fcNormal):
1466     return opDivByZero;
1467   }
1468 }
1469 
1470 /* Add or subtract two normal numbers.  */
addOrSubtractSignificand(const IEEEFloat & rhs,bool subtract)1471 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1472                                                  bool subtract) {
1473   integerPart carry;
1474   lostFraction lost_fraction;
1475   int bits;
1476 
1477   /* Determine if the operation on the absolute values is effectively
1478      an addition or subtraction.  */
1479   subtract ^= static_cast<bool>(sign ^ rhs.sign);
1480 
1481   /* Are we bigger exponent-wise than the RHS?  */
1482   bits = exponent - rhs.exponent;
1483 
1484   /* Subtraction is more subtle than one might naively expect.  */
1485   if (subtract) {
1486     IEEEFloat temp_rhs(rhs);
1487     bool reverse;
1488 
1489     if (bits == 0) {
1490       reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1491       lost_fraction = lfExactlyZero;
1492     } else if (bits > 0) {
1493       lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1494       shiftSignificandLeft(1);
1495       reverse = false;
1496     } else {
1497       lost_fraction = shiftSignificandRight(-bits - 1);
1498       temp_rhs.shiftSignificandLeft(1);
1499       reverse = true;
1500     }
1501 
1502     if (reverse) {
1503       carry = temp_rhs.subtractSignificand
1504         (*this, lost_fraction != lfExactlyZero);
1505       copySignificand(temp_rhs);
1506       sign = !sign;
1507     } else {
1508       carry = subtractSignificand
1509         (temp_rhs, lost_fraction != lfExactlyZero);
1510     }
1511 
1512     /* Invert the lost fraction - it was on the RHS and
1513        subtracted.  */
1514     if (lost_fraction == lfLessThanHalf)
1515       lost_fraction = lfMoreThanHalf;
1516     else if (lost_fraction == lfMoreThanHalf)
1517       lost_fraction = lfLessThanHalf;
1518 
1519     /* The code above is intended to ensure that no borrow is
1520        necessary.  */
1521     assert(!carry);
1522     (void)carry;
1523   } else {
1524     if (bits > 0) {
1525       IEEEFloat temp_rhs(rhs);
1526 
1527       lost_fraction = temp_rhs.shiftSignificandRight(bits);
1528       carry = addSignificand(temp_rhs);
1529     } else {
1530       lost_fraction = shiftSignificandRight(-bits);
1531       carry = addSignificand(rhs);
1532     }
1533 
1534     /* We have a guard bit; generating a carry cannot happen.  */
1535     assert(!carry);
1536     (void)carry;
1537   }
1538 
1539   return lost_fraction;
1540 }
1541 
multiplySpecials(const IEEEFloat & rhs)1542 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1543   switch (PackCategoriesIntoKey(category, rhs.category)) {
1544   default:
1545     llvm_unreachable(nullptr);
1546 
1547   case PackCategoriesIntoKey(fcNaN, fcZero):
1548   case PackCategoriesIntoKey(fcNaN, fcNormal):
1549   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1550   case PackCategoriesIntoKey(fcNaN, fcNaN):
1551     sign = false;
1552     return opOK;
1553 
1554   case PackCategoriesIntoKey(fcZero, fcNaN):
1555   case PackCategoriesIntoKey(fcNormal, fcNaN):
1556   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1557     sign = false;
1558     category = fcNaN;
1559     copySignificand(rhs);
1560     return opOK;
1561 
1562   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1563   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1564   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1565     category = fcInfinity;
1566     return opOK;
1567 
1568   case PackCategoriesIntoKey(fcZero, fcNormal):
1569   case PackCategoriesIntoKey(fcNormal, fcZero):
1570   case PackCategoriesIntoKey(fcZero, fcZero):
1571     category = fcZero;
1572     return opOK;
1573 
1574   case PackCategoriesIntoKey(fcZero, fcInfinity):
1575   case PackCategoriesIntoKey(fcInfinity, fcZero):
1576     makeNaN();
1577     return opInvalidOp;
1578 
1579   case PackCategoriesIntoKey(fcNormal, fcNormal):
1580     return opOK;
1581   }
1582 }
1583 
divideSpecials(const IEEEFloat & rhs)1584 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1585   switch (PackCategoriesIntoKey(category, rhs.category)) {
1586   default:
1587     llvm_unreachable(nullptr);
1588 
1589   case PackCategoriesIntoKey(fcZero, fcNaN):
1590   case PackCategoriesIntoKey(fcNormal, fcNaN):
1591   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1592     category = fcNaN;
1593     copySignificand(rhs);
1594     LLVM_FALLTHROUGH;
1595   case PackCategoriesIntoKey(fcNaN, fcZero):
1596   case PackCategoriesIntoKey(fcNaN, fcNormal):
1597   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1598   case PackCategoriesIntoKey(fcNaN, fcNaN):
1599     sign = false;
1600     LLVM_FALLTHROUGH;
1601   case PackCategoriesIntoKey(fcInfinity, fcZero):
1602   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1603   case PackCategoriesIntoKey(fcZero, fcInfinity):
1604   case PackCategoriesIntoKey(fcZero, fcNormal):
1605     return opOK;
1606 
1607   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1608     category = fcZero;
1609     return opOK;
1610 
1611   case PackCategoriesIntoKey(fcNormal, fcZero):
1612     category = fcInfinity;
1613     return opDivByZero;
1614 
1615   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1616   case PackCategoriesIntoKey(fcZero, fcZero):
1617     makeNaN();
1618     return opInvalidOp;
1619 
1620   case PackCategoriesIntoKey(fcNormal, fcNormal):
1621     return opOK;
1622   }
1623 }
1624 
modSpecials(const IEEEFloat & rhs)1625 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1626   switch (PackCategoriesIntoKey(category, rhs.category)) {
1627   default:
1628     llvm_unreachable(nullptr);
1629 
1630   case PackCategoriesIntoKey(fcNaN, fcZero):
1631   case PackCategoriesIntoKey(fcNaN, fcNormal):
1632   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1633   case PackCategoriesIntoKey(fcNaN, fcNaN):
1634   case PackCategoriesIntoKey(fcZero, fcInfinity):
1635   case PackCategoriesIntoKey(fcZero, fcNormal):
1636   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1637     return opOK;
1638 
1639   case PackCategoriesIntoKey(fcZero, fcNaN):
1640   case PackCategoriesIntoKey(fcNormal, fcNaN):
1641   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1642     sign = false;
1643     category = fcNaN;
1644     copySignificand(rhs);
1645     return opOK;
1646 
1647   case PackCategoriesIntoKey(fcNormal, fcZero):
1648   case PackCategoriesIntoKey(fcInfinity, fcZero):
1649   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1650   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1651   case PackCategoriesIntoKey(fcZero, fcZero):
1652     makeNaN();
1653     return opInvalidOp;
1654 
1655   case PackCategoriesIntoKey(fcNormal, fcNormal):
1656     return opOK;
1657   }
1658 }
1659 
1660 /* Change sign.  */
changeSign()1661 void IEEEFloat::changeSign() {
1662   /* Look mummy, this one's easy.  */
1663   sign = !sign;
1664 }
1665 
1666 /* Normalized addition or subtraction.  */
addOrSubtract(const IEEEFloat & rhs,roundingMode rounding_mode,bool subtract)1667 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1668                                              roundingMode rounding_mode,
1669                                              bool subtract) {
1670   opStatus fs;
1671 
1672   fs = addOrSubtractSpecials(rhs, subtract);
1673 
1674   /* This return code means it was not a simple case.  */
1675   if (fs == opDivByZero) {
1676     lostFraction lost_fraction;
1677 
1678     lost_fraction = addOrSubtractSignificand(rhs, subtract);
1679     fs = normalize(rounding_mode, lost_fraction);
1680 
1681     /* Can only be zero if we lost no fraction.  */
1682     assert(category != fcZero || lost_fraction == lfExactlyZero);
1683   }
1684 
1685   /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1686      positive zero unless rounding to minus infinity, except that
1687      adding two like-signed zeroes gives that zero.  */
1688   if (category == fcZero) {
1689     if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1690       sign = (rounding_mode == rmTowardNegative);
1691   }
1692 
1693   return fs;
1694 }
1695 
1696 /* Normalized addition.  */
add(const IEEEFloat & rhs,roundingMode rounding_mode)1697 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
1698                                    roundingMode rounding_mode) {
1699   return addOrSubtract(rhs, rounding_mode, false);
1700 }
1701 
1702 /* Normalized subtraction.  */
subtract(const IEEEFloat & rhs,roundingMode rounding_mode)1703 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
1704                                         roundingMode rounding_mode) {
1705   return addOrSubtract(rhs, rounding_mode, true);
1706 }
1707 
1708 /* Normalized multiply.  */
multiply(const IEEEFloat & rhs,roundingMode rounding_mode)1709 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
1710                                         roundingMode rounding_mode) {
1711   opStatus fs;
1712 
1713   sign ^= rhs.sign;
1714   fs = multiplySpecials(rhs);
1715 
1716   if (isFiniteNonZero()) {
1717     lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
1718     fs = normalize(rounding_mode, lost_fraction);
1719     if (lost_fraction != lfExactlyZero)
1720       fs = (opStatus) (fs | opInexact);
1721   }
1722 
1723   return fs;
1724 }
1725 
1726 /* Normalized divide.  */
divide(const IEEEFloat & rhs,roundingMode rounding_mode)1727 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
1728                                       roundingMode rounding_mode) {
1729   opStatus fs;
1730 
1731   sign ^= rhs.sign;
1732   fs = divideSpecials(rhs);
1733 
1734   if (isFiniteNonZero()) {
1735     lostFraction lost_fraction = divideSignificand(rhs);
1736     fs = normalize(rounding_mode, lost_fraction);
1737     if (lost_fraction != lfExactlyZero)
1738       fs = (opStatus) (fs | opInexact);
1739   }
1740 
1741   return fs;
1742 }
1743 
1744 /* Normalized remainder.  This is not currently correct in all cases.  */
remainder(const IEEEFloat & rhs)1745 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
1746   opStatus fs;
1747   IEEEFloat V = *this;
1748   unsigned int origSign = sign;
1749 
1750   fs = V.divide(rhs, rmNearestTiesToEven);
1751   if (fs == opDivByZero)
1752     return fs;
1753 
1754   int parts = partCount();
1755   integerPart *x = new integerPart[parts];
1756   bool ignored;
1757   fs = V.convertToInteger(makeMutableArrayRef(x, parts),
1758                           parts * integerPartWidth, true, rmNearestTiesToEven,
1759                           &ignored);
1760   if (fs == opInvalidOp) {
1761     delete[] x;
1762     return fs;
1763   }
1764 
1765   fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1766                                         rmNearestTiesToEven);
1767   assert(fs==opOK);   // should always work
1768 
1769   fs = V.multiply(rhs, rmNearestTiesToEven);
1770   assert(fs==opOK || fs==opInexact);   // should not overflow or underflow
1771 
1772   fs = subtract(V, rmNearestTiesToEven);
1773   assert(fs==opOK || fs==opInexact);   // likewise
1774 
1775   if (isZero())
1776     sign = origSign;    // IEEE754 requires this
1777   delete[] x;
1778   return fs;
1779 }
1780 
1781 /* Normalized llvm frem (C fmod). */
mod(const IEEEFloat & rhs)1782 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
1783   opStatus fs;
1784   fs = modSpecials(rhs);
1785   unsigned int origSign = sign;
1786 
1787   while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
1788          compareAbsoluteValue(rhs) != cmpLessThan) {
1789     IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven);
1790     if (compareAbsoluteValue(V) == cmpLessThan)
1791       V = scalbn(V, -1, rmNearestTiesToEven);
1792     V.sign = sign;
1793 
1794     fs = subtract(V, rmNearestTiesToEven);
1795     assert(fs==opOK);
1796   }
1797   if (isZero())
1798     sign = origSign; // fmod requires this
1799   return fs;
1800 }
1801 
1802 /* Normalized fused-multiply-add.  */
fusedMultiplyAdd(const IEEEFloat & multiplicand,const IEEEFloat & addend,roundingMode rounding_mode)1803 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
1804                                                 const IEEEFloat &addend,
1805                                                 roundingMode rounding_mode) {
1806   opStatus fs;
1807 
1808   /* Post-multiplication sign, before addition.  */
1809   sign ^= multiplicand.sign;
1810 
1811   /* If and only if all arguments are normal do we need to do an
1812      extended-precision calculation.  */
1813   if (isFiniteNonZero() &&
1814       multiplicand.isFiniteNonZero() &&
1815       addend.isFinite()) {
1816     lostFraction lost_fraction;
1817 
1818     lost_fraction = multiplySignificand(multiplicand, &addend);
1819     fs = normalize(rounding_mode, lost_fraction);
1820     if (lost_fraction != lfExactlyZero)
1821       fs = (opStatus) (fs | opInexact);
1822 
1823     /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1824        positive zero unless rounding to minus infinity, except that
1825        adding two like-signed zeroes gives that zero.  */
1826     if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1827       sign = (rounding_mode == rmTowardNegative);
1828   } else {
1829     fs = multiplySpecials(multiplicand);
1830 
1831     /* FS can only be opOK or opInvalidOp.  There is no more work
1832        to do in the latter case.  The IEEE-754R standard says it is
1833        implementation-defined in this case whether, if ADDEND is a
1834        quiet NaN, we raise invalid op; this implementation does so.
1835 
1836        If we need to do the addition we can do so with normal
1837        precision.  */
1838     if (fs == opOK)
1839       fs = addOrSubtract(addend, rounding_mode, false);
1840   }
1841 
1842   return fs;
1843 }
1844 
1845 /* Rounding-mode corrrect round to integral value.  */
roundToIntegral(roundingMode rounding_mode)1846 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
1847   opStatus fs;
1848 
1849   // If the exponent is large enough, we know that this value is already
1850   // integral, and the arithmetic below would potentially cause it to saturate
1851   // to +/-Inf.  Bail out early instead.
1852   if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1853     return opOK;
1854 
1855   // The algorithm here is quite simple: we add 2^(p-1), where p is the
1856   // precision of our format, and then subtract it back off again.  The choice
1857   // of rounding modes for the addition/subtraction determines the rounding mode
1858   // for our integral rounding as well.
1859   // NOTE: When the input value is negative, we do subtraction followed by
1860   // addition instead.
1861   APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1862   IntegerConstant <<= semanticsPrecision(*semantics)-1;
1863   IEEEFloat MagicConstant(*semantics);
1864   fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1865                                       rmNearestTiesToEven);
1866   MagicConstant.sign = sign;
1867 
1868   if (fs != opOK)
1869     return fs;
1870 
1871   // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1872   bool inputSign = isNegative();
1873 
1874   fs = add(MagicConstant, rounding_mode);
1875   if (fs != opOK && fs != opInexact)
1876     return fs;
1877 
1878   fs = subtract(MagicConstant, rounding_mode);
1879 
1880   // Restore the input sign.
1881   if (inputSign != isNegative())
1882     changeSign();
1883 
1884   return fs;
1885 }
1886 
1887 
1888 /* Comparison requires normalized numbers.  */
compare(const IEEEFloat & rhs) const1889 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
1890   cmpResult result;
1891 
1892   assert(semantics == rhs.semantics);
1893 
1894   switch (PackCategoriesIntoKey(category, rhs.category)) {
1895   default:
1896     llvm_unreachable(nullptr);
1897 
1898   case PackCategoriesIntoKey(fcNaN, fcZero):
1899   case PackCategoriesIntoKey(fcNaN, fcNormal):
1900   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1901   case PackCategoriesIntoKey(fcNaN, fcNaN):
1902   case PackCategoriesIntoKey(fcZero, fcNaN):
1903   case PackCategoriesIntoKey(fcNormal, fcNaN):
1904   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1905     return cmpUnordered;
1906 
1907   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1908   case PackCategoriesIntoKey(fcInfinity, fcZero):
1909   case PackCategoriesIntoKey(fcNormal, fcZero):
1910     if (sign)
1911       return cmpLessThan;
1912     else
1913       return cmpGreaterThan;
1914 
1915   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1916   case PackCategoriesIntoKey(fcZero, fcInfinity):
1917   case PackCategoriesIntoKey(fcZero, fcNormal):
1918     if (rhs.sign)
1919       return cmpGreaterThan;
1920     else
1921       return cmpLessThan;
1922 
1923   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1924     if (sign == rhs.sign)
1925       return cmpEqual;
1926     else if (sign)
1927       return cmpLessThan;
1928     else
1929       return cmpGreaterThan;
1930 
1931   case PackCategoriesIntoKey(fcZero, fcZero):
1932     return cmpEqual;
1933 
1934   case PackCategoriesIntoKey(fcNormal, fcNormal):
1935     break;
1936   }
1937 
1938   /* Two normal numbers.  Do they have the same sign?  */
1939   if (sign != rhs.sign) {
1940     if (sign)
1941       result = cmpLessThan;
1942     else
1943       result = cmpGreaterThan;
1944   } else {
1945     /* Compare absolute values; invert result if negative.  */
1946     result = compareAbsoluteValue(rhs);
1947 
1948     if (sign) {
1949       if (result == cmpLessThan)
1950         result = cmpGreaterThan;
1951       else if (result == cmpGreaterThan)
1952         result = cmpLessThan;
1953     }
1954   }
1955 
1956   return result;
1957 }
1958 
1959 /// IEEEFloat::convert - convert a value of one floating point type to another.
1960 /// The return value corresponds to the IEEE754 exceptions.  *losesInfo
1961 /// records whether the transformation lost information, i.e. whether
1962 /// converting the result back to the original type will produce the
1963 /// original value (this is almost the same as return value==fsOK, but there
1964 /// are edge cases where this is not so).
1965 
convert(const fltSemantics & toSemantics,roundingMode rounding_mode,bool * losesInfo)1966 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
1967                                        roundingMode rounding_mode,
1968                                        bool *losesInfo) {
1969   lostFraction lostFraction;
1970   unsigned int newPartCount, oldPartCount;
1971   opStatus fs;
1972   int shift;
1973   const fltSemantics &fromSemantics = *semantics;
1974 
1975   lostFraction = lfExactlyZero;
1976   newPartCount = partCountForBits(toSemantics.precision + 1);
1977   oldPartCount = partCount();
1978   shift = toSemantics.precision - fromSemantics.precision;
1979 
1980   bool X86SpecialNan = false;
1981   if (&fromSemantics == &semX87DoubleExtended &&
1982       &toSemantics != &semX87DoubleExtended && category == fcNaN &&
1983       (!(*significandParts() & 0x8000000000000000ULL) ||
1984        !(*significandParts() & 0x4000000000000000ULL))) {
1985     // x86 has some unusual NaNs which cannot be represented in any other
1986     // format; note them here.
1987     X86SpecialNan = true;
1988   }
1989 
1990   // If this is a truncation of a denormal number, and the target semantics
1991   // has larger exponent range than the source semantics (this can happen
1992   // when truncating from PowerPC double-double to double format), the
1993   // right shift could lose result mantissa bits.  Adjust exponent instead
1994   // of performing excessive shift.
1995   if (shift < 0 && isFiniteNonZero()) {
1996     int exponentChange = significandMSB() + 1 - fromSemantics.precision;
1997     if (exponent + exponentChange < toSemantics.minExponent)
1998       exponentChange = toSemantics.minExponent - exponent;
1999     if (exponentChange < shift)
2000       exponentChange = shift;
2001     if (exponentChange < 0) {
2002       shift -= exponentChange;
2003       exponent += exponentChange;
2004     }
2005   }
2006 
2007   // If this is a truncation, perform the shift before we narrow the storage.
2008   if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
2009     lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2010 
2011   // Fix the storage so it can hold to new value.
2012   if (newPartCount > oldPartCount) {
2013     // The new type requires more storage; make it available.
2014     integerPart *newParts;
2015     newParts = new integerPart[newPartCount];
2016     APInt::tcSet(newParts, 0, newPartCount);
2017     if (isFiniteNonZero() || category==fcNaN)
2018       APInt::tcAssign(newParts, significandParts(), oldPartCount);
2019     freeSignificand();
2020     significand.parts = newParts;
2021   } else if (newPartCount == 1 && oldPartCount != 1) {
2022     // Switch to built-in storage for a single part.
2023     integerPart newPart = 0;
2024     if (isFiniteNonZero() || category==fcNaN)
2025       newPart = significandParts()[0];
2026     freeSignificand();
2027     significand.part = newPart;
2028   }
2029 
2030   // Now that we have the right storage, switch the semantics.
2031   semantics = &toSemantics;
2032 
2033   // If this is an extension, perform the shift now that the storage is
2034   // available.
2035   if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2036     APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2037 
2038   if (isFiniteNonZero()) {
2039     fs = normalize(rounding_mode, lostFraction);
2040     *losesInfo = (fs != opOK);
2041   } else if (category == fcNaN) {
2042     *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2043 
2044     // For x87 extended precision, we want to make a NaN, not a special NaN if
2045     // the input wasn't special either.
2046     if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2047       APInt::tcSetBit(significandParts(), semantics->precision - 1);
2048 
2049     // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2050     // does not give you back the same bits.  This is dubious, and we
2051     // don't currently do it.  You're really supposed to get
2052     // an invalid operation signal at runtime, but nobody does that.
2053     fs = opOK;
2054   } else {
2055     *losesInfo = false;
2056     fs = opOK;
2057   }
2058 
2059   return fs;
2060 }
2061 
2062 /* Convert a floating point number to an integer according to the
2063    rounding mode.  If the rounded integer value is out of range this
2064    returns an invalid operation exception and the contents of the
2065    destination parts are unspecified.  If the rounded value is in
2066    range but the floating point number is not the exact integer, the C
2067    standard doesn't require an inexact exception to be raised.  IEEE
2068    854 does require it so we do that.
2069 
2070    Note that for conversions to integer type the C standard requires
2071    round-to-zero to always be used.  */
convertToSignExtendedInteger(MutableArrayRef<integerPart> parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2072 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2073     MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2074     roundingMode rounding_mode, bool *isExact) const {
2075   lostFraction lost_fraction;
2076   const integerPart *src;
2077   unsigned int dstPartsCount, truncatedBits;
2078 
2079   *isExact = false;
2080 
2081   /* Handle the three special cases first.  */
2082   if (category == fcInfinity || category == fcNaN)
2083     return opInvalidOp;
2084 
2085   dstPartsCount = partCountForBits(width);
2086   assert(dstPartsCount <= parts.size() && "Integer too big");
2087 
2088   if (category == fcZero) {
2089     APInt::tcSet(parts.data(), 0, dstPartsCount);
2090     // Negative zero can't be represented as an int.
2091     *isExact = !sign;
2092     return opOK;
2093   }
2094 
2095   src = significandParts();
2096 
2097   /* Step 1: place our absolute value, with any fraction truncated, in
2098      the destination.  */
2099   if (exponent < 0) {
2100     /* Our absolute value is less than one; truncate everything.  */
2101     APInt::tcSet(parts.data(), 0, dstPartsCount);
2102     /* For exponent -1 the integer bit represents .5, look at that.
2103        For smaller exponents leftmost truncated bit is 0. */
2104     truncatedBits = semantics->precision -1U - exponent;
2105   } else {
2106     /* We want the most significant (exponent + 1) bits; the rest are
2107        truncated.  */
2108     unsigned int bits = exponent + 1U;
2109 
2110     /* Hopelessly large in magnitude?  */
2111     if (bits > width)
2112       return opInvalidOp;
2113 
2114     if (bits < semantics->precision) {
2115       /* We truncate (semantics->precision - bits) bits.  */
2116       truncatedBits = semantics->precision - bits;
2117       APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2118     } else {
2119       /* We want at least as many bits as are available.  */
2120       APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2121                        0);
2122       APInt::tcShiftLeft(parts.data(), dstPartsCount,
2123                          bits - semantics->precision);
2124       truncatedBits = 0;
2125     }
2126   }
2127 
2128   /* Step 2: work out any lost fraction, and increment the absolute
2129      value if we would round away from zero.  */
2130   if (truncatedBits) {
2131     lost_fraction = lostFractionThroughTruncation(src, partCount(),
2132                                                   truncatedBits);
2133     if (lost_fraction != lfExactlyZero &&
2134         roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2135       if (APInt::tcIncrement(parts.data(), dstPartsCount))
2136         return opInvalidOp;     /* Overflow.  */
2137     }
2138   } else {
2139     lost_fraction = lfExactlyZero;
2140   }
2141 
2142   /* Step 3: check if we fit in the destination.  */
2143   unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2144 
2145   if (sign) {
2146     if (!isSigned) {
2147       /* Negative numbers cannot be represented as unsigned.  */
2148       if (omsb != 0)
2149         return opInvalidOp;
2150     } else {
2151       /* It takes omsb bits to represent the unsigned integer value.
2152          We lose a bit for the sign, but care is needed as the
2153          maximally negative integer is a special case.  */
2154       if (omsb == width &&
2155           APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2156         return opInvalidOp;
2157 
2158       /* This case can happen because of rounding.  */
2159       if (omsb > width)
2160         return opInvalidOp;
2161     }
2162 
2163     APInt::tcNegate (parts.data(), dstPartsCount);
2164   } else {
2165     if (omsb >= width + !isSigned)
2166       return opInvalidOp;
2167   }
2168 
2169   if (lost_fraction == lfExactlyZero) {
2170     *isExact = true;
2171     return opOK;
2172   } else
2173     return opInexact;
2174 }
2175 
2176 /* Same as convertToSignExtendedInteger, except we provide
2177    deterministic values in case of an invalid operation exception,
2178    namely zero for NaNs and the minimal or maximal value respectively
2179    for underflow or overflow.
2180    The *isExact output tells whether the result is exact, in the sense
2181    that converting it back to the original floating point type produces
2182    the original value.  This is almost equivalent to result==opOK,
2183    except for negative zeroes.
2184 */
2185 IEEEFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2186 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2187                             unsigned int width, bool isSigned,
2188                             roundingMode rounding_mode, bool *isExact) const {
2189   opStatus fs;
2190 
2191   fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2192                                     isExact);
2193 
2194   if (fs == opInvalidOp) {
2195     unsigned int bits, dstPartsCount;
2196 
2197     dstPartsCount = partCountForBits(width);
2198     assert(dstPartsCount <= parts.size() && "Integer too big");
2199 
2200     if (category == fcNaN)
2201       bits = 0;
2202     else if (sign)
2203       bits = isSigned;
2204     else
2205       bits = width - isSigned;
2206 
2207     APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2208     if (sign && isSigned)
2209       APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2210   }
2211 
2212   return fs;
2213 }
2214 
2215 /* Convert an unsigned integer SRC to a floating point number,
2216    rounding according to ROUNDING_MODE.  The sign of the floating
2217    point number is not modified.  */
convertFromUnsignedParts(const integerPart * src,unsigned int srcCount,roundingMode rounding_mode)2218 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2219     const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2220   unsigned int omsb, precision, dstCount;
2221   integerPart *dst;
2222   lostFraction lost_fraction;
2223 
2224   category = fcNormal;
2225   omsb = APInt::tcMSB(src, srcCount) + 1;
2226   dst = significandParts();
2227   dstCount = partCount();
2228   precision = semantics->precision;
2229 
2230   /* We want the most significant PRECISION bits of SRC.  There may not
2231      be that many; extract what we can.  */
2232   if (precision <= omsb) {
2233     exponent = omsb - 1;
2234     lost_fraction = lostFractionThroughTruncation(src, srcCount,
2235                                                   omsb - precision);
2236     APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2237   } else {
2238     exponent = precision - 1;
2239     lost_fraction = lfExactlyZero;
2240     APInt::tcExtract(dst, dstCount, src, omsb, 0);
2241   }
2242 
2243   return normalize(rounding_mode, lost_fraction);
2244 }
2245 
convertFromAPInt(const APInt & Val,bool isSigned,roundingMode rounding_mode)2246 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2247                                                 roundingMode rounding_mode) {
2248   unsigned int partCount = Val.getNumWords();
2249   APInt api = Val;
2250 
2251   sign = false;
2252   if (isSigned && api.isNegative()) {
2253     sign = true;
2254     api = -api;
2255   }
2256 
2257   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2258 }
2259 
2260 /* Convert a two's complement integer SRC to a floating point number,
2261    rounding according to ROUNDING_MODE.  ISSIGNED is true if the
2262    integer is signed, in which case it must be sign-extended.  */
2263 IEEEFloat::opStatus
convertFromSignExtendedInteger(const integerPart * src,unsigned int srcCount,bool isSigned,roundingMode rounding_mode)2264 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2265                                           unsigned int srcCount, bool isSigned,
2266                                           roundingMode rounding_mode) {
2267   opStatus status;
2268 
2269   if (isSigned &&
2270       APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2271     integerPart *copy;
2272 
2273     /* If we're signed and negative negate a copy.  */
2274     sign = true;
2275     copy = new integerPart[srcCount];
2276     APInt::tcAssign(copy, src, srcCount);
2277     APInt::tcNegate(copy, srcCount);
2278     status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2279     delete [] copy;
2280   } else {
2281     sign = false;
2282     status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2283   }
2284 
2285   return status;
2286 }
2287 
2288 /* FIXME: should this just take a const APInt reference?  */
2289 IEEEFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode)2290 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2291                                           unsigned int width, bool isSigned,
2292                                           roundingMode rounding_mode) {
2293   unsigned int partCount = partCountForBits(width);
2294   APInt api = APInt(width, makeArrayRef(parts, partCount));
2295 
2296   sign = false;
2297   if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2298     sign = true;
2299     api = -api;
2300   }
2301 
2302   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2303 }
2304 
2305 IEEEFloat::opStatus
convertFromHexadecimalString(StringRef s,roundingMode rounding_mode)2306 IEEEFloat::convertFromHexadecimalString(StringRef s,
2307                                         roundingMode rounding_mode) {
2308   lostFraction lost_fraction = lfExactlyZero;
2309 
2310   category = fcNormal;
2311   zeroSignificand();
2312   exponent = 0;
2313 
2314   integerPart *significand = significandParts();
2315   unsigned partsCount = partCount();
2316   unsigned bitPos = partsCount * integerPartWidth;
2317   bool computedTrailingFraction = false;
2318 
2319   // Skip leading zeroes and any (hexa)decimal point.
2320   StringRef::iterator begin = s.begin();
2321   StringRef::iterator end = s.end();
2322   StringRef::iterator dot;
2323   StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2324   StringRef::iterator firstSignificantDigit = p;
2325 
2326   while (p != end) {
2327     integerPart hex_value;
2328 
2329     if (*p == '.') {
2330       assert(dot == end && "String contains multiple dots");
2331       dot = p++;
2332       continue;
2333     }
2334 
2335     hex_value = hexDigitValue(*p);
2336     if (hex_value == -1U)
2337       break;
2338 
2339     p++;
2340 
2341     // Store the number while we have space.
2342     if (bitPos) {
2343       bitPos -= 4;
2344       hex_value <<= bitPos % integerPartWidth;
2345       significand[bitPos / integerPartWidth] |= hex_value;
2346     } else if (!computedTrailingFraction) {
2347       lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2348       computedTrailingFraction = true;
2349     }
2350   }
2351 
2352   /* Hex floats require an exponent but not a hexadecimal point.  */
2353   assert(p != end && "Hex strings require an exponent");
2354   assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2355   assert(p != begin && "Significand has no digits");
2356   assert((dot == end || p - begin != 1) && "Significand has no digits");
2357 
2358   /* Ignore the exponent if we are zero.  */
2359   if (p != firstSignificantDigit) {
2360     int expAdjustment;
2361 
2362     /* Implicit hexadecimal point?  */
2363     if (dot == end)
2364       dot = p;
2365 
2366     /* Calculate the exponent adjustment implicit in the number of
2367        significant digits.  */
2368     expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2369     if (expAdjustment < 0)
2370       expAdjustment++;
2371     expAdjustment = expAdjustment * 4 - 1;
2372 
2373     /* Adjust for writing the significand starting at the most
2374        significant nibble.  */
2375     expAdjustment += semantics->precision;
2376     expAdjustment -= partsCount * integerPartWidth;
2377 
2378     /* Adjust for the given exponent.  */
2379     exponent = totalExponent(p + 1, end, expAdjustment);
2380   }
2381 
2382   return normalize(rounding_mode, lost_fraction);
2383 }
2384 
2385 IEEEFloat::opStatus
roundSignificandWithExponent(const integerPart * decSigParts,unsigned sigPartCount,int exp,roundingMode rounding_mode)2386 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2387                                         unsigned sigPartCount, int exp,
2388                                         roundingMode rounding_mode) {
2389   unsigned int parts, pow5PartCount;
2390   fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2391   integerPart pow5Parts[maxPowerOfFiveParts];
2392   bool isNearest;
2393 
2394   isNearest = (rounding_mode == rmNearestTiesToEven ||
2395                rounding_mode == rmNearestTiesToAway);
2396 
2397   parts = partCountForBits(semantics->precision + 11);
2398 
2399   /* Calculate pow(5, abs(exp)).  */
2400   pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2401 
2402   for (;; parts *= 2) {
2403     opStatus sigStatus, powStatus;
2404     unsigned int excessPrecision, truncatedBits;
2405 
2406     calcSemantics.precision = parts * integerPartWidth - 1;
2407     excessPrecision = calcSemantics.precision - semantics->precision;
2408     truncatedBits = excessPrecision;
2409 
2410     IEEEFloat decSig(calcSemantics, uninitialized);
2411     decSig.makeZero(sign);
2412     IEEEFloat pow5(calcSemantics);
2413 
2414     sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2415                                                 rmNearestTiesToEven);
2416     powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2417                                               rmNearestTiesToEven);
2418     /* Add exp, as 10^n = 5^n * 2^n.  */
2419     decSig.exponent += exp;
2420 
2421     lostFraction calcLostFraction;
2422     integerPart HUerr, HUdistance;
2423     unsigned int powHUerr;
2424 
2425     if (exp >= 0) {
2426       /* multiplySignificand leaves the precision-th bit set to 1.  */
2427       calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
2428       powHUerr = powStatus != opOK;
2429     } else {
2430       calcLostFraction = decSig.divideSignificand(pow5);
2431       /* Denormal numbers have less precision.  */
2432       if (decSig.exponent < semantics->minExponent) {
2433         excessPrecision += (semantics->minExponent - decSig.exponent);
2434         truncatedBits = excessPrecision;
2435         if (excessPrecision > calcSemantics.precision)
2436           excessPrecision = calcSemantics.precision;
2437       }
2438       /* Extra half-ulp lost in reciprocal of exponent.  */
2439       powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2440     }
2441 
2442     /* Both multiplySignificand and divideSignificand return the
2443        result with the integer bit set.  */
2444     assert(APInt::tcExtractBit
2445            (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2446 
2447     HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2448                        powHUerr);
2449     HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2450                                       excessPrecision, isNearest);
2451 
2452     /* Are we guaranteed to round correctly if we truncate?  */
2453     if (HUdistance >= HUerr) {
2454       APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2455                        calcSemantics.precision - excessPrecision,
2456                        excessPrecision);
2457       /* Take the exponent of decSig.  If we tcExtract-ed less bits
2458          above we must adjust our exponent to compensate for the
2459          implicit right shift.  */
2460       exponent = (decSig.exponent + semantics->precision
2461                   - (calcSemantics.precision - excessPrecision));
2462       calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2463                                                        decSig.partCount(),
2464                                                        truncatedBits);
2465       return normalize(rounding_mode, calcLostFraction);
2466     }
2467   }
2468 }
2469 
2470 IEEEFloat::opStatus
convertFromDecimalString(StringRef str,roundingMode rounding_mode)2471 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2472   decimalInfo D;
2473   opStatus fs;
2474 
2475   /* Scan the text.  */
2476   StringRef::iterator p = str.begin();
2477   interpretDecimal(p, str.end(), &D);
2478 
2479   /* Handle the quick cases.  First the case of no significant digits,
2480      i.e. zero, and then exponents that are obviously too large or too
2481      small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
2482      definitely overflows if
2483 
2484            (exp - 1) * L >= maxExponent
2485 
2486      and definitely underflows to zero where
2487 
2488            (exp + 1) * L <= minExponent - precision
2489 
2490      With integer arithmetic the tightest bounds for L are
2491 
2492            93/28 < L < 196/59            [ numerator <= 256 ]
2493            42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
2494   */
2495 
2496   // Test if we have a zero number allowing for strings with no null terminators
2497   // and zero decimals with non-zero exponents.
2498   //
2499   // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2500   // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2501   // be at most one dot. On the other hand, if we have a zero with a non-zero
2502   // exponent, then we know that D.firstSigDigit will be non-numeric.
2503   if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2504     category = fcZero;
2505     fs = opOK;
2506 
2507   /* Check whether the normalized exponent is high enough to overflow
2508      max during the log-rebasing in the max-exponent check below. */
2509   } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2510     fs = handleOverflow(rounding_mode);
2511 
2512   /* If it wasn't, then it also wasn't high enough to overflow max
2513      during the log-rebasing in the min-exponent check.  Check that it
2514      won't overflow min in either check, then perform the min-exponent
2515      check. */
2516   } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2517              (D.normalizedExponent + 1) * 28738 <=
2518                8651 * (semantics->minExponent - (int) semantics->precision)) {
2519     /* Underflow to zero and round.  */
2520     category = fcNormal;
2521     zeroSignificand();
2522     fs = normalize(rounding_mode, lfLessThanHalf);
2523 
2524   /* We can finally safely perform the max-exponent check. */
2525   } else if ((D.normalizedExponent - 1) * 42039
2526              >= 12655 * semantics->maxExponent) {
2527     /* Overflow and round.  */
2528     fs = handleOverflow(rounding_mode);
2529   } else {
2530     integerPart *decSignificand;
2531     unsigned int partCount;
2532 
2533     /* A tight upper bound on number of bits required to hold an
2534        N-digit decimal integer is N * 196 / 59.  Allocate enough space
2535        to hold the full significand, and an extra part required by
2536        tcMultiplyPart.  */
2537     partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2538     partCount = partCountForBits(1 + 196 * partCount / 59);
2539     decSignificand = new integerPart[partCount + 1];
2540     partCount = 0;
2541 
2542     /* Convert to binary efficiently - we do almost all multiplication
2543        in an integerPart.  When this would overflow do we do a single
2544        bignum multiplication, and then revert again to multiplication
2545        in an integerPart.  */
2546     do {
2547       integerPart decValue, val, multiplier;
2548 
2549       val = 0;
2550       multiplier = 1;
2551 
2552       do {
2553         if (*p == '.') {
2554           p++;
2555           if (p == str.end()) {
2556             break;
2557           }
2558         }
2559         decValue = decDigitValue(*p++);
2560         assert(decValue < 10U && "Invalid character in significand");
2561         multiplier *= 10;
2562         val = val * 10 + decValue;
2563         /* The maximum number that can be multiplied by ten with any
2564            digit added without overflowing an integerPart.  */
2565       } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2566 
2567       /* Multiply out the current part.  */
2568       APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2569                             partCount, partCount + 1, false);
2570 
2571       /* If we used another part (likely but not guaranteed), increase
2572          the count.  */
2573       if (decSignificand[partCount])
2574         partCount++;
2575     } while (p <= D.lastSigDigit);
2576 
2577     category = fcNormal;
2578     fs = roundSignificandWithExponent(decSignificand, partCount,
2579                                       D.exponent, rounding_mode);
2580 
2581     delete [] decSignificand;
2582   }
2583 
2584   return fs;
2585 }
2586 
convertFromStringSpecials(StringRef str)2587 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2588   if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
2589     makeInf(false);
2590     return true;
2591   }
2592 
2593   if (str.equals("-inf") || str.equals("-INFINITY") || str.equals("-Inf")) {
2594     makeInf(true);
2595     return true;
2596   }
2597 
2598   if (str.equals("nan") || str.equals("NaN")) {
2599     makeNaN(false, false);
2600     return true;
2601   }
2602 
2603   if (str.equals("-nan") || str.equals("-NaN")) {
2604     makeNaN(false, true);
2605     return true;
2606   }
2607 
2608   return false;
2609 }
2610 
convertFromString(StringRef str,roundingMode rounding_mode)2611 IEEEFloat::opStatus IEEEFloat::convertFromString(StringRef str,
2612                                                  roundingMode rounding_mode) {
2613   assert(!str.empty() && "Invalid string length");
2614 
2615   // Handle special cases.
2616   if (convertFromStringSpecials(str))
2617     return opOK;
2618 
2619   /* Handle a leading minus sign.  */
2620   StringRef::iterator p = str.begin();
2621   size_t slen = str.size();
2622   sign = *p == '-' ? 1 : 0;
2623   if (*p == '-' || *p == '+') {
2624     p++;
2625     slen--;
2626     assert(slen && "String has no digits");
2627   }
2628 
2629   if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2630     assert(slen - 2 && "Invalid string");
2631     return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2632                                         rounding_mode);
2633   }
2634 
2635   return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2636 }
2637 
2638 /* Write out a hexadecimal representation of the floating point value
2639    to DST, which must be of sufficient size, in the C99 form
2640    [-]0xh.hhhhp[+-]d.  Return the number of characters written,
2641    excluding the terminating NUL.
2642 
2643    If UPPERCASE, the output is in upper case, otherwise in lower case.
2644 
2645    HEXDIGITS digits appear altogether, rounding the value if
2646    necessary.  If HEXDIGITS is 0, the minimal precision to display the
2647    number precisely is used instead.  If nothing would appear after
2648    the decimal point it is suppressed.
2649 
2650    The decimal exponent is always printed and has at least one digit.
2651    Zero values display an exponent of zero.  Infinities and NaNs
2652    appear as "infinity" or "nan" respectively.
2653 
2654    The above rules are as specified by C99.  There is ambiguity about
2655    what the leading hexadecimal digit should be.  This implementation
2656    uses whatever is necessary so that the exponent is displayed as
2657    stored.  This implies the exponent will fall within the IEEE format
2658    range, and the leading hexadecimal digit will be 0 (for denormals),
2659    1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2660    any other digits zero).
2661 */
convertToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2662 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
2663                                            bool upperCase,
2664                                            roundingMode rounding_mode) const {
2665   char *p;
2666 
2667   p = dst;
2668   if (sign)
2669     *dst++ = '-';
2670 
2671   switch (category) {
2672   case fcInfinity:
2673     memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2674     dst += sizeof infinityL - 1;
2675     break;
2676 
2677   case fcNaN:
2678     memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2679     dst += sizeof NaNU - 1;
2680     break;
2681 
2682   case fcZero:
2683     *dst++ = '0';
2684     *dst++ = upperCase ? 'X': 'x';
2685     *dst++ = '0';
2686     if (hexDigits > 1) {
2687       *dst++ = '.';
2688       memset (dst, '0', hexDigits - 1);
2689       dst += hexDigits - 1;
2690     }
2691     *dst++ = upperCase ? 'P': 'p';
2692     *dst++ = '0';
2693     break;
2694 
2695   case fcNormal:
2696     dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2697     break;
2698   }
2699 
2700   *dst = 0;
2701 
2702   return static_cast<unsigned int>(dst - p);
2703 }
2704 
2705 /* Does the hard work of outputting the correctly rounded hexadecimal
2706    form of a normal floating point number with the specified number of
2707    hexadecimal digits.  If HEXDIGITS is zero the minimum number of
2708    digits necessary to print the value precisely is output.  */
convertNormalToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2709 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2710                                           bool upperCase,
2711                                           roundingMode rounding_mode) const {
2712   unsigned int count, valueBits, shift, partsCount, outputDigits;
2713   const char *hexDigitChars;
2714   const integerPart *significand;
2715   char *p;
2716   bool roundUp;
2717 
2718   *dst++ = '0';
2719   *dst++ = upperCase ? 'X': 'x';
2720 
2721   roundUp = false;
2722   hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2723 
2724   significand = significandParts();
2725   partsCount = partCount();
2726 
2727   /* +3 because the first digit only uses the single integer bit, so
2728      we have 3 virtual zero most-significant-bits.  */
2729   valueBits = semantics->precision + 3;
2730   shift = integerPartWidth - valueBits % integerPartWidth;
2731 
2732   /* The natural number of digits required ignoring trailing
2733      insignificant zeroes.  */
2734   outputDigits = (valueBits - significandLSB () + 3) / 4;
2735 
2736   /* hexDigits of zero means use the required number for the
2737      precision.  Otherwise, see if we are truncating.  If we are,
2738      find out if we need to round away from zero.  */
2739   if (hexDigits) {
2740     if (hexDigits < outputDigits) {
2741       /* We are dropping non-zero bits, so need to check how to round.
2742          "bits" is the number of dropped bits.  */
2743       unsigned int bits;
2744       lostFraction fraction;
2745 
2746       bits = valueBits - hexDigits * 4;
2747       fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2748       roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2749     }
2750     outputDigits = hexDigits;
2751   }
2752 
2753   /* Write the digits consecutively, and start writing in the location
2754      of the hexadecimal point.  We move the most significant digit
2755      left and add the hexadecimal point later.  */
2756   p = ++dst;
2757 
2758   count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2759 
2760   while (outputDigits && count) {
2761     integerPart part;
2762 
2763     /* Put the most significant integerPartWidth bits in "part".  */
2764     if (--count == partsCount)
2765       part = 0;  /* An imaginary higher zero part.  */
2766     else
2767       part = significand[count] << shift;
2768 
2769     if (count && shift)
2770       part |= significand[count - 1] >> (integerPartWidth - shift);
2771 
2772     /* Convert as much of "part" to hexdigits as we can.  */
2773     unsigned int curDigits = integerPartWidth / 4;
2774 
2775     if (curDigits > outputDigits)
2776       curDigits = outputDigits;
2777     dst += partAsHex (dst, part, curDigits, hexDigitChars);
2778     outputDigits -= curDigits;
2779   }
2780 
2781   if (roundUp) {
2782     char *q = dst;
2783 
2784     /* Note that hexDigitChars has a trailing '0'.  */
2785     do {
2786       q--;
2787       *q = hexDigitChars[hexDigitValue (*q) + 1];
2788     } while (*q == '0');
2789     assert(q >= p);
2790   } else {
2791     /* Add trailing zeroes.  */
2792     memset (dst, '0', outputDigits);
2793     dst += outputDigits;
2794   }
2795 
2796   /* Move the most significant digit to before the point, and if there
2797      is something after the decimal point add it.  This must come
2798      after rounding above.  */
2799   p[-1] = p[0];
2800   if (dst -1 == p)
2801     dst--;
2802   else
2803     p[0] = '.';
2804 
2805   /* Finally output the exponent.  */
2806   *dst++ = upperCase ? 'P': 'p';
2807 
2808   return writeSignedDecimal (dst, exponent);
2809 }
2810 
hash_value(const IEEEFloat & Arg)2811 hash_code hash_value(const IEEEFloat &Arg) {
2812   if (!Arg.isFiniteNonZero())
2813     return hash_combine((uint8_t)Arg.category,
2814                         // NaN has no sign, fix it at zero.
2815                         Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2816                         Arg.semantics->precision);
2817 
2818   // Normal floats need their exponent and significand hashed.
2819   return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2820                       Arg.semantics->precision, Arg.exponent,
2821                       hash_combine_range(
2822                         Arg.significandParts(),
2823                         Arg.significandParts() + Arg.partCount()));
2824 }
2825 
2826 // Conversion from APFloat to/from host float/double.  It may eventually be
2827 // possible to eliminate these and have everybody deal with APFloats, but that
2828 // will take a while.  This approach will not easily extend to long double.
2829 // Current implementation requires integerPartWidth==64, which is correct at
2830 // the moment but could be made more general.
2831 
2832 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2833 // the actual IEEE respresentations.  We compensate for that here.
2834 
convertF80LongDoubleAPFloatToAPInt() const2835 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
2836   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
2837   assert(partCount()==2);
2838 
2839   uint64_t myexponent, mysignificand;
2840 
2841   if (isFiniteNonZero()) {
2842     myexponent = exponent+16383; //bias
2843     mysignificand = significandParts()[0];
2844     if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2845       myexponent = 0;   // denormal
2846   } else if (category==fcZero) {
2847     myexponent = 0;
2848     mysignificand = 0;
2849   } else if (category==fcInfinity) {
2850     myexponent = 0x7fff;
2851     mysignificand = 0x8000000000000000ULL;
2852   } else {
2853     assert(category == fcNaN && "Unknown category");
2854     myexponent = 0x7fff;
2855     mysignificand = significandParts()[0];
2856   }
2857 
2858   uint64_t words[2];
2859   words[0] = mysignificand;
2860   words[1] =  ((uint64_t)(sign & 1) << 15) |
2861               (myexponent & 0x7fffLL);
2862   return APInt(80, words);
2863 }
2864 
convertPPCDoubleDoubleAPFloatToAPInt() const2865 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
2866   assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
2867   assert(partCount()==2);
2868 
2869   uint64_t words[2];
2870   opStatus fs;
2871   bool losesInfo;
2872 
2873   // Convert number to double.  To avoid spurious underflows, we re-
2874   // normalize against the "double" minExponent first, and only *then*
2875   // truncate the mantissa.  The result of that second conversion
2876   // may be inexact, but should never underflow.
2877   // Declare fltSemantics before APFloat that uses it (and
2878   // saves pointer to it) to ensure correct destruction order.
2879   fltSemantics extendedSemantics = *semantics;
2880   extendedSemantics.minExponent = semIEEEdouble.minExponent;
2881   IEEEFloat extended(*this);
2882   fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2883   assert(fs == opOK && !losesInfo);
2884   (void)fs;
2885 
2886   IEEEFloat u(extended);
2887   fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2888   assert(fs == opOK || fs == opInexact);
2889   (void)fs;
2890   words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2891 
2892   // If conversion was exact or resulted in a special case, we're done;
2893   // just set the second double to zero.  Otherwise, re-convert back to
2894   // the extended format and compute the difference.  This now should
2895   // convert exactly to double.
2896   if (u.isFiniteNonZero() && losesInfo) {
2897     fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2898     assert(fs == opOK && !losesInfo);
2899     (void)fs;
2900 
2901     IEEEFloat v(extended);
2902     v.subtract(u, rmNearestTiesToEven);
2903     fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2904     assert(fs == opOK && !losesInfo);
2905     (void)fs;
2906     words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2907   } else {
2908     words[1] = 0;
2909   }
2910 
2911   return APInt(128, words);
2912 }
2913 
convertQuadrupleAPFloatToAPInt() const2914 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
2915   assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
2916   assert(partCount()==2);
2917 
2918   uint64_t myexponent, mysignificand, mysignificand2;
2919 
2920   if (isFiniteNonZero()) {
2921     myexponent = exponent+16383; //bias
2922     mysignificand = significandParts()[0];
2923     mysignificand2 = significandParts()[1];
2924     if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2925       myexponent = 0;   // denormal
2926   } else if (category==fcZero) {
2927     myexponent = 0;
2928     mysignificand = mysignificand2 = 0;
2929   } else if (category==fcInfinity) {
2930     myexponent = 0x7fff;
2931     mysignificand = mysignificand2 = 0;
2932   } else {
2933     assert(category == fcNaN && "Unknown category!");
2934     myexponent = 0x7fff;
2935     mysignificand = significandParts()[0];
2936     mysignificand2 = significandParts()[1];
2937   }
2938 
2939   uint64_t words[2];
2940   words[0] = mysignificand;
2941   words[1] = ((uint64_t)(sign & 1) << 63) |
2942              ((myexponent & 0x7fff) << 48) |
2943              (mysignificand2 & 0xffffffffffffLL);
2944 
2945   return APInt(128, words);
2946 }
2947 
convertDoubleAPFloatToAPInt() const2948 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
2949   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
2950   assert(partCount()==1);
2951 
2952   uint64_t myexponent, mysignificand;
2953 
2954   if (isFiniteNonZero()) {
2955     myexponent = exponent+1023; //bias
2956     mysignificand = *significandParts();
2957     if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2958       myexponent = 0;   // denormal
2959   } else if (category==fcZero) {
2960     myexponent = 0;
2961     mysignificand = 0;
2962   } else if (category==fcInfinity) {
2963     myexponent = 0x7ff;
2964     mysignificand = 0;
2965   } else {
2966     assert(category == fcNaN && "Unknown category!");
2967     myexponent = 0x7ff;
2968     mysignificand = *significandParts();
2969   }
2970 
2971   return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2972                      ((myexponent & 0x7ff) <<  52) |
2973                      (mysignificand & 0xfffffffffffffLL))));
2974 }
2975 
convertFloatAPFloatToAPInt() const2976 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
2977   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
2978   assert(partCount()==1);
2979 
2980   uint32_t myexponent, mysignificand;
2981 
2982   if (isFiniteNonZero()) {
2983     myexponent = exponent+127; //bias
2984     mysignificand = (uint32_t)*significandParts();
2985     if (myexponent == 1 && !(mysignificand & 0x800000))
2986       myexponent = 0;   // denormal
2987   } else if (category==fcZero) {
2988     myexponent = 0;
2989     mysignificand = 0;
2990   } else if (category==fcInfinity) {
2991     myexponent = 0xff;
2992     mysignificand = 0;
2993   } else {
2994     assert(category == fcNaN && "Unknown category!");
2995     myexponent = 0xff;
2996     mysignificand = (uint32_t)*significandParts();
2997   }
2998 
2999   return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3000                     (mysignificand & 0x7fffff)));
3001 }
3002 
convertHalfAPFloatToAPInt() const3003 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3004   assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
3005   assert(partCount()==1);
3006 
3007   uint32_t myexponent, mysignificand;
3008 
3009   if (isFiniteNonZero()) {
3010     myexponent = exponent+15; //bias
3011     mysignificand = (uint32_t)*significandParts();
3012     if (myexponent == 1 && !(mysignificand & 0x400))
3013       myexponent = 0;   // denormal
3014   } else if (category==fcZero) {
3015     myexponent = 0;
3016     mysignificand = 0;
3017   } else if (category==fcInfinity) {
3018     myexponent = 0x1f;
3019     mysignificand = 0;
3020   } else {
3021     assert(category == fcNaN && "Unknown category!");
3022     myexponent = 0x1f;
3023     mysignificand = (uint32_t)*significandParts();
3024   }
3025 
3026   return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3027                     (mysignificand & 0x3ff)));
3028 }
3029 
3030 // This function creates an APInt that is just a bit map of the floating
3031 // point constant as it would appear in memory.  It is not a conversion,
3032 // and treating the result as a normal integer is unlikely to be useful.
3033 
bitcastToAPInt() const3034 APInt IEEEFloat::bitcastToAPInt() const {
3035   if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3036     return convertHalfAPFloatToAPInt();
3037 
3038   if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3039     return convertFloatAPFloatToAPInt();
3040 
3041   if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3042     return convertDoubleAPFloatToAPInt();
3043 
3044   if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3045     return convertQuadrupleAPFloatToAPInt();
3046 
3047   if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3048     return convertPPCDoubleDoubleAPFloatToAPInt();
3049 
3050   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3051          "unknown format!");
3052   return convertF80LongDoubleAPFloatToAPInt();
3053 }
3054 
convertToFloat() const3055 float IEEEFloat::convertToFloat() const {
3056   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3057          "Float semantics are not IEEEsingle");
3058   APInt api = bitcastToAPInt();
3059   return api.bitsToFloat();
3060 }
3061 
convertToDouble() const3062 double IEEEFloat::convertToDouble() const {
3063   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3064          "Float semantics are not IEEEdouble");
3065   APInt api = bitcastToAPInt();
3066   return api.bitsToDouble();
3067 }
3068 
3069 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
3070 /// does not support these bit patterns:
3071 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3072 ///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3073 ///  exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3074 ///  exponent = 0, integer bit 1 ("pseudodenormal")
3075 /// At the moment, the first three are treated as NaNs, the last one as Normal.
initFromF80LongDoubleAPInt(const APInt & api)3076 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3077   assert(api.getBitWidth()==80);
3078   uint64_t i1 = api.getRawData()[0];
3079   uint64_t i2 = api.getRawData()[1];
3080   uint64_t myexponent = (i2 & 0x7fff);
3081   uint64_t mysignificand = i1;
3082   uint8_t myintegerbit = mysignificand >> 63;
3083 
3084   initialize(&semX87DoubleExtended);
3085   assert(partCount()==2);
3086 
3087   sign = static_cast<unsigned int>(i2>>15);
3088   if (myexponent == 0 && mysignificand == 0) {
3089     // exponent, significand meaningless
3090     category = fcZero;
3091   } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3092     // exponent, significand meaningless
3093     category = fcInfinity;
3094   } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3095              (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3096     // exponent meaningless
3097     category = fcNaN;
3098     significandParts()[0] = mysignificand;
3099     significandParts()[1] = 0;
3100   } else {
3101     category = fcNormal;
3102     exponent = myexponent - 16383;
3103     significandParts()[0] = mysignificand;
3104     significandParts()[1] = 0;
3105     if (myexponent==0)          // denormal
3106       exponent = -16382;
3107   }
3108 }
3109 
initFromPPCDoubleDoubleAPInt(const APInt & api)3110 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3111   assert(api.getBitWidth()==128);
3112   uint64_t i1 = api.getRawData()[0];
3113   uint64_t i2 = api.getRawData()[1];
3114   opStatus fs;
3115   bool losesInfo;
3116 
3117   // Get the first double and convert to our format.
3118   initFromDoubleAPInt(APInt(64, i1));
3119   fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3120   assert(fs == opOK && !losesInfo);
3121   (void)fs;
3122 
3123   // Unless we have a special case, add in second double.
3124   if (isFiniteNonZero()) {
3125     IEEEFloat v(semIEEEdouble, APInt(64, i2));
3126     fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3127     assert(fs == opOK && !losesInfo);
3128     (void)fs;
3129 
3130     add(v, rmNearestTiesToEven);
3131   }
3132 }
3133 
initFromQuadrupleAPInt(const APInt & api)3134 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3135   assert(api.getBitWidth()==128);
3136   uint64_t i1 = api.getRawData()[0];
3137   uint64_t i2 = api.getRawData()[1];
3138   uint64_t myexponent = (i2 >> 48) & 0x7fff;
3139   uint64_t mysignificand  = i1;
3140   uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3141 
3142   initialize(&semIEEEquad);
3143   assert(partCount()==2);
3144 
3145   sign = static_cast<unsigned int>(i2>>63);
3146   if (myexponent==0 &&
3147       (mysignificand==0 && mysignificand2==0)) {
3148     // exponent, significand meaningless
3149     category = fcZero;
3150   } else if (myexponent==0x7fff &&
3151              (mysignificand==0 && mysignificand2==0)) {
3152     // exponent, significand meaningless
3153     category = fcInfinity;
3154   } else if (myexponent==0x7fff &&
3155              (mysignificand!=0 || mysignificand2 !=0)) {
3156     // exponent meaningless
3157     category = fcNaN;
3158     significandParts()[0] = mysignificand;
3159     significandParts()[1] = mysignificand2;
3160   } else {
3161     category = fcNormal;
3162     exponent = myexponent - 16383;
3163     significandParts()[0] = mysignificand;
3164     significandParts()[1] = mysignificand2;
3165     if (myexponent==0)          // denormal
3166       exponent = -16382;
3167     else
3168       significandParts()[1] |= 0x1000000000000LL;  // integer bit
3169   }
3170 }
3171 
initFromDoubleAPInt(const APInt & api)3172 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3173   assert(api.getBitWidth()==64);
3174   uint64_t i = *api.getRawData();
3175   uint64_t myexponent = (i >> 52) & 0x7ff;
3176   uint64_t mysignificand = i & 0xfffffffffffffLL;
3177 
3178   initialize(&semIEEEdouble);
3179   assert(partCount()==1);
3180 
3181   sign = static_cast<unsigned int>(i>>63);
3182   if (myexponent==0 && mysignificand==0) {
3183     // exponent, significand meaningless
3184     category = fcZero;
3185   } else if (myexponent==0x7ff && mysignificand==0) {
3186     // exponent, significand meaningless
3187     category = fcInfinity;
3188   } else if (myexponent==0x7ff && mysignificand!=0) {
3189     // exponent meaningless
3190     category = fcNaN;
3191     *significandParts() = mysignificand;
3192   } else {
3193     category = fcNormal;
3194     exponent = myexponent - 1023;
3195     *significandParts() = mysignificand;
3196     if (myexponent==0)          // denormal
3197       exponent = -1022;
3198     else
3199       *significandParts() |= 0x10000000000000LL;  // integer bit
3200   }
3201 }
3202 
initFromFloatAPInt(const APInt & api)3203 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3204   assert(api.getBitWidth()==32);
3205   uint32_t i = (uint32_t)*api.getRawData();
3206   uint32_t myexponent = (i >> 23) & 0xff;
3207   uint32_t mysignificand = i & 0x7fffff;
3208 
3209   initialize(&semIEEEsingle);
3210   assert(partCount()==1);
3211 
3212   sign = i >> 31;
3213   if (myexponent==0 && mysignificand==0) {
3214     // exponent, significand meaningless
3215     category = fcZero;
3216   } else if (myexponent==0xff && mysignificand==0) {
3217     // exponent, significand meaningless
3218     category = fcInfinity;
3219   } else if (myexponent==0xff && mysignificand!=0) {
3220     // sign, exponent, significand meaningless
3221     category = fcNaN;
3222     *significandParts() = mysignificand;
3223   } else {
3224     category = fcNormal;
3225     exponent = myexponent - 127;  //bias
3226     *significandParts() = mysignificand;
3227     if (myexponent==0)    // denormal
3228       exponent = -126;
3229     else
3230       *significandParts() |= 0x800000; // integer bit
3231   }
3232 }
3233 
initFromHalfAPInt(const APInt & api)3234 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3235   assert(api.getBitWidth()==16);
3236   uint32_t i = (uint32_t)*api.getRawData();
3237   uint32_t myexponent = (i >> 10) & 0x1f;
3238   uint32_t mysignificand = i & 0x3ff;
3239 
3240   initialize(&semIEEEhalf);
3241   assert(partCount()==1);
3242 
3243   sign = i >> 15;
3244   if (myexponent==0 && mysignificand==0) {
3245     // exponent, significand meaningless
3246     category = fcZero;
3247   } else if (myexponent==0x1f && mysignificand==0) {
3248     // exponent, significand meaningless
3249     category = fcInfinity;
3250   } else if (myexponent==0x1f && mysignificand!=0) {
3251     // sign, exponent, significand meaningless
3252     category = fcNaN;
3253     *significandParts() = mysignificand;
3254   } else {
3255     category = fcNormal;
3256     exponent = myexponent - 15;  //bias
3257     *significandParts() = mysignificand;
3258     if (myexponent==0)    // denormal
3259       exponent = -14;
3260     else
3261       *significandParts() |= 0x400; // integer bit
3262   }
3263 }
3264 
3265 /// Treat api as containing the bits of a floating point number.  Currently
3266 /// we infer the floating point type from the size of the APInt.  The
3267 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3268 /// when the size is anything else).
initFromAPInt(const fltSemantics * Sem,const APInt & api)3269 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3270   if (Sem == &semIEEEhalf)
3271     return initFromHalfAPInt(api);
3272   if (Sem == &semIEEEsingle)
3273     return initFromFloatAPInt(api);
3274   if (Sem == &semIEEEdouble)
3275     return initFromDoubleAPInt(api);
3276   if (Sem == &semX87DoubleExtended)
3277     return initFromF80LongDoubleAPInt(api);
3278   if (Sem == &semIEEEquad)
3279     return initFromQuadrupleAPInt(api);
3280   if (Sem == &semPPCDoubleDoubleLegacy)
3281     return initFromPPCDoubleDoubleAPInt(api);
3282 
3283   llvm_unreachable(nullptr);
3284 }
3285 
3286 /// Make this number the largest magnitude normal number in the given
3287 /// semantics.
makeLargest(bool Negative)3288 void IEEEFloat::makeLargest(bool Negative) {
3289   // We want (in interchange format):
3290   //   sign = {Negative}
3291   //   exponent = 1..10
3292   //   significand = 1..1
3293   category = fcNormal;
3294   sign = Negative;
3295   exponent = semantics->maxExponent;
3296 
3297   // Use memset to set all but the highest integerPart to all ones.
3298   integerPart *significand = significandParts();
3299   unsigned PartCount = partCount();
3300   memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3301 
3302   // Set the high integerPart especially setting all unused top bits for
3303   // internal consistency.
3304   const unsigned NumUnusedHighBits =
3305     PartCount*integerPartWidth - semantics->precision;
3306   significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3307                                    ? (~integerPart(0) >> NumUnusedHighBits)
3308                                    : 0;
3309 }
3310 
3311 /// Make this number the smallest magnitude denormal number in the given
3312 /// semantics.
makeSmallest(bool Negative)3313 void IEEEFloat::makeSmallest(bool Negative) {
3314   // We want (in interchange format):
3315   //   sign = {Negative}
3316   //   exponent = 0..0
3317   //   significand = 0..01
3318   category = fcNormal;
3319   sign = Negative;
3320   exponent = semantics->minExponent;
3321   APInt::tcSet(significandParts(), 1, partCount());
3322 }
3323 
makeSmallestNormalized(bool Negative)3324 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3325   // We want (in interchange format):
3326   //   sign = {Negative}
3327   //   exponent = 0..0
3328   //   significand = 10..0
3329 
3330   category = fcNormal;
3331   zeroSignificand();
3332   sign = Negative;
3333   exponent = semantics->minExponent;
3334   significandParts()[partCountForBits(semantics->precision) - 1] |=
3335       (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth));
3336 }
3337 
IEEEFloat(const fltSemantics & Sem,const APInt & API)3338 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3339   initFromAPInt(&Sem, API);
3340 }
3341 
IEEEFloat(float f)3342 IEEEFloat::IEEEFloat(float f) {
3343   initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3344 }
3345 
IEEEFloat(double d)3346 IEEEFloat::IEEEFloat(double d) {
3347   initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3348 }
3349 
3350 namespace {
append(SmallVectorImpl<char> & Buffer,StringRef Str)3351   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3352     Buffer.append(Str.begin(), Str.end());
3353   }
3354 
3355   /// Removes data from the given significand until it is no more
3356   /// precise than is required for the desired precision.
AdjustToPrecision(APInt & significand,int & exp,unsigned FormatPrecision)3357   void AdjustToPrecision(APInt &significand,
3358                          int &exp, unsigned FormatPrecision) {
3359     unsigned bits = significand.getActiveBits();
3360 
3361     // 196/59 is a very slight overestimate of lg_2(10).
3362     unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3363 
3364     if (bits <= bitsRequired) return;
3365 
3366     unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3367     if (!tensRemovable) return;
3368 
3369     exp += tensRemovable;
3370 
3371     APInt divisor(significand.getBitWidth(), 1);
3372     APInt powten(significand.getBitWidth(), 10);
3373     while (true) {
3374       if (tensRemovable & 1)
3375         divisor *= powten;
3376       tensRemovable >>= 1;
3377       if (!tensRemovable) break;
3378       powten *= powten;
3379     }
3380 
3381     significand = significand.udiv(divisor);
3382 
3383     // Truncate the significand down to its active bit count.
3384     significand = significand.trunc(significand.getActiveBits());
3385   }
3386 
3387 
AdjustToPrecision(SmallVectorImpl<char> & buffer,int & exp,unsigned FormatPrecision)3388   void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3389                          int &exp, unsigned FormatPrecision) {
3390     unsigned N = buffer.size();
3391     if (N <= FormatPrecision) return;
3392 
3393     // The most significant figures are the last ones in the buffer.
3394     unsigned FirstSignificant = N - FormatPrecision;
3395 
3396     // Round.
3397     // FIXME: this probably shouldn't use 'round half up'.
3398 
3399     // Rounding down is just a truncation, except we also want to drop
3400     // trailing zeros from the new result.
3401     if (buffer[FirstSignificant - 1] < '5') {
3402       while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3403         FirstSignificant++;
3404 
3405       exp += FirstSignificant;
3406       buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3407       return;
3408     }
3409 
3410     // Rounding up requires a decimal add-with-carry.  If we continue
3411     // the carry, the newly-introduced zeros will just be truncated.
3412     for (unsigned I = FirstSignificant; I != N; ++I) {
3413       if (buffer[I] == '9') {
3414         FirstSignificant++;
3415       } else {
3416         buffer[I]++;
3417         break;
3418       }
3419     }
3420 
3421     // If we carried through, we have exactly one digit of precision.
3422     if (FirstSignificant == N) {
3423       exp += FirstSignificant;
3424       buffer.clear();
3425       buffer.push_back('1');
3426       return;
3427     }
3428 
3429     exp += FirstSignificant;
3430     buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3431   }
3432 }
3433 
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const3434 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
3435                          unsigned FormatMaxPadding, bool TruncateZero) const {
3436   switch (category) {
3437   case fcInfinity:
3438     if (isNegative())
3439       return append(Str, "-Inf");
3440     else
3441       return append(Str, "+Inf");
3442 
3443   case fcNaN: return append(Str, "NaN");
3444 
3445   case fcZero:
3446     if (isNegative())
3447       Str.push_back('-');
3448 
3449     if (!FormatMaxPadding) {
3450       if (TruncateZero)
3451         append(Str, "0.0E+0");
3452       else {
3453         append(Str, "0.0");
3454         if (FormatPrecision > 1)
3455           Str.append(FormatPrecision - 1, '0');
3456         append(Str, "e+00");
3457       }
3458     } else
3459       Str.push_back('0');
3460     return;
3461 
3462   case fcNormal:
3463     break;
3464   }
3465 
3466   if (isNegative())
3467     Str.push_back('-');
3468 
3469   // Decompose the number into an APInt and an exponent.
3470   int exp = exponent - ((int) semantics->precision - 1);
3471   APInt significand(semantics->precision,
3472                     makeArrayRef(significandParts(),
3473                                  partCountForBits(semantics->precision)));
3474 
3475   // Set FormatPrecision if zero.  We want to do this before we
3476   // truncate trailing zeros, as those are part of the precision.
3477   if (!FormatPrecision) {
3478     // We use enough digits so the number can be round-tripped back to an
3479     // APFloat. The formula comes from "How to Print Floating-Point Numbers
3480     // Accurately" by Steele and White.
3481     // FIXME: Using a formula based purely on the precision is conservative;
3482     // we can print fewer digits depending on the actual value being printed.
3483 
3484     // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3485     FormatPrecision = 2 + semantics->precision * 59 / 196;
3486   }
3487 
3488   // Ignore trailing binary zeros.
3489   int trailingZeros = significand.countTrailingZeros();
3490   exp += trailingZeros;
3491   significand.lshrInPlace(trailingZeros);
3492 
3493   // Change the exponent from 2^e to 10^e.
3494   if (exp == 0) {
3495     // Nothing to do.
3496   } else if (exp > 0) {
3497     // Just shift left.
3498     significand = significand.zext(semantics->precision + exp);
3499     significand <<= exp;
3500     exp = 0;
3501   } else { /* exp < 0 */
3502     int texp = -exp;
3503 
3504     // We transform this using the identity:
3505     //   (N)(2^-e) == (N)(5^e)(10^-e)
3506     // This means we have to multiply N (the significand) by 5^e.
3507     // To avoid overflow, we have to operate on numbers large
3508     // enough to store N * 5^e:
3509     //   log2(N * 5^e) == log2(N) + e * log2(5)
3510     //                 <= semantics->precision + e * 137 / 59
3511     //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3512 
3513     unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3514 
3515     // Multiply significand by 5^e.
3516     //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3517     significand = significand.zext(precision);
3518     APInt five_to_the_i(precision, 5);
3519     while (true) {
3520       if (texp & 1) significand *= five_to_the_i;
3521 
3522       texp >>= 1;
3523       if (!texp) break;
3524       five_to_the_i *= five_to_the_i;
3525     }
3526   }
3527 
3528   AdjustToPrecision(significand, exp, FormatPrecision);
3529 
3530   SmallVector<char, 256> buffer;
3531 
3532   // Fill the buffer.
3533   unsigned precision = significand.getBitWidth();
3534   APInt ten(precision, 10);
3535   APInt digit(precision, 0);
3536 
3537   bool inTrail = true;
3538   while (significand != 0) {
3539     // digit <- significand % 10
3540     // significand <- significand / 10
3541     APInt::udivrem(significand, ten, significand, digit);
3542 
3543     unsigned d = digit.getZExtValue();
3544 
3545     // Drop trailing zeros.
3546     if (inTrail && !d) exp++;
3547     else {
3548       buffer.push_back((char) ('0' + d));
3549       inTrail = false;
3550     }
3551   }
3552 
3553   assert(!buffer.empty() && "no characters in buffer!");
3554 
3555   // Drop down to FormatPrecision.
3556   // TODO: don't do more precise calculations above than are required.
3557   AdjustToPrecision(buffer, exp, FormatPrecision);
3558 
3559   unsigned NDigits = buffer.size();
3560 
3561   // Check whether we should use scientific notation.
3562   bool FormatScientific;
3563   if (!FormatMaxPadding)
3564     FormatScientific = true;
3565   else {
3566     if (exp >= 0) {
3567       // 765e3 --> 765000
3568       //              ^^^
3569       // But we shouldn't make the number look more precise than it is.
3570       FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3571                           NDigits + (unsigned) exp > FormatPrecision);
3572     } else {
3573       // Power of the most significant digit.
3574       int MSD = exp + (int) (NDigits - 1);
3575       if (MSD >= 0) {
3576         // 765e-2 == 7.65
3577         FormatScientific = false;
3578       } else {
3579         // 765e-5 == 0.00765
3580         //           ^ ^^
3581         FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3582       }
3583     }
3584   }
3585 
3586   // Scientific formatting is pretty straightforward.
3587   if (FormatScientific) {
3588     exp += (NDigits - 1);
3589 
3590     Str.push_back(buffer[NDigits-1]);
3591     Str.push_back('.');
3592     if (NDigits == 1 && TruncateZero)
3593       Str.push_back('0');
3594     else
3595       for (unsigned I = 1; I != NDigits; ++I)
3596         Str.push_back(buffer[NDigits-1-I]);
3597     // Fill with zeros up to FormatPrecision.
3598     if (!TruncateZero && FormatPrecision > NDigits - 1)
3599       Str.append(FormatPrecision - NDigits + 1, '0');
3600     // For !TruncateZero we use lower 'e'.
3601     Str.push_back(TruncateZero ? 'E' : 'e');
3602 
3603     Str.push_back(exp >= 0 ? '+' : '-');
3604     if (exp < 0) exp = -exp;
3605     SmallVector<char, 6> expbuf;
3606     do {
3607       expbuf.push_back((char) ('0' + (exp % 10)));
3608       exp /= 10;
3609     } while (exp);
3610     // Exponent always at least two digits if we do not truncate zeros.
3611     if (!TruncateZero && expbuf.size() < 2)
3612       expbuf.push_back('0');
3613     for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3614       Str.push_back(expbuf[E-1-I]);
3615     return;
3616   }
3617 
3618   // Non-scientific, positive exponents.
3619   if (exp >= 0) {
3620     for (unsigned I = 0; I != NDigits; ++I)
3621       Str.push_back(buffer[NDigits-1-I]);
3622     for (unsigned I = 0; I != (unsigned) exp; ++I)
3623       Str.push_back('0');
3624     return;
3625   }
3626 
3627   // Non-scientific, negative exponents.
3628 
3629   // The number of digits to the left of the decimal point.
3630   int NWholeDigits = exp + (int) NDigits;
3631 
3632   unsigned I = 0;
3633   if (NWholeDigits > 0) {
3634     for (; I != (unsigned) NWholeDigits; ++I)
3635       Str.push_back(buffer[NDigits-I-1]);
3636     Str.push_back('.');
3637   } else {
3638     unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3639 
3640     Str.push_back('0');
3641     Str.push_back('.');
3642     for (unsigned Z = 1; Z != NZeros; ++Z)
3643       Str.push_back('0');
3644   }
3645 
3646   for (; I != NDigits; ++I)
3647     Str.push_back(buffer[NDigits-I-1]);
3648 }
3649 
getExactInverse(APFloat * inv) const3650 bool IEEEFloat::getExactInverse(APFloat *inv) const {
3651   // Special floats and denormals have no exact inverse.
3652   if (!isFiniteNonZero())
3653     return false;
3654 
3655   // Check that the number is a power of two by making sure that only the
3656   // integer bit is set in the significand.
3657   if (significandLSB() != semantics->precision - 1)
3658     return false;
3659 
3660   // Get the inverse.
3661   IEEEFloat reciprocal(*semantics, 1ULL);
3662   if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3663     return false;
3664 
3665   // Avoid multiplication with a denormal, it is not safe on all platforms and
3666   // may be slower than a normal division.
3667   if (reciprocal.isDenormal())
3668     return false;
3669 
3670   assert(reciprocal.isFiniteNonZero() &&
3671          reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3672 
3673   if (inv)
3674     *inv = APFloat(reciprocal, *semantics);
3675 
3676   return true;
3677 }
3678 
isSignaling() const3679 bool IEEEFloat::isSignaling() const {
3680   if (!isNaN())
3681     return false;
3682 
3683   // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3684   // first bit of the trailing significand being 0.
3685   return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3686 }
3687 
3688 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3689 ///
3690 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3691 /// appropriate sign switching before/after the computation.
next(bool nextDown)3692 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
3693   // If we are performing nextDown, swap sign so we have -x.
3694   if (nextDown)
3695     changeSign();
3696 
3697   // Compute nextUp(x)
3698   opStatus result = opOK;
3699 
3700   // Handle each float category separately.
3701   switch (category) {
3702   case fcInfinity:
3703     // nextUp(+inf) = +inf
3704     if (!isNegative())
3705       break;
3706     // nextUp(-inf) = -getLargest()
3707     makeLargest(true);
3708     break;
3709   case fcNaN:
3710     // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3711     // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3712     //                     change the payload.
3713     if (isSignaling()) {
3714       result = opInvalidOp;
3715       // For consistency, propagate the sign of the sNaN to the qNaN.
3716       makeNaN(false, isNegative(), nullptr);
3717     }
3718     break;
3719   case fcZero:
3720     // nextUp(pm 0) = +getSmallest()
3721     makeSmallest(false);
3722     break;
3723   case fcNormal:
3724     // nextUp(-getSmallest()) = -0
3725     if (isSmallest() && isNegative()) {
3726       APInt::tcSet(significandParts(), 0, partCount());
3727       category = fcZero;
3728       exponent = 0;
3729       break;
3730     }
3731 
3732     // nextUp(getLargest()) == INFINITY
3733     if (isLargest() && !isNegative()) {
3734       APInt::tcSet(significandParts(), 0, partCount());
3735       category = fcInfinity;
3736       exponent = semantics->maxExponent + 1;
3737       break;
3738     }
3739 
3740     // nextUp(normal) == normal + inc.
3741     if (isNegative()) {
3742       // If we are negative, we need to decrement the significand.
3743 
3744       // We only cross a binade boundary that requires adjusting the exponent
3745       // if:
3746       //   1. exponent != semantics->minExponent. This implies we are not in the
3747       //   smallest binade or are dealing with denormals.
3748       //   2. Our significand excluding the integral bit is all zeros.
3749       bool WillCrossBinadeBoundary =
3750         exponent != semantics->minExponent && isSignificandAllZeros();
3751 
3752       // Decrement the significand.
3753       //
3754       // We always do this since:
3755       //   1. If we are dealing with a non-binade decrement, by definition we
3756       //   just decrement the significand.
3757       //   2. If we are dealing with a normal -> normal binade decrement, since
3758       //   we have an explicit integral bit the fact that all bits but the
3759       //   integral bit are zero implies that subtracting one will yield a
3760       //   significand with 0 integral bit and 1 in all other spots. Thus we
3761       //   must just adjust the exponent and set the integral bit to 1.
3762       //   3. If we are dealing with a normal -> denormal binade decrement,
3763       //   since we set the integral bit to 0 when we represent denormals, we
3764       //   just decrement the significand.
3765       integerPart *Parts = significandParts();
3766       APInt::tcDecrement(Parts, partCount());
3767 
3768       if (WillCrossBinadeBoundary) {
3769         // Our result is a normal number. Do the following:
3770         // 1. Set the integral bit to 1.
3771         // 2. Decrement the exponent.
3772         APInt::tcSetBit(Parts, semantics->precision - 1);
3773         exponent--;
3774       }
3775     } else {
3776       // If we are positive, we need to increment the significand.
3777 
3778       // We only cross a binade boundary that requires adjusting the exponent if
3779       // the input is not a denormal and all of said input's significand bits
3780       // are set. If all of said conditions are true: clear the significand, set
3781       // the integral bit to 1, and increment the exponent. If we have a
3782       // denormal always increment since moving denormals and the numbers in the
3783       // smallest normal binade have the same exponent in our representation.
3784       bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3785 
3786       if (WillCrossBinadeBoundary) {
3787         integerPart *Parts = significandParts();
3788         APInt::tcSet(Parts, 0, partCount());
3789         APInt::tcSetBit(Parts, semantics->precision - 1);
3790         assert(exponent != semantics->maxExponent &&
3791                "We can not increment an exponent beyond the maxExponent allowed"
3792                " by the given floating point semantics.");
3793         exponent++;
3794       } else {
3795         incrementSignificand();
3796       }
3797     }
3798     break;
3799   }
3800 
3801   // If we are performing nextDown, swap sign so we have -nextUp(-x)
3802   if (nextDown)
3803     changeSign();
3804 
3805   return result;
3806 }
3807 
makeInf(bool Negative)3808 void IEEEFloat::makeInf(bool Negative) {
3809   category = fcInfinity;
3810   sign = Negative;
3811   exponent = semantics->maxExponent + 1;
3812   APInt::tcSet(significandParts(), 0, partCount());
3813 }
3814 
makeZero(bool Negative)3815 void IEEEFloat::makeZero(bool Negative) {
3816   category = fcZero;
3817   sign = Negative;
3818   exponent = semantics->minExponent-1;
3819   APInt::tcSet(significandParts(), 0, partCount());
3820 }
3821 
makeQuiet()3822 void IEEEFloat::makeQuiet() {
3823   assert(isNaN());
3824   APInt::tcSetBit(significandParts(), semantics->precision - 2);
3825 }
3826 
ilogb(const IEEEFloat & Arg)3827 int ilogb(const IEEEFloat &Arg) {
3828   if (Arg.isNaN())
3829     return IEEEFloat::IEK_NaN;
3830   if (Arg.isZero())
3831     return IEEEFloat::IEK_Zero;
3832   if (Arg.isInfinity())
3833     return IEEEFloat::IEK_Inf;
3834   if (!Arg.isDenormal())
3835     return Arg.exponent;
3836 
3837   IEEEFloat Normalized(Arg);
3838   int SignificandBits = Arg.getSemantics().precision - 1;
3839 
3840   Normalized.exponent += SignificandBits;
3841   Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
3842   return Normalized.exponent - SignificandBits;
3843 }
3844 
scalbn(IEEEFloat X,int Exp,IEEEFloat::roundingMode RoundingMode)3845 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
3846   auto MaxExp = X.getSemantics().maxExponent;
3847   auto MinExp = X.getSemantics().minExponent;
3848 
3849   // If Exp is wildly out-of-scale, simply adding it to X.exponent will
3850   // overflow; clamp it to a safe range before adding, but ensure that the range
3851   // is large enough that the clamp does not change the result. The range we
3852   // need to support is the difference between the largest possible exponent and
3853   // the normalized exponent of half the smallest denormal.
3854 
3855   int SignificandBits = X.getSemantics().precision - 1;
3856   int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
3857 
3858   // Clamp to one past the range ends to let normalize handle overlflow.
3859   X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
3860   X.normalize(RoundingMode, lfExactlyZero);
3861   if (X.isNaN())
3862     X.makeQuiet();
3863   return X;
3864 }
3865 
frexp(const IEEEFloat & Val,int & Exp,IEEEFloat::roundingMode RM)3866 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
3867   Exp = ilogb(Val);
3868 
3869   // Quiet signalling nans.
3870   if (Exp == IEEEFloat::IEK_NaN) {
3871     IEEEFloat Quiet(Val);
3872     Quiet.makeQuiet();
3873     return Quiet;
3874   }
3875 
3876   if (Exp == IEEEFloat::IEK_Inf)
3877     return Val;
3878 
3879   // 1 is added because frexp is defined to return a normalized fraction in
3880   // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
3881   Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
3882   return scalbn(Val, -Exp, RM);
3883 }
3884 
DoubleAPFloat(const fltSemantics & S)3885 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
3886     : Semantics(&S),
3887       Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
3888   assert(Semantics == &semPPCDoubleDouble);
3889 }
3890 
DoubleAPFloat(const fltSemantics & S,uninitializedTag)3891 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
3892     : Semantics(&S),
3893       Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
3894                             APFloat(semIEEEdouble, uninitialized)}) {
3895   assert(Semantics == &semPPCDoubleDouble);
3896 }
3897 
DoubleAPFloat(const fltSemantics & S,integerPart I)3898 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
3899     : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
3900                                            APFloat(semIEEEdouble)}) {
3901   assert(Semantics == &semPPCDoubleDouble);
3902 }
3903 
DoubleAPFloat(const fltSemantics & S,const APInt & I)3904 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
3905     : Semantics(&S),
3906       Floats(new APFloat[2]{
3907           APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
3908           APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
3909   assert(Semantics == &semPPCDoubleDouble);
3910 }
3911 
DoubleAPFloat(const fltSemantics & S,APFloat && First,APFloat && Second)3912 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
3913                              APFloat &&Second)
3914     : Semantics(&S),
3915       Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
3916   assert(Semantics == &semPPCDoubleDouble);
3917   assert(&Floats[0].getSemantics() == &semIEEEdouble);
3918   assert(&Floats[1].getSemantics() == &semIEEEdouble);
3919 }
3920 
DoubleAPFloat(const DoubleAPFloat & RHS)3921 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
3922     : Semantics(RHS.Semantics),
3923       Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
3924                                          APFloat(RHS.Floats[1])}
3925                         : nullptr) {
3926   assert(Semantics == &semPPCDoubleDouble);
3927 }
3928 
DoubleAPFloat(DoubleAPFloat && RHS)3929 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
3930     : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
3931   RHS.Semantics = &semBogus;
3932   assert(Semantics == &semPPCDoubleDouble);
3933 }
3934 
operator =(const DoubleAPFloat & RHS)3935 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
3936   if (Semantics == RHS.Semantics && RHS.Floats) {
3937     Floats[0] = RHS.Floats[0];
3938     Floats[1] = RHS.Floats[1];
3939   } else if (this != &RHS) {
3940     this->~DoubleAPFloat();
3941     new (this) DoubleAPFloat(RHS);
3942   }
3943   return *this;
3944 }
3945 
3946 // Implement addition, subtraction, multiplication and division based on:
3947 // "Software for Doubled-Precision Floating-Point Computations",
3948 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
addImpl(const APFloat & a,const APFloat & aa,const APFloat & c,const APFloat & cc,roundingMode RM)3949 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
3950                                          const APFloat &c, const APFloat &cc,
3951                                          roundingMode RM) {
3952   int Status = opOK;
3953   APFloat z = a;
3954   Status |= z.add(c, RM);
3955   if (!z.isFinite()) {
3956     if (!z.isInfinity()) {
3957       Floats[0] = std::move(z);
3958       Floats[1].makeZero(/* Neg = */ false);
3959       return (opStatus)Status;
3960     }
3961     Status = opOK;
3962     auto AComparedToC = a.compareAbsoluteValue(c);
3963     z = cc;
3964     Status |= z.add(aa, RM);
3965     if (AComparedToC == APFloat::cmpGreaterThan) {
3966       // z = cc + aa + c + a;
3967       Status |= z.add(c, RM);
3968       Status |= z.add(a, RM);
3969     } else {
3970       // z = cc + aa + a + c;
3971       Status |= z.add(a, RM);
3972       Status |= z.add(c, RM);
3973     }
3974     if (!z.isFinite()) {
3975       Floats[0] = std::move(z);
3976       Floats[1].makeZero(/* Neg = */ false);
3977       return (opStatus)Status;
3978     }
3979     Floats[0] = z;
3980     APFloat zz = aa;
3981     Status |= zz.add(cc, RM);
3982     if (AComparedToC == APFloat::cmpGreaterThan) {
3983       // Floats[1] = a - z + c + zz;
3984       Floats[1] = a;
3985       Status |= Floats[1].subtract(z, RM);
3986       Status |= Floats[1].add(c, RM);
3987       Status |= Floats[1].add(zz, RM);
3988     } else {
3989       // Floats[1] = c - z + a + zz;
3990       Floats[1] = c;
3991       Status |= Floats[1].subtract(z, RM);
3992       Status |= Floats[1].add(a, RM);
3993       Status |= Floats[1].add(zz, RM);
3994     }
3995   } else {
3996     // q = a - z;
3997     APFloat q = a;
3998     Status |= q.subtract(z, RM);
3999 
4000     // zz = q + c + (a - (q + z)) + aa + cc;
4001     // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4002     auto zz = q;
4003     Status |= zz.add(c, RM);
4004     Status |= q.add(z, RM);
4005     Status |= q.subtract(a, RM);
4006     q.changeSign();
4007     Status |= zz.add(q, RM);
4008     Status |= zz.add(aa, RM);
4009     Status |= zz.add(cc, RM);
4010     if (zz.isZero() && !zz.isNegative()) {
4011       Floats[0] = std::move(z);
4012       Floats[1].makeZero(/* Neg = */ false);
4013       return opOK;
4014     }
4015     Floats[0] = z;
4016     Status |= Floats[0].add(zz, RM);
4017     if (!Floats[0].isFinite()) {
4018       Floats[1].makeZero(/* Neg = */ false);
4019       return (opStatus)Status;
4020     }
4021     Floats[1] = std::move(z);
4022     Status |= Floats[1].subtract(Floats[0], RM);
4023     Status |= Floats[1].add(zz, RM);
4024   }
4025   return (opStatus)Status;
4026 }
4027 
addWithSpecial(const DoubleAPFloat & LHS,const DoubleAPFloat & RHS,DoubleAPFloat & Out,roundingMode RM)4028 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4029                                                 const DoubleAPFloat &RHS,
4030                                                 DoubleAPFloat &Out,
4031                                                 roundingMode RM) {
4032   if (LHS.getCategory() == fcNaN) {
4033     Out = LHS;
4034     return opOK;
4035   }
4036   if (RHS.getCategory() == fcNaN) {
4037     Out = RHS;
4038     return opOK;
4039   }
4040   if (LHS.getCategory() == fcZero) {
4041     Out = RHS;
4042     return opOK;
4043   }
4044   if (RHS.getCategory() == fcZero) {
4045     Out = LHS;
4046     return opOK;
4047   }
4048   if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4049       LHS.isNegative() != RHS.isNegative()) {
4050     Out.makeNaN(false, Out.isNegative(), nullptr);
4051     return opInvalidOp;
4052   }
4053   if (LHS.getCategory() == fcInfinity) {
4054     Out = LHS;
4055     return opOK;
4056   }
4057   if (RHS.getCategory() == fcInfinity) {
4058     Out = RHS;
4059     return opOK;
4060   }
4061   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4062 
4063   APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4064       CC(RHS.Floats[1]);
4065   assert(&A.getSemantics() == &semIEEEdouble);
4066   assert(&AA.getSemantics() == &semIEEEdouble);
4067   assert(&C.getSemantics() == &semIEEEdouble);
4068   assert(&CC.getSemantics() == &semIEEEdouble);
4069   assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4070   assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4071   return Out.addImpl(A, AA, C, CC, RM);
4072 }
4073 
add(const DoubleAPFloat & RHS,roundingMode RM)4074 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4075                                      roundingMode RM) {
4076   return addWithSpecial(*this, RHS, *this, RM);
4077 }
4078 
subtract(const DoubleAPFloat & RHS,roundingMode RM)4079 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4080                                           roundingMode RM) {
4081   changeSign();
4082   auto Ret = add(RHS, RM);
4083   changeSign();
4084   return Ret;
4085 }
4086 
multiply(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4087 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4088                                           APFloat::roundingMode RM) {
4089   const auto &LHS = *this;
4090   auto &Out = *this;
4091   /* Interesting observation: For special categories, finding the lowest
4092      common ancestor of the following layered graph gives the correct
4093      return category:
4094 
4095         NaN
4096        /   \
4097      Zero  Inf
4098        \   /
4099        Normal
4100 
4101      e.g. NaN * NaN = NaN
4102           Zero * Inf = NaN
4103           Normal * Zero = Zero
4104           Normal * Inf = Inf
4105   */
4106   if (LHS.getCategory() == fcNaN) {
4107     Out = LHS;
4108     return opOK;
4109   }
4110   if (RHS.getCategory() == fcNaN) {
4111     Out = RHS;
4112     return opOK;
4113   }
4114   if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4115       (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4116     Out.makeNaN(false, false, nullptr);
4117     return opOK;
4118   }
4119   if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4120     Out = LHS;
4121     return opOK;
4122   }
4123   if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4124     Out = RHS;
4125     return opOK;
4126   }
4127   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4128          "Special cases not handled exhaustively");
4129 
4130   int Status = opOK;
4131   APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4132   // t = a * c
4133   APFloat T = A;
4134   Status |= T.multiply(C, RM);
4135   if (!T.isFiniteNonZero()) {
4136     Floats[0] = T;
4137     Floats[1].makeZero(/* Neg = */ false);
4138     return (opStatus)Status;
4139   }
4140 
4141   // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4142   APFloat Tau = A;
4143   T.changeSign();
4144   Status |= Tau.fusedMultiplyAdd(C, T, RM);
4145   T.changeSign();
4146   {
4147     // v = a * d
4148     APFloat V = A;
4149     Status |= V.multiply(D, RM);
4150     // w = b * c
4151     APFloat W = B;
4152     Status |= W.multiply(C, RM);
4153     Status |= V.add(W, RM);
4154     // tau += v + w
4155     Status |= Tau.add(V, RM);
4156   }
4157   // u = t + tau
4158   APFloat U = T;
4159   Status |= U.add(Tau, RM);
4160 
4161   Floats[0] = U;
4162   if (!U.isFinite()) {
4163     Floats[1].makeZero(/* Neg = */ false);
4164   } else {
4165     // Floats[1] = (t - u) + tau
4166     Status |= T.subtract(U, RM);
4167     Status |= T.add(Tau, RM);
4168     Floats[1] = T;
4169   }
4170   return (opStatus)Status;
4171 }
4172 
divide(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4173 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4174                                         APFloat::roundingMode RM) {
4175   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4176   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4177   auto Ret =
4178       Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4179   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4180   return Ret;
4181 }
4182 
remainder(const DoubleAPFloat & RHS)4183 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4184   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4185   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4186   auto Ret =
4187       Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4188   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4189   return Ret;
4190 }
4191 
mod(const DoubleAPFloat & RHS)4192 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4193   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4194   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4195   auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4196   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4197   return Ret;
4198 }
4199 
4200 APFloat::opStatus
fusedMultiplyAdd(const DoubleAPFloat & Multiplicand,const DoubleAPFloat & Addend,APFloat::roundingMode RM)4201 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4202                                 const DoubleAPFloat &Addend,
4203                                 APFloat::roundingMode RM) {
4204   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4205   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4206   auto Ret = Tmp.fusedMultiplyAdd(
4207       APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4208       APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4209   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4210   return Ret;
4211 }
4212 
roundToIntegral(APFloat::roundingMode RM)4213 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4214   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4215   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4216   auto Ret = Tmp.roundToIntegral(RM);
4217   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4218   return Ret;
4219 }
4220 
changeSign()4221 void DoubleAPFloat::changeSign() {
4222   Floats[0].changeSign();
4223   Floats[1].changeSign();
4224 }
4225 
4226 APFloat::cmpResult
compareAbsoluteValue(const DoubleAPFloat & RHS) const4227 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4228   auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4229   if (Result != cmpEqual)
4230     return Result;
4231   Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4232   if (Result == cmpLessThan || Result == cmpGreaterThan) {
4233     auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4234     auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4235     if (Against && !RHSAgainst)
4236       return cmpLessThan;
4237     if (!Against && RHSAgainst)
4238       return cmpGreaterThan;
4239     if (!Against && !RHSAgainst)
4240       return Result;
4241     if (Against && RHSAgainst)
4242       return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4243   }
4244   return Result;
4245 }
4246 
getCategory() const4247 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4248   return Floats[0].getCategory();
4249 }
4250 
isNegative() const4251 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4252 
makeInf(bool Neg)4253 void DoubleAPFloat::makeInf(bool Neg) {
4254   Floats[0].makeInf(Neg);
4255   Floats[1].makeZero(/* Neg = */ false);
4256 }
4257 
makeZero(bool Neg)4258 void DoubleAPFloat::makeZero(bool Neg) {
4259   Floats[0].makeZero(Neg);
4260   Floats[1].makeZero(/* Neg = */ false);
4261 }
4262 
makeLargest(bool Neg)4263 void DoubleAPFloat::makeLargest(bool Neg) {
4264   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4265   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4266   Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4267   if (Neg)
4268     changeSign();
4269 }
4270 
makeSmallest(bool Neg)4271 void DoubleAPFloat::makeSmallest(bool Neg) {
4272   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4273   Floats[0].makeSmallest(Neg);
4274   Floats[1].makeZero(/* Neg = */ false);
4275 }
4276 
makeSmallestNormalized(bool Neg)4277 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4278   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4279   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4280   if (Neg)
4281     Floats[0].changeSign();
4282   Floats[1].makeZero(/* Neg = */ false);
4283 }
4284 
makeNaN(bool SNaN,bool Neg,const APInt * fill)4285 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4286   Floats[0].makeNaN(SNaN, Neg, fill);
4287   Floats[1].makeZero(/* Neg = */ false);
4288 }
4289 
compare(const DoubleAPFloat & RHS) const4290 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4291   auto Result = Floats[0].compare(RHS.Floats[0]);
4292   // |Float[0]| > |Float[1]|
4293   if (Result == APFloat::cmpEqual)
4294     return Floats[1].compare(RHS.Floats[1]);
4295   return Result;
4296 }
4297 
bitwiseIsEqual(const DoubleAPFloat & RHS) const4298 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4299   return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4300          Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4301 }
4302 
hash_value(const DoubleAPFloat & Arg)4303 hash_code hash_value(const DoubleAPFloat &Arg) {
4304   if (Arg.Floats)
4305     return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4306   return hash_combine(Arg.Semantics);
4307 }
4308 
bitcastToAPInt() const4309 APInt DoubleAPFloat::bitcastToAPInt() const {
4310   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4311   uint64_t Data[] = {
4312       Floats[0].bitcastToAPInt().getRawData()[0],
4313       Floats[1].bitcastToAPInt().getRawData()[0],
4314   };
4315   return APInt(128, 2, Data);
4316 }
4317 
convertFromString(StringRef S,roundingMode RM)4318 APFloat::opStatus DoubleAPFloat::convertFromString(StringRef S,
4319                                                    roundingMode RM) {
4320   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4321   APFloat Tmp(semPPCDoubleDoubleLegacy);
4322   auto Ret = Tmp.convertFromString(S, RM);
4323   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4324   return Ret;
4325 }
4326 
next(bool nextDown)4327 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
4328   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4329   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4330   auto Ret = Tmp.next(nextDown);
4331   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4332   return Ret;
4333 }
4334 
4335 APFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> Input,unsigned int Width,bool IsSigned,roundingMode RM,bool * IsExact) const4336 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
4337                                 unsigned int Width, bool IsSigned,
4338                                 roundingMode RM, bool *IsExact) const {
4339   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4340   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4341       .convertToInteger(Input, Width, IsSigned, RM, IsExact);
4342 }
4343 
convertFromAPInt(const APInt & Input,bool IsSigned,roundingMode RM)4344 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
4345                                                   bool IsSigned,
4346                                                   roundingMode RM) {
4347   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4348   APFloat Tmp(semPPCDoubleDoubleLegacy);
4349   auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
4350   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4351   return Ret;
4352 }
4353 
4354 APFloat::opStatus
convertFromSignExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)4355 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
4356                                               unsigned int InputSize,
4357                                               bool IsSigned, roundingMode RM) {
4358   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4359   APFloat Tmp(semPPCDoubleDoubleLegacy);
4360   auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
4361   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4362   return Ret;
4363 }
4364 
4365 APFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)4366 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
4367                                               unsigned int InputSize,
4368                                               bool IsSigned, roundingMode RM) {
4369   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4370   APFloat Tmp(semPPCDoubleDoubleLegacy);
4371   auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
4372   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4373   return Ret;
4374 }
4375 
convertToHexString(char * DST,unsigned int HexDigits,bool UpperCase,roundingMode RM) const4376 unsigned int DoubleAPFloat::convertToHexString(char *DST,
4377                                                unsigned int HexDigits,
4378                                                bool UpperCase,
4379                                                roundingMode RM) const {
4380   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4381   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4382       .convertToHexString(DST, HexDigits, UpperCase, RM);
4383 }
4384 
isDenormal() const4385 bool DoubleAPFloat::isDenormal() const {
4386   return getCategory() == fcNormal &&
4387          (Floats[0].isDenormal() || Floats[1].isDenormal() ||
4388           // (double)(Hi + Lo) == Hi defines a normal number.
4389           Floats[0].compare(Floats[0] + Floats[1]) != cmpEqual);
4390 }
4391 
isSmallest() const4392 bool DoubleAPFloat::isSmallest() const {
4393   if (getCategory() != fcNormal)
4394     return false;
4395   DoubleAPFloat Tmp(*this);
4396   Tmp.makeSmallest(this->isNegative());
4397   return Tmp.compare(*this) == cmpEqual;
4398 }
4399 
isLargest() const4400 bool DoubleAPFloat::isLargest() const {
4401   if (getCategory() != fcNormal)
4402     return false;
4403   DoubleAPFloat Tmp(*this);
4404   Tmp.makeLargest(this->isNegative());
4405   return Tmp.compare(*this) == cmpEqual;
4406 }
4407 
isInteger() const4408 bool DoubleAPFloat::isInteger() const {
4409   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4410   return Floats[0].isInteger() && Floats[1].isInteger();
4411 }
4412 
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const4413 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
4414                              unsigned FormatPrecision,
4415                              unsigned FormatMaxPadding,
4416                              bool TruncateZero) const {
4417   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4418   APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4419       .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
4420 }
4421 
getExactInverse(APFloat * inv) const4422 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
4423   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4424   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4425   if (!inv)
4426     return Tmp.getExactInverse(nullptr);
4427   APFloat Inv(semPPCDoubleDoubleLegacy);
4428   auto Ret = Tmp.getExactInverse(&Inv);
4429   *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
4430   return Ret;
4431 }
4432 
scalbn(DoubleAPFloat Arg,int Exp,APFloat::roundingMode RM)4433 DoubleAPFloat scalbn(DoubleAPFloat Arg, int Exp, APFloat::roundingMode RM) {
4434   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4435   return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
4436                        scalbn(Arg.Floats[1], Exp, RM));
4437 }
4438 
frexp(const DoubleAPFloat & Arg,int & Exp,APFloat::roundingMode RM)4439 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
4440                     APFloat::roundingMode RM) {
4441   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4442   APFloat First = frexp(Arg.Floats[0], Exp, RM);
4443   APFloat Second = Arg.Floats[1];
4444   if (Arg.getCategory() == APFloat::fcNormal)
4445     Second = scalbn(Second, -Exp, RM);
4446   return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
4447 }
4448 
4449 } // End detail namespace
4450 
Storage(IEEEFloat F,const fltSemantics & Semantics)4451 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
4452   if (usesLayout<IEEEFloat>(Semantics)) {
4453     new (&IEEE) IEEEFloat(std::move(F));
4454     return;
4455   }
4456   if (usesLayout<DoubleAPFloat>(Semantics)) {
4457     const fltSemantics& S = F.getSemantics();
4458     new (&Double)
4459         DoubleAPFloat(Semantics, APFloat(std::move(F), S),
4460                       APFloat(semIEEEdouble));
4461     return;
4462   }
4463   llvm_unreachable("Unexpected semantics");
4464 }
4465 
convertFromString(StringRef Str,roundingMode RM)4466 APFloat::opStatus APFloat::convertFromString(StringRef Str, roundingMode RM) {
4467   APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
4468 }
4469 
hash_value(const APFloat & Arg)4470 hash_code hash_value(const APFloat &Arg) {
4471   if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
4472     return hash_value(Arg.U.IEEE);
4473   if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
4474     return hash_value(Arg.U.Double);
4475   llvm_unreachable("Unexpected semantics");
4476 }
4477 
APFloat(const fltSemantics & Semantics,StringRef S)4478 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
4479     : APFloat(Semantics) {
4480   convertFromString(S, rmNearestTiesToEven);
4481 }
4482 
convert(const fltSemantics & ToSemantics,roundingMode RM,bool * losesInfo)4483 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
4484                                    roundingMode RM, bool *losesInfo) {
4485   if (&getSemantics() == &ToSemantics) {
4486     *losesInfo = false;
4487     return opOK;
4488   }
4489   if (usesLayout<IEEEFloat>(getSemantics()) &&
4490       usesLayout<IEEEFloat>(ToSemantics))
4491     return U.IEEE.convert(ToSemantics, RM, losesInfo);
4492   if (usesLayout<IEEEFloat>(getSemantics()) &&
4493       usesLayout<DoubleAPFloat>(ToSemantics)) {
4494     assert(&ToSemantics == &semPPCDoubleDouble);
4495     auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
4496     *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
4497     return Ret;
4498   }
4499   if (usesLayout<DoubleAPFloat>(getSemantics()) &&
4500       usesLayout<IEEEFloat>(ToSemantics)) {
4501     auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
4502     *this = APFloat(std::move(getIEEE()), ToSemantics);
4503     return Ret;
4504   }
4505   llvm_unreachable("Unexpected semantics");
4506 }
4507 
getAllOnesValue(unsigned BitWidth,bool isIEEE)4508 APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) {
4509   if (isIEEE) {
4510     switch (BitWidth) {
4511     case 16:
4512       return APFloat(semIEEEhalf, APInt::getAllOnesValue(BitWidth));
4513     case 32:
4514       return APFloat(semIEEEsingle, APInt::getAllOnesValue(BitWidth));
4515     case 64:
4516       return APFloat(semIEEEdouble, APInt::getAllOnesValue(BitWidth));
4517     case 80:
4518       return APFloat(semX87DoubleExtended, APInt::getAllOnesValue(BitWidth));
4519     case 128:
4520       return APFloat(semIEEEquad, APInt::getAllOnesValue(BitWidth));
4521     default:
4522       llvm_unreachable("Unknown floating bit width");
4523     }
4524   } else {
4525     assert(BitWidth == 128);
4526     return APFloat(semPPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
4527   }
4528 }
4529 
print(raw_ostream & OS) const4530 void APFloat::print(raw_ostream &OS) const {
4531   SmallVector<char, 16> Buffer;
4532   toString(Buffer);
4533   OS << Buffer << "\n";
4534 }
4535 
4536 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump() const4537 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
4538 #endif
4539 
Profile(FoldingSetNodeID & NID) const4540 void APFloat::Profile(FoldingSetNodeID &NID) const {
4541   NID.Add(bitcastToAPInt());
4542 }
4543 
4544 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4545    an APSInt, whose initial bit-width and signed-ness are used to determine the
4546    precision of the conversion.
4547  */
convertToInteger(APSInt & result,roundingMode rounding_mode,bool * isExact) const4548 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
4549                                             roundingMode rounding_mode,
4550                                             bool *isExact) const {
4551   unsigned bitWidth = result.getBitWidth();
4552   SmallVector<uint64_t, 4> parts(result.getNumWords());
4553   opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
4554                                      rounding_mode, isExact);
4555   // Keeps the original signed-ness.
4556   result = APInt(bitWidth, parts);
4557   return status;
4558 }
4559 
4560 } // End llvm namespace
4561 
4562 #undef APFLOAT_DISPATCH_ON_SEMANTICS
4563