xref: /openbsd/gnu/llvm/llvm/include/llvm/ADT/APFloat.h (revision d415bd75)
1 //===- llvm/ADT/APFloat.h - Arbitrary Precision Floating Point ---*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file declares a class to represent arbitrary precision floating point
11 /// values and provide a variety of arithmetic operations on them.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_ADT_APFLOAT_H
16 #define LLVM_ADT_APFLOAT_H
17 
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/FloatingPointMode.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include <memory>
23 
// Forwards METHOD_CALL to `U.IEEE` or `U.Double`, whichever matches the layout
// reported by getSemantics(), and returns the result from the enclosing
// function.  Because the expansion contains `return` statements it can only be
// used inside a function where `U`, getSemantics() and usesLayout<> are in
// scope.  Any other layout reaches llvm_unreachable("Unexpected semantics").
24 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
25   do {                                                                         \
26     if (usesLayout<IEEEFloat>(getSemantics()))                                 \
27       return U.IEEE.METHOD_CALL;                                               \
28     if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
29       return U.Double.METHOD_CALL;                                             \
30     llvm_unreachable("Unexpected semantics");                                  \
31   } while (false)
32 
33 namespace llvm {
34 
35 struct fltSemantics;
36 class APSInt;
37 class StringRef;
38 class APFloat;
39 class raw_ostream;
40 
41 template <typename T> class Expected;
42 template <typename T> class SmallVectorImpl;
43 
44 /// Enum that describes what fraction of the least significant bit (LSB) the
45 /// truncated bits of an fp number represent.
46 ///
47 /// This essentially combines the roles of guard and sticky bits.
48 enum lostFraction { // Example of truncated bits:
49   lfExactlyZero,    // 000000
50   lfLessThanHalf,   // 0xxxxx  x's not all zero
51   lfExactlyHalf,    // 100000
52   lfMoreThanHalf    // 1xxxxx  x's not all zero
53 };
54 
55 /// A self-contained host- and target-independent arbitrary-precision
56 /// floating-point software implementation.
57 ///
58 /// APFloat uses bignum integer arithmetic as provided by static functions in
59 /// the APInt class.  The library will work with bignum integers whose parts are
60 /// any unsigned type at least 16 bits wide, but 64 bits is recommended.
61 ///
62 /// Written for clarity rather than speed, in particular with a view to use in
63 /// the front-end of a cross compiler so that target arithmetic can be correctly
64 /// performed on the host.  Performance should nonetheless be reasonable,
65 /// particularly for its intended use.  It may be useful as a base
66 /// implementation for a run-time library during development of a faster
67 /// target-specific one.
68 ///
69 /// All 5 rounding modes in the IEEE-754R draft are handled correctly for all
70 /// implemented operations.  Currently implemented operations are add, subtract,
71 /// multiply, divide, fused-multiply-add, conversion-to-float,
72 /// conversion-to-integer and conversion-from-integer.  New rounding modes
73 /// (e.g. away from zero) can be added with three or four lines of code.
74 ///
75 /// Several formats are built-in (see APFloatBase::Semantics), among them IEEE
76 /// half, single, double and quadruple precision, and x87 80-bit extended
77 /// double (when operating with full extended precision).  Adding a new format
78 /// that obeys IEEE semantics only requires adding two lines of code: a
79 /// declaration and definition of the format.
80 ///
81 /// All operations return the status of that operation as an exception bit-mask,
82 /// so multiple operations can be done consecutively with their results or-ed
83 /// together.  The returned status can be useful for compiler diagnostics; e.g.,
84 /// inexact, underflow and overflow can be easily diagnosed on constant folding,
85 /// and compiler optimizers can determine what exceptions would be raised by
86 /// folding operations and optimize, or perhaps not optimize, accordingly.
87 ///
88 /// At present, underflow tininess is detected after rounding; it should be
89 /// straightforward to add support for the before-rounding case too.
90 ///
91 /// The library reads hexadecimal floating point numbers as per C99, and
92 /// correctly rounds if necessary according to the specified rounding mode.
93 /// Syntax is required to have been validated by the caller.  It also converts
94 /// floating point numbers to hexadecimal text as per the C99 %a and %A
95 /// conversions.  The output precision (or alternatively the natural minimal
96 /// precision) can be specified; if the requested precision is less than the
97 /// natural precision the output is correctly rounded for the specified rounding
98 /// mode.
99 ///
100 /// It also reads decimal floating point numbers and correctly rounds according
101 /// to the specified rounding mode.
102 ///
103 /// Conversion to decimal text is provided by toString().
104 ///
105 /// Non-zero finite numbers are represented internally as a sign bit, a signed
106 /// 32-bit exponent, and the significand as an array of integer parts.  After
107 /// normalization of a number of precision P the exponent is within the range of
108 /// the format, and if the number is not denormal the P-th bit of the
109 /// significand is set as an explicit integer bit.  For denormals the most
110 /// significant bit is shifted right so that the exponent is maintained at the
111 /// format's minimum, so that the smallest denormal has just the least
112 /// significant bit of the significand set.  The sign of zeroes and infinities
113 /// is significant; the exponent and significand of such numbers is not stored,
114 /// but has a known implicit (deterministic) value: 0 for the significands, 0
115 /// for zero exponent, all 1 bits for infinity exponent.  For NaNs the sign and
116 /// significand are deterministic, although not really meaningful, and preserved
117 /// in non-conversion operations.  The exponent is implicitly all 1 bits.
118 ///
119 /// APFloat does not provide any exception handling beyond default exception
120 /// handling. We represent Signaling NaNs via IEEE-754R 2008 6.2.1 should clause
121 /// by encoding Signaling NaNs with the first bit of its trailing significand as
122 /// 0.
123 ///
124 /// TODO
125 /// ====
126 ///
127 /// Some features that may or may not be worth adding:
128 ///
129 /// Binary to decimal conversion (hard).
130 ///
131 /// Optional ability to detect underflow tininess before rounding.
132 ///
133 /// New formats: x87 in single and double precision mode (IEEE apart from
134 /// extended exponent range) (hard).
135 ///
136 /// New operations: sqrt, IEEE remainder, C90 fmod, nexttoward.
137 ///
138 
139 // These are the common type definitions shared by APFloat and its internal
140 // implementation classes. This struct should not define any non-static data
141 // members.
142 struct APFloatBase {
      // Word type and per-word bit width, both taken directly from APInt.
143   typedef APInt::WordType integerPart;
144   static constexpr unsigned integerPartWidth = APInt::APINT_BITS_PER_WORD;
145 
146   /// A signed type to represent a floating point number's unbiased exponent.
147   typedef int32_t ExponentType;
148 
149   /// \name Floating Point Semantics.
150   /// @{
151   enum Semantics {
152     S_IEEEhalf,
153     S_BFloat,
154     S_IEEEsingle,
155     S_IEEEdouble,
156     S_IEEEquad,
157     S_PPCDoubleDouble,
158     // 8-bit floating point number following IEEE-754 conventions with bit
159     // layout S1E5M2 as described in https://arxiv.org/abs/2209.05433.
160     S_Float8E5M2,
161     // 8-bit floating point number mostly following IEEE-754 conventions with
162     // bit layout S1E4M3 as described in https://arxiv.org/abs/2209.05433.
163     // Unlike IEEE-754 types, there are no infinity values, and NaN is
164     // represented with the exponent and mantissa bits set to all 1s.
165     S_Float8E4M3FN,
166     S_x87DoubleExtended,
        // Has the same value as the last enumerator listed above.
167     S_MaxSemantics = S_x87DoubleExtended,
168   };
169 
      /// Conversions between a Semantics enumerator and a fltSemantics object.
170   static const llvm::fltSemantics &EnumToSemantics(Semantics S);
171   static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem);
172 
      /// One accessor per Semantics enumerator (S_MaxSemantics excluded), each
      /// returning a reference to a fltSemantics object.
173   static const fltSemantics &IEEEhalf() LLVM_READNONE;
174   static const fltSemantics &BFloat() LLVM_READNONE;
175   static const fltSemantics &IEEEsingle() LLVM_READNONE;
176   static const fltSemantics &IEEEdouble() LLVM_READNONE;
177   static const fltSemantics &IEEEquad() LLVM_READNONE;
178   static const fltSemantics &PPCDoubleDouble() LLVM_READNONE;
179   static const fltSemantics &Float8E5M2() LLVM_READNONE;
180   static const fltSemantics &Float8E4M3FN() LLVM_READNONE;
181   static const fltSemantics &x87DoubleExtended() LLVM_READNONE;
182 
183   /// A pseudo fltSemantics used to construct APFloats that cannot conflict
184   /// with anything real.
185   static const fltSemantics &Bogus() LLVM_READNONE;
186 
187   /// @}
188 
189   /// IEEE-754R 5.11: Floating Point Comparison Relations.
190   enum cmpResult {
191     cmpLessThan,
192     cmpEqual,
193     cmpGreaterThan,
194     cmpUnordered
195   };
196 
197   /// IEEE-754R 4.3: Rounding-direction attributes.
198   using roundingMode = llvm::RoundingMode;
199 
      // Short aliases for the llvm::RoundingMode enumerators.
200   static constexpr roundingMode rmNearestTiesToEven =
201                                                 RoundingMode::NearestTiesToEven;
202   static constexpr roundingMode rmTowardPositive = RoundingMode::TowardPositive;
203   static constexpr roundingMode rmTowardNegative = RoundingMode::TowardNegative;
204   static constexpr roundingMode rmTowardZero     = RoundingMode::TowardZero;
205   static constexpr roundingMode rmNearestTiesToAway =
206                                                 RoundingMode::NearestTiesToAway;
207 
208   /// IEEE-754R 7: Default exception handling.
209   ///
210   /// opUnderflow or opOverflow are always returned or-ed with opInexact.
211   ///
212   /// APFloat models this behavior specified by IEEE-754:
213   ///   "For operations producing results in floating-point format, the default
214   ///    result of an operation that signals the invalid operation exception
215   ///    shall be a quiet NaN."
      ///
      /// Every enumerator other than opOK is a distinct single bit, so several
      /// statuses can be or-ed together into one value.
216   enum opStatus {
217     opOK = 0x00,
218     opInvalidOp = 0x01,
219     opDivByZero = 0x02,
220     opOverflow = 0x04,
221     opUnderflow = 0x08,
222     opInexact = 0x10
223   };
224 
225   /// Category of internally-represented number.
226   enum fltCategory {
227     fcInfinity,
228     fcNaN,
229     fcNormal,
230     fcZero
231   };
232 
233   /// Convenience enum used to construct an uninitialized APFloat.
234   enum uninitializedTag {
235     uninitialized
236   };
237 
238   /// Enumeration of \c ilogb error results.
      ///
      /// The values sit at the extremes of the int range (INT_MIN, INT_MIN + 1
      /// and INT_MAX).
239   enum IlogbErrorKinds {
240     IEK_Zero = INT_MIN + 1,
241     IEK_NaN = INT_MIN,
242     IEK_Inf = INT_MAX
243   };
244 
      /// Static queries on a fltSemantics object.
      /// NOTE(review): the meanings (precision, minimum/maximum exponent, size
      /// in bits) are inferred from the names; confirm against the definitions.
245   static unsigned int semanticsPrecision(const fltSemantics &);
246   static ExponentType semanticsMinExponent(const fltSemantics &);
247   static ExponentType semanticsMaxExponent(const fltSemantics &);
248   static unsigned int semanticsSizeInBits(const fltSemantics &);
249 
250   /// Returns the size of the floating point number (in bits) in the given
251   /// semantics.
252   static unsigned getSizeInBits(const fltSemantics &Sem);
253 };
254 
255 namespace detail {
256 
/// Software floating-point value whose format is described by a fltSemantics
/// object.  The state kept per value is the semantics pointer, a significand,
/// an exponent, a category and a sign bit (see the data members at the end of
/// the class).  Only the simple queries are defined inline in this header;
/// everything else is declared here and defined elsewhere.
257 class IEEEFloat final : public APFloatBase {
258 public:
259   /// \name Constructors
260   /// @{
261 
262   IEEEFloat(const fltSemantics &); // Default construct to +0.0
263   IEEEFloat(const fltSemantics &, integerPart);
264   IEEEFloat(const fltSemantics &, uninitializedTag);
265   IEEEFloat(const fltSemantics &, const APInt &);
266   explicit IEEEFloat(double d);
267   explicit IEEEFloat(float f);
268   IEEEFloat(const IEEEFloat &);
269   IEEEFloat(IEEEFloat &&);
270   ~IEEEFloat();
271 
272   /// @}
273 
274   /// Returns whether this instance allocated memory, which is reported as
      /// true exactly when partCount() is greater than one.
needsCleanup()275   bool needsCleanup() const { return partCount() > 1; }
276 
277   /// \name Convenience "constructors"
278   /// @{
279 
280   /// @}
281 
282   /// \name Arithmetic
283   /// @{
284 
285   opStatus add(const IEEEFloat &, roundingMode);
286   opStatus subtract(const IEEEFloat &, roundingMode);
287   opStatus multiply(const IEEEFloat &, roundingMode);
288   opStatus divide(const IEEEFloat &, roundingMode);
289   /// IEEE remainder.
290   opStatus remainder(const IEEEFloat &);
291   /// C fmod, or llvm frem.
292   opStatus mod(const IEEEFloat &);
293   opStatus fusedMultiplyAdd(const IEEEFloat &, const IEEEFloat &, roundingMode);
294   opStatus roundToIntegral(roundingMode);
295   /// IEEE-754R 5.3.1: nextUp/nextDown.
296   opStatus next(bool nextDown);
297 
298   /// @}
299 
300   /// \name Sign operations.
301   /// @{
302 
303   void changeSign();
304 
305   /// @}
306 
307   /// \name Conversions
308   /// @{
309 
310   opStatus convert(const fltSemantics &, roundingMode, bool *);
311   opStatus convertToInteger(MutableArrayRef<integerPart>, unsigned int, bool,
312                             roundingMode, bool *) const;
313   opStatus convertFromAPInt(const APInt &, bool, roundingMode);
314   opStatus convertFromSignExtendedInteger(const integerPart *, unsigned int,
315                                           bool, roundingMode);
316   opStatus convertFromZeroExtendedInteger(const integerPart *, unsigned int,
317                                           bool, roundingMode);
318   Expected<opStatus> convertFromString(StringRef, roundingMode);
319   APInt bitcastToAPInt() const;
320   double convertToDouble() const;
321   float convertToFloat() const;
322 
323   /// @}
324 
325   /// The definition of equality is not straightforward for floating point, so
326   /// we won't use operator==.  Use one of the following, or write whatever it
327   /// is you really mean.
328   bool operator==(const IEEEFloat &) const = delete;
329 
330   /// IEEE comparison with another floating point number (NaNs compare
331   /// unordered, 0==-0).
332   cmpResult compare(const IEEEFloat &) const;
333 
334   /// Bitwise comparison for equality (QNaNs compare equal, 0!=-0).
335   bool bitwiseIsEqual(const IEEEFloat &) const;
336 
337   /// Write out a hexadecimal representation of the floating point value to DST,
338   /// which must be of sufficient size, in the C99 form [-]0xh.hhhhp[+-]d.
339   /// Return the number of characters written, excluding the terminating NUL.
340   unsigned int convertToHexString(char *dst, unsigned int hexDigits,
341                                   bool upperCase, roundingMode) const;
342 
343   /// \name IEEE-754R 5.7.2 General operations.
344   /// @{
345 
346   /// IEEE-754R isSignMinus: Returns true if and only if the current value is
347   /// negative.
348   ///
349   /// This applies to zeros and NaNs as well.
isNegative()350   bool isNegative() const { return sign; }
351 
352   /// IEEE-754R isNormal: Returns true if and only if the current value is normal.
353   ///
354   /// This implies that the current value of the float is not zero, subnormal,
355   /// infinite, or NaN following the definition of normality from IEEE-754R.
isNormal()356   bool isNormal() const { return !isDenormal() && isFiniteNonZero(); }
357 
358   /// Returns true if and only if the current value is zero, subnormal, or
359   /// normal.
360   ///
361   /// This means that the value is not infinite or NaN.
isFinite()362   bool isFinite() const { return !isNaN() && !isInfinity(); }
363 
364   /// Returns true if and only if the float is plus or minus zero.
isZero()365   bool isZero() const { return category == fcZero; }
366 
367   /// IEEE-754R isSubnormal(): Returns true if and only if the float is a
368   /// denormal.
369   bool isDenormal() const;
370 
371   /// IEEE-754R isInfinite(): Returns true if and only if the float is infinity.
isInfinity()372   bool isInfinity() const { return category == fcInfinity; }
373 
374   /// Returns true if and only if the float is a quiet or signaling NaN.
isNaN()375   bool isNaN() const { return category == fcNaN; }
376 
377   /// Returns true if and only if the float is a signaling NaN.
378   bool isSignaling() const;
379 
380   /// @}
381 
382   /// \name Simple Queries
383   /// @{
384 
      // isNonZero() only tests the category, so it is also true for NaN and
      // infinity; isFiniteNonZero() additionally requires isFinite().
getCategory()385   fltCategory getCategory() const { return category; }
getSemantics()386   const fltSemantics &getSemantics() const { return *semantics; }
isNonZero()387   bool isNonZero() const { return category != fcZero; }
isFiniteNonZero()388   bool isFiniteNonZero() const { return isFinite() && !isZero(); }
isPosZero()389   bool isPosZero() const { return isZero() && !isNegative(); }
isNegZero()390   bool isNegZero() const { return isZero() && isNegative(); }
391 
392   /// Returns true if and only if the number has the smallest possible non-zero
393   /// magnitude in the current semantics.
394   bool isSmallest() const;
395 
396   /// Returns true if this is the smallest (by magnitude) normalized finite
397   /// number in the given semantics.
398   bool isSmallestNormalized() const;
399 
400   /// Returns true if and only if the number has the largest possible finite
401   /// magnitude in the current semantics.
402   bool isLargest() const;
403 
404   /// Returns true if and only if the number is an exact integer.
405   bool isInteger() const;
406 
407   /// @}
408 
409   IEEEFloat &operator=(const IEEEFloat &);
410   IEEEFloat &operator=(IEEEFloat &&);
411 
412   /// Overload to compute a hash code for an APFloat value.
413   ///
414   /// Note that the use of hash codes for floating point values is in general
415   /// fraught with peril. Equality is hard to define for these values. For
416   /// example, should negative and positive zero hash to different codes? Are
417   /// they equal or not? This hash value implementation specifically
418   /// emphasizes producing different codes for different inputs in order to
419   /// be used in canonicalization and memoization. As such, equality is
420   /// bitwiseIsEqual, and 0 != -0.
421   friend hash_code hash_value(const IEEEFloat &Arg);
422 
423   /// Converts this value into a decimal string.
424   ///
425   /// \param FormatPrecision The maximum number of digits of
426   ///   precision to output.  If there are fewer digits available,
427   ///   zero padding will not be used unless the value is
428   ///   integral and small enough to be expressed in
429   ///   FormatPrecision digits.  0 means to use the natural
430   ///   precision of the number.
431   /// \param FormatMaxPadding The maximum number of zeros to
432   ///   consider inserting before falling back to scientific
433   ///   notation.  0 means to always use scientific notation.
434   ///
435   /// \param TruncateZero Indicates whether to remove the trailing zero in
436   ///   the fraction part or not. Setting this parameter to false also forces
437   ///   output that is more similar to default printf behavior.
438   ///   Specifically the lower e is used as exponent delimiter and exponent
439   ///   always contains no less than two digits.
440   ///
441   /// Number       Precision    MaxPadding      Result
442   /// ------       ---------    ----------      ------
443   /// 1.01E+4              5             2       10100
444   /// 1.01E+4              4             2       1.01E+4
445   /// 1.01E+4              5             1       1.01E+4
446   /// 1.01E-2              5             2       0.0101
447   /// 1.01E-2              4             2       0.0101
448   /// 1.01E-2              4             1       1.01E-2
449   void toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision = 0,
450                 unsigned FormatMaxPadding = 3, bool TruncateZero = true) const;
451 
452   /// If this value has an exact multiplicative inverse, store it in inv and
453   /// return true.
454   bool getExactInverse(APFloat *inv) const;
455 
456   /// Returns the exponent of the internal representation of the APFloat.
457   ///
458   /// Because the radix of APFloat is 2, this is equivalent to floor(log2(x)).
459   /// For special APFloat values, this returns special error codes:
460   ///
461   ///   NaN -> \c IEK_NaN
462   ///   0   -> \c IEK_Zero
463   ///   Inf -> \c IEK_Inf
464   ///
465   friend int ilogb(const IEEEFloat &Arg);
466 
467   /// Returns: X * 2^Exp for integral exponents.
468   friend IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode);
469 
470   friend IEEEFloat frexp(const IEEEFloat &X, int &Exp, roundingMode);
471 
472   /// \name Special value setters.
473   /// @{
474 
475   void makeLargest(bool Neg = false);
476   void makeSmallest(bool Neg = false);
477   void makeNaN(bool SNaN = false, bool Neg = false,
478                const APInt *fill = nullptr);
479   void makeInf(bool Neg = false);
480   void makeZero(bool Neg = false);
481   void makeQuiet();
482 
483   /// Sets this value to the smallest (by magnitude) normalized finite number
484   /// in the given semantics.
485   ///
486   /// \param Negative - True iff the number should be negative
487   void makeSmallestNormalized(bool Negative = false);
488 
489   /// @}
490 
      /// NOTE(review): presumably compares magnitudes while ignoring the sign;
      /// the definition is not visible here, so confirm before relying on it.
491   cmpResult compareAbsoluteValue(const IEEEFloat &) const;
492 
493 private:
494   /// \name Simple Queries
495   /// @{
496 
497   integerPart *significandParts();
498   const integerPart *significandParts() const;
499   unsigned int partCount() const;
500 
501   /// @}
502 
503   /// \name Significand operations.
504   /// @{
505 
506   integerPart addSignificand(const IEEEFloat &);
507   integerPart subtractSignificand(const IEEEFloat &, integerPart);
508   lostFraction addOrSubtractSignificand(const IEEEFloat &, bool subtract);
509   lostFraction multiplySignificand(const IEEEFloat &, IEEEFloat);
510   lostFraction multiplySignificand(const IEEEFloat&);
511   lostFraction divideSignificand(const IEEEFloat &);
512   void incrementSignificand();
513   void initialize(const fltSemantics *);
514   void shiftSignificandLeft(unsigned int);
515   lostFraction shiftSignificandRight(unsigned int);
516   unsigned int significandLSB() const;
517   unsigned int significandMSB() const;
518   void zeroSignificand();
519   /// Return true if the significand excluding the integral bit is all ones.
520   bool isSignificandAllOnes() const;
521   bool isSignificandAllOnesExceptLSB() const;
522   /// Return true if the significand excluding the integral bit is all zeros.
523   bool isSignificandAllZeros() const;
524   bool isSignificandAllZerosExceptMSB() const;
525 
526   /// @}
527 
528   /// \name Arithmetic on special values.
529   /// @{
530 
531   opStatus addOrSubtractSpecials(const IEEEFloat &, bool subtract);
532   opStatus divideSpecials(const IEEEFloat &);
533   opStatus multiplySpecials(const IEEEFloat &);
534   opStatus modSpecials(const IEEEFloat &);
535   opStatus remainderSpecials(const IEEEFloat&);
536 
537   /// @}
538 
539   /// \name Miscellany
540   /// @{
541 
542   bool convertFromStringSpecials(StringRef str);
543   opStatus normalize(roundingMode, lostFraction);
544   opStatus addOrSubtract(const IEEEFloat &, roundingMode, bool subtract);
545   opStatus handleOverflow(roundingMode);
546   bool roundAwayFromZero(roundingMode, lostFraction, unsigned int) const;
547   opStatus convertToSignExtendedInteger(MutableArrayRef<integerPart>,
548                                         unsigned int, bool, roundingMode,
549                                         bool *) const;
550   opStatus convertFromUnsignedParts(const integerPart *, unsigned int,
551                                     roundingMode);
552   Expected<opStatus> convertFromHexadecimalString(StringRef, roundingMode);
553   Expected<opStatus> convertFromDecimalString(StringRef, roundingMode);
554   char *convertNormalToHexString(char *, unsigned int, bool,
555                                  roundingMode) const;
556   opStatus roundSignificandWithExponent(const integerPart *, unsigned int, int,
557                                         roundingMode);
558   ExponentType exponentNaN() const;
559   ExponentType exponentInf() const;
560   ExponentType exponentZero() const;
561 
562   /// @}
563 
      // Helpers that produce an APInt from this value and initialise this value
      // from an APInt.  NOTE(review): judging by the names there is one pair per
      // supported format; confirm against the definitions.
564   APInt convertHalfAPFloatToAPInt() const;
565   APInt convertBFloatAPFloatToAPInt() const;
566   APInt convertFloatAPFloatToAPInt() const;
567   APInt convertDoubleAPFloatToAPInt() const;
568   APInt convertQuadrupleAPFloatToAPInt() const;
569   APInt convertF80LongDoubleAPFloatToAPInt() const;
570   APInt convertPPCDoubleDoubleAPFloatToAPInt() const;
571   APInt convertFloat8E5M2APFloatToAPInt() const;
572   APInt convertFloat8E4M3FNAPFloatToAPInt() const;
573   void initFromAPInt(const fltSemantics *Sem, const APInt &api);
574   void initFromHalfAPInt(const APInt &api);
575   void initFromBFloatAPInt(const APInt &api);
576   void initFromFloatAPInt(const APInt &api);
577   void initFromDoubleAPInt(const APInt &api);
578   void initFromQuadrupleAPInt(const APInt &api);
579   void initFromF80LongDoubleAPInt(const APInt &api);
580   void initFromPPCDoubleDoubleAPInt(const APInt &api);
581   void initFromFloat8E5M2APInt(const APInt &api);
582   void initFromFloat8E4M3FNAPInt(const APInt &api);
583 
584   void assign(const IEEEFloat &);
585   void copySignificand(const IEEEFloat &);
586   void freeSignificand();
587 
588   /// Note: this must be the first data member.
589   /// The semantics that this value obeys.
590   const fltSemantics *semantics;
591 
592   /// A binary fraction with an explicit integer bit.
593   ///
594   /// The significand must be at least one bit wider than the target precision.
      ///
      /// Holds either a single integerPart inline (`part`) or a pointer to
      /// integerParts (`parts`).  NOTE(review): which member is active is not
      /// visible here; presumably it follows partCount() -- confirm.
595   union Significand {
596     integerPart part;
597     integerPart *parts;
598   } significand;
599 
600   /// The signed unbiased exponent of the value.
601   ExponentType exponent;
602 
603   /// What kind of floating point number this is.
604   ///
605   /// Only 2 bits are required, but VisualStudio incorrectly sign extends it.
606   /// Using the extra bit keeps it from failing under VisualStudio.
607   fltCategory category : 3;
608 
609   /// Sign bit of the number.
610   unsigned int sign : 1;
611 };
612 
// Namespace-scope declarations of the functions that IEEEFloat declares as
// friends.
613 hash_code hash_value(const IEEEFloat &Arg);
614 int ilogb(const IEEEFloat &Arg);
615 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode);
616 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM);
617 
618 // This mode implements a more precise float in terms of two APFloats.
619 // The interface and layout are designed for arbitrary underlying semantics,
620 // though currently only PPCDoubleDouble semantics are supported, whose
621 // corresponding underlying semantics are IEEEdouble.
622 class DoubleAPFloat final : public APFloatBase {
623   // Note: this must be the first data member.
624   const fltSemantics *Semantics;
      // Owning pointer to an APFloat array; getFirst()/getSecond() read
      // elements 0 and 1 of it.
625   std::unique_ptr<APFloat[]> Floats;
626 
627   opStatus addImpl(const APFloat &a, const APFloat &aa, const APFloat &c,
628                    const APFloat &cc, roundingMode RM);
629 
630   opStatus addWithSpecial(const DoubleAPFloat &LHS, const DoubleAPFloat &RHS,
631                           DoubleAPFloat &Out, roundingMode RM);
632 
633 public:
634   DoubleAPFloat(const fltSemantics &S);
635   DoubleAPFloat(const fltSemantics &S, uninitializedTag);
636   DoubleAPFloat(const fltSemantics &S, integerPart);
637   DoubleAPFloat(const fltSemantics &S, const APInt &I);
638   DoubleAPFloat(const fltSemantics &S, APFloat &&First, APFloat &&Second);
639   DoubleAPFloat(const DoubleAPFloat &RHS);
640   DoubleAPFloat(DoubleAPFloat &&RHS);
641 
642   DoubleAPFloat &operator=(const DoubleAPFloat &RHS);
643 
      // Move assignment destroys this object in place and then move-constructs
      // a new one from RHS in the same storage.  Self-assignment is skipped so
      // the object is never destroyed before being read.
644   DoubleAPFloat &operator=(DoubleAPFloat &&RHS) {
645     if (this != &RHS) {
646       this->~DoubleAPFloat();
647       new (this) DoubleAPFloat(std::move(RHS));
648     }
649     return *this;
650   }
651 
      // True when the Floats array pointer is non-null.
needsCleanup()652   bool needsCleanup() const { return Floats != nullptr; }
653 
      // Accessors for Floats[0] and Floats[1]; no null check is performed, so
      // Floats must be non-null when these are called.
getFirst()654   APFloat &getFirst() { return Floats[0]; }
getFirst()655   const APFloat &getFirst() const { return Floats[0]; }
getSecond()656   APFloat &getSecond() { return Floats[1]; }
getSecond()657   const APFloat &getSecond() const { return Floats[1]; }
658 
659   opStatus add(const DoubleAPFloat &RHS, roundingMode RM);
660   opStatus subtract(const DoubleAPFloat &RHS, roundingMode RM);
661   opStatus multiply(const DoubleAPFloat &RHS, roundingMode RM);
662   opStatus divide(const DoubleAPFloat &RHS, roundingMode RM);
663   opStatus remainder(const DoubleAPFloat &RHS);
664   opStatus mod(const DoubleAPFloat &RHS);
665   opStatus fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
666                             const DoubleAPFloat &Addend, roundingMode RM);
667   opStatus roundToIntegral(roundingMode RM);
668   void changeSign();
669   cmpResult compareAbsoluteValue(const DoubleAPFloat &RHS) const;
670 
671   fltCategory getCategory() const;
672   bool isNegative() const;
673 
      // Unlike the IEEEFloat setters of the same names, these take no default
      // arguments.
674   void makeInf(bool Neg);
675   void makeZero(bool Neg);
676   void makeLargest(bool Neg);
677   void makeSmallest(bool Neg);
678   void makeSmallestNormalized(bool Neg);
679   void makeNaN(bool SNaN, bool Neg, const APInt *fill);
680 
681   cmpResult compare(const DoubleAPFloat &RHS) const;
682   bool bitwiseIsEqual(const DoubleAPFloat &RHS) const;
683   APInt bitcastToAPInt() const;
684   Expected<opStatus> convertFromString(StringRef, roundingMode);
685   opStatus next(bool nextDown);
686 
687   opStatus convertToInteger(MutableArrayRef<integerPart> Input,
688                             unsigned int Width, bool IsSigned, roundingMode RM,
689                             bool *IsExact) const;
690   opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM);
691   opStatus convertFromSignExtendedInteger(const integerPart *Input,
692                                           unsigned int InputSize, bool IsSigned,
693                                           roundingMode RM);
694   opStatus convertFromZeroExtendedInteger(const integerPart *Input,
695                                           unsigned int InputSize, bool IsSigned,
696                                           roundingMode RM);
697   unsigned int convertToHexString(char *DST, unsigned int HexDigits,
698                                   bool UpperCase, roundingMode RM) const;
699 
700   bool isDenormal() const;
701   bool isSmallest() const;
702   bool isSmallestNormalized() const;
703   bool isLargest() const;
704   bool isInteger() const;
705 
      // Unlike IEEEFloat::toString, FormatPrecision and FormatMaxPadding have
      // no default values here; only TruncateZero does.
706   void toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
707                 unsigned FormatMaxPadding, bool TruncateZero = true) const;
708 
709   bool getExactInverse(APFloat *inv) const;
710 
711   friend DoubleAPFloat scalbn(const DoubleAPFloat &X, int Exp, roundingMode);
712   friend DoubleAPFloat frexp(const DoubleAPFloat &X, int &Exp, roundingMode);
713   friend hash_code hash_value(const DoubleAPFloat &Arg);
714 };
715 
// Namespace-scope declaration of the hash_value function that DoubleAPFloat
// declares as a friend.
716 hash_code hash_value(const DoubleAPFloat &Arg);
717 
718 } // End detail namespace
719 
720 // This is an interface class that is currently forwarding functionalities from
721 // detail::IEEEFloat.
722 class APFloat : public APFloatBase {
723   typedef detail::IEEEFloat IEEEFloat;
724   typedef detail::DoubleAPFloat DoubleAPFloat;
725 
726   static_assert(std::is_standard_layout<IEEEFloat>::value);
727 
728   union Storage {
729     const fltSemantics *semantics;
730     IEEEFloat IEEE;
731     DoubleAPFloat Double;
732 
733     explicit Storage(IEEEFloat F, const fltSemantics &S);
Storage(DoubleAPFloat F,const fltSemantics & S)734     explicit Storage(DoubleAPFloat F, const fltSemantics &S)
735         : Double(std::move(F)) {
736       assert(&S == &PPCDoubleDouble());
737     }
738 
739     template <typename... ArgTypes>
Storage(const fltSemantics & Semantics,ArgTypes &&...Args)740     Storage(const fltSemantics &Semantics, ArgTypes &&... Args) {
741       if (usesLayout<IEEEFloat>(Semantics)) {
742         new (&IEEE) IEEEFloat(Semantics, std::forward<ArgTypes>(Args)...);
743         return;
744       }
745       if (usesLayout<DoubleAPFloat>(Semantics)) {
746         new (&Double) DoubleAPFloat(Semantics, std::forward<ArgTypes>(Args)...);
747         return;
748       }
749       llvm_unreachable("Unexpected semantics");
750     }
751 
~Storage()752     ~Storage() {
753       if (usesLayout<IEEEFloat>(*semantics)) {
754         IEEE.~IEEEFloat();
755         return;
756       }
757       if (usesLayout<DoubleAPFloat>(*semantics)) {
758         Double.~DoubleAPFloat();
759         return;
760       }
761       llvm_unreachable("Unexpected semantics");
762     }
763 
Storage(const Storage & RHS)764     Storage(const Storage &RHS) {
765       if (usesLayout<IEEEFloat>(*RHS.semantics)) {
766         new (this) IEEEFloat(RHS.IEEE);
767         return;
768       }
769       if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
770         new (this) DoubleAPFloat(RHS.Double);
771         return;
772       }
773       llvm_unreachable("Unexpected semantics");
774     }
775 
Storage(Storage && RHS)776     Storage(Storage &&RHS) {
777       if (usesLayout<IEEEFloat>(*RHS.semantics)) {
778         new (this) IEEEFloat(std::move(RHS.IEEE));
779         return;
780       }
781       if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
782         new (this) DoubleAPFloat(std::move(RHS.Double));
783         return;
784       }
785       llvm_unreachable("Unexpected semantics");
786     }
787 
788     Storage &operator=(const Storage &RHS) {
789       if (usesLayout<IEEEFloat>(*semantics) &&
790           usesLayout<IEEEFloat>(*RHS.semantics)) {
791         IEEE = RHS.IEEE;
792       } else if (usesLayout<DoubleAPFloat>(*semantics) &&
793                  usesLayout<DoubleAPFloat>(*RHS.semantics)) {
794         Double = RHS.Double;
795       } else if (this != &RHS) {
796         this->~Storage();
797         new (this) Storage(RHS);
798       }
799       return *this;
800     }
801 
802     Storage &operator=(Storage &&RHS) {
803       if (usesLayout<IEEEFloat>(*semantics) &&
804           usesLayout<IEEEFloat>(*RHS.semantics)) {
805         IEEE = std::move(RHS.IEEE);
806       } else if (usesLayout<DoubleAPFloat>(*semantics) &&
807                  usesLayout<DoubleAPFloat>(*RHS.semantics)) {
808         Double = std::move(RHS.Double);
809       } else if (this != &RHS) {
810         this->~Storage();
811         new (this) Storage(std::move(RHS));
812       }
813       return *this;
814     }
815   } U;
816 
usesLayout(const fltSemantics & Semantics)817   template <typename T> static bool usesLayout(const fltSemantics &Semantics) {
818     static_assert(std::is_same<T, IEEEFloat>::value ||
819                   std::is_same<T, DoubleAPFloat>::value);
820     if (std::is_same<T, DoubleAPFloat>::value) {
821       return &Semantics == &PPCDoubleDouble();
822     }
823     return &Semantics != &PPCDoubleDouble();
824   }
825 
getIEEE()826   IEEEFloat &getIEEE() {
827     if (usesLayout<IEEEFloat>(*U.semantics))
828       return U.IEEE;
829     if (usesLayout<DoubleAPFloat>(*U.semantics))
830       return U.Double.getFirst().U.IEEE;
831     llvm_unreachable("Unexpected semantics");
832   }
833 
getIEEE()834   const IEEEFloat &getIEEE() const {
835     if (usesLayout<IEEEFloat>(*U.semantics))
836       return U.IEEE;
837     if (usesLayout<DoubleAPFloat>(*U.semantics))
838       return U.Double.getFirst().U.IEEE;
839     llvm_unreachable("Unexpected semantics");
840   }
841 
makeZero(bool Neg)842   void makeZero(bool Neg) { APFLOAT_DISPATCH_ON_SEMANTICS(makeZero(Neg)); }
843 
makeInf(bool Neg)844   void makeInf(bool Neg) { APFLOAT_DISPATCH_ON_SEMANTICS(makeInf(Neg)); }
845 
makeNaN(bool SNaN,bool Neg,const APInt * fill)846   void makeNaN(bool SNaN, bool Neg, const APInt *fill) {
847     APFLOAT_DISPATCH_ON_SEMANTICS(makeNaN(SNaN, Neg, fill));
848   }
849 
makeLargest(bool Neg)850   void makeLargest(bool Neg) {
851     APFLOAT_DISPATCH_ON_SEMANTICS(makeLargest(Neg));
852   }
853 
makeSmallest(bool Neg)854   void makeSmallest(bool Neg) {
855     APFLOAT_DISPATCH_ON_SEMANTICS(makeSmallest(Neg));
856   }
857 
makeSmallestNormalized(bool Neg)858   void makeSmallestNormalized(bool Neg) {
859     APFLOAT_DISPATCH_ON_SEMANTICS(makeSmallestNormalized(Neg));
860   }
861 
APFloat(IEEEFloat F,const fltSemantics & S)862   explicit APFloat(IEEEFloat F, const fltSemantics &S) : U(std::move(F), S) {}
APFloat(DoubleAPFloat F,const fltSemantics & S)863   explicit APFloat(DoubleAPFloat F, const fltSemantics &S)
864       : U(std::move(F), S) {}
865 
compareAbsoluteValue(const APFloat & RHS)866   cmpResult compareAbsoluteValue(const APFloat &RHS) const {
867     assert(&getSemantics() == &RHS.getSemantics() &&
868            "Should only compare APFloats with the same semantics");
869     if (usesLayout<IEEEFloat>(getSemantics()))
870       return U.IEEE.compareAbsoluteValue(RHS.U.IEEE);
871     if (usesLayout<DoubleAPFloat>(getSemantics()))
872       return U.Double.compareAbsoluteValue(RHS.U.Double);
873     llvm_unreachable("Unexpected semantics");
874   }
875 
876 public:
APFloat(const fltSemantics & Semantics)877   APFloat(const fltSemantics &Semantics) : U(Semantics) {}
878   APFloat(const fltSemantics &Semantics, StringRef S);
APFloat(const fltSemantics & Semantics,integerPart I)879   APFloat(const fltSemantics &Semantics, integerPart I) : U(Semantics, I) {}
880   template <typename T,
881             typename = std::enable_if_t<std::is_floating_point<T>::value>>
882   APFloat(const fltSemantics &Semantics, T V) = delete;
883   // TODO: Remove this constructor. This isn't faster than the first one.
APFloat(const fltSemantics & Semantics,uninitializedTag)884   APFloat(const fltSemantics &Semantics, uninitializedTag)
885       : U(Semantics, uninitialized) {}
APFloat(const fltSemantics & Semantics,const APInt & I)886   APFloat(const fltSemantics &Semantics, const APInt &I) : U(Semantics, I) {}
APFloat(double d)887   explicit APFloat(double d) : U(IEEEFloat(d), IEEEdouble()) {}
APFloat(float f)888   explicit APFloat(float f) : U(IEEEFloat(f), IEEEsingle()) {}
889   APFloat(const APFloat &RHS) = default;
890   APFloat(APFloat &&RHS) = default;
891 
892   ~APFloat() = default;
893 
needsCleanup()894   bool needsCleanup() const { APFLOAT_DISPATCH_ON_SEMANTICS(needsCleanup()); }
895 
896   /// Factory for Positive and Negative Zero.
897   ///
898   /// \param Negative True iff the number should be negative.
899   static APFloat getZero(const fltSemantics &Sem, bool Negative = false) {
900     APFloat Val(Sem, uninitialized);
901     Val.makeZero(Negative);
902     return Val;
903   }
904 
905   /// Factory for Positive and Negative Infinity.
906   ///
907   /// \param Negative True iff the number should be negative.
908   static APFloat getInf(const fltSemantics &Sem, bool Negative = false) {
909     APFloat Val(Sem, uninitialized);
910     Val.makeInf(Negative);
911     return Val;
912   }
913 
914   /// Factory for NaN values.
915   ///
916   /// \param Negative - True iff the NaN generated should be negative.
917   /// \param payload - The unspecified fill bits for creating the NaN, 0 by
918   /// default.  The value is truncated as necessary.
919   static APFloat getNaN(const fltSemantics &Sem, bool Negative = false,
920                         uint64_t payload = 0) {
921     if (payload) {
922       APInt intPayload(64, payload);
923       return getQNaN(Sem, Negative, &intPayload);
924     } else {
925       return getQNaN(Sem, Negative, nullptr);
926     }
927   }
928 
929   /// Factory for QNaN values.
930   static APFloat getQNaN(const fltSemantics &Sem, bool Negative = false,
931                          const APInt *payload = nullptr) {
932     APFloat Val(Sem, uninitialized);
933     Val.makeNaN(false, Negative, payload);
934     return Val;
935   }
936 
937   /// Factory for SNaN values.
938   static APFloat getSNaN(const fltSemantics &Sem, bool Negative = false,
939                          const APInt *payload = nullptr) {
940     APFloat Val(Sem, uninitialized);
941     Val.makeNaN(true, Negative, payload);
942     return Val;
943   }
944 
945   /// Returns the largest finite number in the given semantics.
946   ///
947   /// \param Negative - True iff the number should be negative
948   static APFloat getLargest(const fltSemantics &Sem, bool Negative = false) {
949     APFloat Val(Sem, uninitialized);
950     Val.makeLargest(Negative);
951     return Val;
952   }
953 
954   /// Returns the smallest (by magnitude) finite number in the given semantics.
955   /// Might be denormalized, which implies a relative loss of precision.
956   ///
957   /// \param Negative - True iff the number should be negative
958   static APFloat getSmallest(const fltSemantics &Sem, bool Negative = false) {
959     APFloat Val(Sem, uninitialized);
960     Val.makeSmallest(Negative);
961     return Val;
962   }
963 
964   /// Returns the smallest (by magnitude) normalized finite number in the given
965   /// semantics.
966   ///
967   /// \param Negative - True iff the number should be negative
968   static APFloat getSmallestNormalized(const fltSemantics &Sem,
969                                        bool Negative = false) {
970     APFloat Val(Sem, uninitialized);
971     Val.makeSmallestNormalized(Negative);
972     return Val;
973   }
974 
975   /// Returns a float which is bitcasted from an all one value int.
976   ///
977   /// \param Semantics - type float semantics
978   static APFloat getAllOnesValue(const fltSemantics &Semantics);
979 
980   /// Used to insert APFloat objects, or objects that contain APFloat objects,
981   /// into FoldingSets.
982   void Profile(FoldingSetNodeID &NID) const;
983 
add(const APFloat & RHS,roundingMode RM)984   opStatus add(const APFloat &RHS, roundingMode RM) {
985     assert(&getSemantics() == &RHS.getSemantics() &&
986            "Should only call on two APFloats with the same semantics");
987     if (usesLayout<IEEEFloat>(getSemantics()))
988       return U.IEEE.add(RHS.U.IEEE, RM);
989     if (usesLayout<DoubleAPFloat>(getSemantics()))
990       return U.Double.add(RHS.U.Double, RM);
991     llvm_unreachable("Unexpected semantics");
992   }
subtract(const APFloat & RHS,roundingMode RM)993   opStatus subtract(const APFloat &RHS, roundingMode RM) {
994     assert(&getSemantics() == &RHS.getSemantics() &&
995            "Should only call on two APFloats with the same semantics");
996     if (usesLayout<IEEEFloat>(getSemantics()))
997       return U.IEEE.subtract(RHS.U.IEEE, RM);
998     if (usesLayout<DoubleAPFloat>(getSemantics()))
999       return U.Double.subtract(RHS.U.Double, RM);
1000     llvm_unreachable("Unexpected semantics");
1001   }
multiply(const APFloat & RHS,roundingMode RM)1002   opStatus multiply(const APFloat &RHS, roundingMode RM) {
1003     assert(&getSemantics() == &RHS.getSemantics() &&
1004            "Should only call on two APFloats with the same semantics");
1005     if (usesLayout<IEEEFloat>(getSemantics()))
1006       return U.IEEE.multiply(RHS.U.IEEE, RM);
1007     if (usesLayout<DoubleAPFloat>(getSemantics()))
1008       return U.Double.multiply(RHS.U.Double, RM);
1009     llvm_unreachable("Unexpected semantics");
1010   }
divide(const APFloat & RHS,roundingMode RM)1011   opStatus divide(const APFloat &RHS, roundingMode RM) {
1012     assert(&getSemantics() == &RHS.getSemantics() &&
1013            "Should only call on two APFloats with the same semantics");
1014     if (usesLayout<IEEEFloat>(getSemantics()))
1015       return U.IEEE.divide(RHS.U.IEEE, RM);
1016     if (usesLayout<DoubleAPFloat>(getSemantics()))
1017       return U.Double.divide(RHS.U.Double, RM);
1018     llvm_unreachable("Unexpected semantics");
1019   }
remainder(const APFloat & RHS)1020   opStatus remainder(const APFloat &RHS) {
1021     assert(&getSemantics() == &RHS.getSemantics() &&
1022            "Should only call on two APFloats with the same semantics");
1023     if (usesLayout<IEEEFloat>(getSemantics()))
1024       return U.IEEE.remainder(RHS.U.IEEE);
1025     if (usesLayout<DoubleAPFloat>(getSemantics()))
1026       return U.Double.remainder(RHS.U.Double);
1027     llvm_unreachable("Unexpected semantics");
1028   }
mod(const APFloat & RHS)1029   opStatus mod(const APFloat &RHS) {
1030     assert(&getSemantics() == &RHS.getSemantics() &&
1031            "Should only call on two APFloats with the same semantics");
1032     if (usesLayout<IEEEFloat>(getSemantics()))
1033       return U.IEEE.mod(RHS.U.IEEE);
1034     if (usesLayout<DoubleAPFloat>(getSemantics()))
1035       return U.Double.mod(RHS.U.Double);
1036     llvm_unreachable("Unexpected semantics");
1037   }
fusedMultiplyAdd(const APFloat & Multiplicand,const APFloat & Addend,roundingMode RM)1038   opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend,
1039                             roundingMode RM) {
1040     assert(&getSemantics() == &Multiplicand.getSemantics() &&
1041            "Should only call on APFloats with the same semantics");
1042     assert(&getSemantics() == &Addend.getSemantics() &&
1043            "Should only call on APFloats with the same semantics");
1044     if (usesLayout<IEEEFloat>(getSemantics()))
1045       return U.IEEE.fusedMultiplyAdd(Multiplicand.U.IEEE, Addend.U.IEEE, RM);
1046     if (usesLayout<DoubleAPFloat>(getSemantics()))
1047       return U.Double.fusedMultiplyAdd(Multiplicand.U.Double, Addend.U.Double,
1048                                        RM);
1049     llvm_unreachable("Unexpected semantics");
1050   }
roundToIntegral(roundingMode RM)1051   opStatus roundToIntegral(roundingMode RM) {
1052     APFLOAT_DISPATCH_ON_SEMANTICS(roundToIntegral(RM));
1053   }
1054 
1055   // TODO: bool parameters are not readable and a source of bugs.
1056   // Do something.
next(bool nextDown)1057   opStatus next(bool nextDown) {
1058     APFLOAT_DISPATCH_ON_SEMANTICS(next(nextDown));
1059   }
1060 
1061   /// Negate an APFloat.
1062   APFloat operator-() const {
1063     APFloat Result(*this);
1064     Result.changeSign();
1065     return Result;
1066   }
1067 
1068   /// Add two APFloats, rounding ties to the nearest even.
1069   /// No error checking.
1070   APFloat operator+(const APFloat &RHS) const {
1071     APFloat Result(*this);
1072     (void)Result.add(RHS, rmNearestTiesToEven);
1073     return Result;
1074   }
1075 
1076   /// Subtract two APFloats, rounding ties to the nearest even.
1077   /// No error checking.
1078   APFloat operator-(const APFloat &RHS) const {
1079     APFloat Result(*this);
1080     (void)Result.subtract(RHS, rmNearestTiesToEven);
1081     return Result;
1082   }
1083 
1084   /// Multiply two APFloats, rounding ties to the nearest even.
1085   /// No error checking.
1086   APFloat operator*(const APFloat &RHS) const {
1087     APFloat Result(*this);
1088     (void)Result.multiply(RHS, rmNearestTiesToEven);
1089     return Result;
1090   }
1091 
1092   /// Divide the first APFloat by the second, rounding ties to the nearest even.
1093   /// No error checking.
1094   APFloat operator/(const APFloat &RHS) const {
1095     APFloat Result(*this);
1096     (void)Result.divide(RHS, rmNearestTiesToEven);
1097     return Result;
1098   }
1099 
changeSign()1100   void changeSign() { APFLOAT_DISPATCH_ON_SEMANTICS(changeSign()); }
clearSign()1101   void clearSign() {
1102     if (isNegative())
1103       changeSign();
1104   }
copySign(const APFloat & RHS)1105   void copySign(const APFloat &RHS) {
1106     if (isNegative() != RHS.isNegative())
1107       changeSign();
1108   }
1109 
1110   /// A static helper to produce a copy of an APFloat value with its sign
1111   /// copied from some other APFloat.
copySign(APFloat Value,const APFloat & Sign)1112   static APFloat copySign(APFloat Value, const APFloat &Sign) {
1113     Value.copySign(Sign);
1114     return Value;
1115   }
1116 
1117   opStatus convert(const fltSemantics &ToSemantics, roundingMode RM,
1118                    bool *losesInfo);
convertToInteger(MutableArrayRef<integerPart> Input,unsigned int Width,bool IsSigned,roundingMode RM,bool * IsExact)1119   opStatus convertToInteger(MutableArrayRef<integerPart> Input,
1120                             unsigned int Width, bool IsSigned, roundingMode RM,
1121                             bool *IsExact) const {
1122     APFLOAT_DISPATCH_ON_SEMANTICS(
1123         convertToInteger(Input, Width, IsSigned, RM, IsExact));
1124   }
1125   opStatus convertToInteger(APSInt &Result, roundingMode RM,
1126                             bool *IsExact) const;
convertFromAPInt(const APInt & Input,bool IsSigned,roundingMode RM)1127   opStatus convertFromAPInt(const APInt &Input, bool IsSigned,
1128                             roundingMode RM) {
1129     APFLOAT_DISPATCH_ON_SEMANTICS(convertFromAPInt(Input, IsSigned, RM));
1130   }
convertFromSignExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)1131   opStatus convertFromSignExtendedInteger(const integerPart *Input,
1132                                           unsigned int InputSize, bool IsSigned,
1133                                           roundingMode RM) {
1134     APFLOAT_DISPATCH_ON_SEMANTICS(
1135         convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM));
1136   }
convertFromZeroExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)1137   opStatus convertFromZeroExtendedInteger(const integerPart *Input,
1138                                           unsigned int InputSize, bool IsSigned,
1139                                           roundingMode RM) {
1140     APFLOAT_DISPATCH_ON_SEMANTICS(
1141         convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM));
1142   }
1143   Expected<opStatus> convertFromString(StringRef, roundingMode);
bitcastToAPInt()1144   APInt bitcastToAPInt() const {
1145     APFLOAT_DISPATCH_ON_SEMANTICS(bitcastToAPInt());
1146   }
1147 
1148   /// Converts this APFloat to host double value.
1149   ///
1150   /// \pre The APFloat must be built using semantics, that can be represented by
1151   /// the host double type without loss of precision. It can be IEEEdouble and
1152   /// shorter semantics, like IEEEsingle and others.
1153   double convertToDouble() const;
1154 
1155   /// Converts this APFloat to host float value.
1156   ///
1157   /// \pre The APFloat must be built using semantics, that can be represented by
1158   /// the host float type without loss of precision. It can be IEEEsingle and
1159   /// shorter semantics, like IEEEhalf.
1160   float convertToFloat() const;
1161 
1162   bool operator==(const APFloat &RHS) const { return compare(RHS) == cmpEqual; }
1163 
1164   bool operator!=(const APFloat &RHS) const { return compare(RHS) != cmpEqual; }
1165 
1166   bool operator<(const APFloat &RHS) const {
1167     return compare(RHS) == cmpLessThan;
1168   }
1169 
1170   bool operator>(const APFloat &RHS) const {
1171     return compare(RHS) == cmpGreaterThan;
1172   }
1173 
1174   bool operator<=(const APFloat &RHS) const {
1175     cmpResult Res = compare(RHS);
1176     return Res == cmpLessThan || Res == cmpEqual;
1177   }
1178 
1179   bool operator>=(const APFloat &RHS) const {
1180     cmpResult Res = compare(RHS);
1181     return Res == cmpGreaterThan || Res == cmpEqual;
1182   }
1183 
compare(const APFloat & RHS)1184   cmpResult compare(const APFloat &RHS) const {
1185     assert(&getSemantics() == &RHS.getSemantics() &&
1186            "Should only compare APFloats with the same semantics");
1187     if (usesLayout<IEEEFloat>(getSemantics()))
1188       return U.IEEE.compare(RHS.U.IEEE);
1189     if (usesLayout<DoubleAPFloat>(getSemantics()))
1190       return U.Double.compare(RHS.U.Double);
1191     llvm_unreachable("Unexpected semantics");
1192   }
1193 
bitwiseIsEqual(const APFloat & RHS)1194   bool bitwiseIsEqual(const APFloat &RHS) const {
1195     if (&getSemantics() != &RHS.getSemantics())
1196       return false;
1197     if (usesLayout<IEEEFloat>(getSemantics()))
1198       return U.IEEE.bitwiseIsEqual(RHS.U.IEEE);
1199     if (usesLayout<DoubleAPFloat>(getSemantics()))
1200       return U.Double.bitwiseIsEqual(RHS.U.Double);
1201     llvm_unreachable("Unexpected semantics");
1202   }
1203 
1204   /// We don't rely on operator== working on double values, as
1205   /// it returns true for things that are clearly not equal, like -0.0 and 0.0.
1206   /// As such, this method can be used to do an exact bit-for-bit comparison of
1207   /// two floating point values.
1208   ///
1209   /// We leave the version with the double argument here because it's just so
1210   /// convenient to write "2.0" and the like.  Without this function we'd
1211   /// have to duplicate its logic everywhere it's called.
isExactlyValue(double V)1212   bool isExactlyValue(double V) const {
1213     bool ignored;
1214     APFloat Tmp(V);
1215     Tmp.convert(getSemantics(), APFloat::rmNearestTiesToEven, &ignored);
1216     return bitwiseIsEqual(Tmp);
1217   }
1218 
convertToHexString(char * DST,unsigned int HexDigits,bool UpperCase,roundingMode RM)1219   unsigned int convertToHexString(char *DST, unsigned int HexDigits,
1220                                   bool UpperCase, roundingMode RM) const {
1221     APFLOAT_DISPATCH_ON_SEMANTICS(
1222         convertToHexString(DST, HexDigits, UpperCase, RM));
1223   }
1224 
isZero()1225   bool isZero() const { return getCategory() == fcZero; }
isInfinity()1226   bool isInfinity() const { return getCategory() == fcInfinity; }
isNaN()1227   bool isNaN() const { return getCategory() == fcNaN; }
1228 
isNegative()1229   bool isNegative() const { return getIEEE().isNegative(); }
isDenormal()1230   bool isDenormal() const { APFLOAT_DISPATCH_ON_SEMANTICS(isDenormal()); }
isSignaling()1231   bool isSignaling() const { return getIEEE().isSignaling(); }
1232 
isNormal()1233   bool isNormal() const { return !isDenormal() && isFiniteNonZero(); }
isFinite()1234   bool isFinite() const { return !isNaN() && !isInfinity(); }
1235 
getCategory()1236   fltCategory getCategory() const { return getIEEE().getCategory(); }
getSemantics()1237   const fltSemantics &getSemantics() const { return *U.semantics; }
isNonZero()1238   bool isNonZero() const { return !isZero(); }
isFiniteNonZero()1239   bool isFiniteNonZero() const { return isFinite() && !isZero(); }
isPosZero()1240   bool isPosZero() const { return isZero() && !isNegative(); }
isNegZero()1241   bool isNegZero() const { return isZero() && isNegative(); }
isPosInfinity()1242   bool isPosInfinity() const { return isInfinity() && !isNegative(); }
isNegInfinity()1243   bool isNegInfinity() const { return isInfinity() && isNegative(); }
isSmallest()1244   bool isSmallest() const { APFLOAT_DISPATCH_ON_SEMANTICS(isSmallest()); }
isLargest()1245   bool isLargest() const { APFLOAT_DISPATCH_ON_SEMANTICS(isLargest()); }
isInteger()1246   bool isInteger() const { APFLOAT_DISPATCH_ON_SEMANTICS(isInteger()); }
isIEEE()1247   bool isIEEE() const { return usesLayout<IEEEFloat>(getSemantics()); }
1248 
isSmallestNormalized()1249   bool isSmallestNormalized() const {
1250     APFLOAT_DISPATCH_ON_SEMANTICS(isSmallestNormalized());
1251   }
1252 
1253   APFloat &operator=(const APFloat &RHS) = default;
1254   APFloat &operator=(APFloat &&RHS) = default;
1255 
1256   void toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision = 0,
1257                 unsigned FormatMaxPadding = 3, bool TruncateZero = true) const {
1258     APFLOAT_DISPATCH_ON_SEMANTICS(
1259         toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero));
1260   }
1261 
1262   void print(raw_ostream &) const;
1263   void dump() const;
1264 
getExactInverse(APFloat * inv)1265   bool getExactInverse(APFloat *inv) const {
1266     APFLOAT_DISPATCH_ON_SEMANTICS(getExactInverse(inv));
1267   }
1268 
1269   friend hash_code hash_value(const APFloat &Arg);
ilogb(const APFloat & Arg)1270   friend int ilogb(const APFloat &Arg) { return ilogb(Arg.getIEEE()); }
1271   friend APFloat scalbn(APFloat X, int Exp, roundingMode RM);
1272   friend APFloat frexp(const APFloat &X, int &Exp, roundingMode RM);
1273   friend IEEEFloat;
1274   friend DoubleAPFloat;
1275 };
1276 
1277 /// See friend declarations above.
1278 ///
1279 /// These additional declarations are required in order to compile LLVM with IBM
1280 /// xlC compiler.
1281 hash_code hash_value(const APFloat &Arg);
scalbn(APFloat X,int Exp,APFloat::roundingMode RM)1282 inline APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM) {
1283   if (APFloat::usesLayout<detail::IEEEFloat>(X.getSemantics()))
1284     return APFloat(scalbn(X.U.IEEE, Exp, RM), X.getSemantics());
1285   if (APFloat::usesLayout<detail::DoubleAPFloat>(X.getSemantics()))
1286     return APFloat(scalbn(X.U.Double, Exp, RM), X.getSemantics());
1287   llvm_unreachable("Unexpected semantics");
1288 }
1289 
1290 /// Equivalent of C standard library function.
1291 ///
1292 /// While the C standard says Exp is an unspecified value for infinity and nan,
1293 /// this returns INT_MAX for infinities, and INT_MIN for NaNs.
frexp(const APFloat & X,int & Exp,APFloat::roundingMode RM)1294 inline APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM) {
1295   if (APFloat::usesLayout<detail::IEEEFloat>(X.getSemantics()))
1296     return APFloat(frexp(X.U.IEEE, Exp, RM), X.getSemantics());
1297   if (APFloat::usesLayout<detail::DoubleAPFloat>(X.getSemantics()))
1298     return APFloat(frexp(X.U.Double, Exp, RM), X.getSemantics());
1299   llvm_unreachable("Unexpected semantics");
1300 }
1301 /// Returns the absolute value of the argument.
abs(APFloat X)1302 inline APFloat abs(APFloat X) {
1303   X.clearSign();
1304   return X;
1305 }
1306 
1307 /// Returns the negated value of the argument.
neg(APFloat X)1308 inline APFloat neg(APFloat X) {
1309   X.changeSign();
1310   return X;
1311 }
1312 
1313 /// Implements IEEE minNum semantics. Returns the smaller of the 2 arguments if
1314 /// both are not NaN. If either argument is a NaN, returns the other argument.
1315 LLVM_READONLY
minnum(const APFloat & A,const APFloat & B)1316 inline APFloat minnum(const APFloat &A, const APFloat &B) {
1317   if (A.isNaN())
1318     return B;
1319   if (B.isNaN())
1320     return A;
1321   return B < A ? B : A;
1322 }
1323 
1324 /// Implements IEEE maxNum semantics. Returns the larger of the 2 arguments if
1325 /// both are not NaN. If either argument is a NaN, returns the other argument.
1326 LLVM_READONLY
maxnum(const APFloat & A,const APFloat & B)1327 inline APFloat maxnum(const APFloat &A, const APFloat &B) {
1328   if (A.isNaN())
1329     return B;
1330   if (B.isNaN())
1331     return A;
1332   return A < B ? B : A;
1333 }
1334 
1335 /// Implements IEEE 754-2018 minimum semantics. Returns the smaller of 2
1336 /// arguments, propagating NaNs and treating -0 as less than +0.
1337 LLVM_READONLY
minimum(const APFloat & A,const APFloat & B)1338 inline APFloat minimum(const APFloat &A, const APFloat &B) {
1339   if (A.isNaN())
1340     return A;
1341   if (B.isNaN())
1342     return B;
1343   if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative()))
1344     return A.isNegative() ? A : B;
1345   return B < A ? B : A;
1346 }
1347 
1348 /// Implements IEEE 754-2018 maximum semantics. Returns the larger of 2
1349 /// arguments, propagating NaNs and treating -0 as less than +0.
1350 LLVM_READONLY
maximum(const APFloat & A,const APFloat & B)1351 inline APFloat maximum(const APFloat &A, const APFloat &B) {
1352   if (A.isNaN())
1353     return A;
1354   if (B.isNaN())
1355     return B;
1356   if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative()))
1357     return A.isNegative() ? B : A;
1358   return A < B ? B : A;
1359 }
1360 
1361 } // namespace llvm
1362 
1363 #undef APFLOAT_DISPATCH_ON_SEMANTICS
1364 #endif // LLVM_ADT_APFLOAT_H
1365