1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/llvm-config.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <cstring>
27 #include <limits.h>
28
29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
30 do { \
31 if (usesLayout<IEEEFloat>(getSemantics())) \
32 return U.IEEE.METHOD_CALL; \
33 if (usesLayout<DoubleAPFloat>(getSemantics())) \
34 return U.Double.METHOD_CALL; \
35 llvm_unreachable("Unexpected semantics"); \
36 } while (false)
37
38 using namespace llvm;
39
40 /// A macro used to combine two fcCategory enums into one key which can be used
41 /// in a switch statement to classify how the interaction of two APFloat's
42 /// categories affects an operation.
43 ///
44 /// TODO: If clang source code is ever allowed to use constexpr in its own
45 /// codebase, change this into a static inline function.
46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
47
48 /* Assumed in hexadecimal significand parsing, and conversion to
49 hexadecimal strings. */
50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
51
52 namespace llvm {
53
54 // How the nonfinite values Inf and NaN are represented.
55 enum class fltNonfiniteBehavior {
56 // Represents standard IEEE 754 behavior. A value is nonfinite if the
57 // exponent field is all 1s. In such cases, a value is Inf if the
58 // significand bits are all zero, and NaN otherwise
59 IEEE754,
60
61 // Only the Float8E5M2 has this behavior. There is no Inf representation. A
62 // value is NaN if the exponent field and the mantissa field are all 1s.
63 // This behavior matches the FP8 E4M3 type described in
64 // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
65 // as non-signalling, although the paper does not state whether the NaN
66 // values are signalling or not.
67 NanOnly,
68 };
69
70 /* Represents floating point arithmetic semantics. */
71 struct fltSemantics {
72 /* The largest E such that 2^E is representable; this matches the
73 definition of IEEE 754. */
74 APFloatBase::ExponentType maxExponent;
75
76 /* The smallest E such that 2^E is a normalized number; this
77 matches the definition of IEEE 754. */
78 APFloatBase::ExponentType minExponent;
79
80 /* Number of bits in the significand. This includes the integer
81 bit. */
82 unsigned int precision;
83
84 /* Number of bits actually used in the semantics. */
85 unsigned int sizeInBits;
86
87 fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
88
89 // Returns true if any number described by this semantics can be precisely
90 // represented by the specified semantics. Does not take into account
91 // the value of fltNonfiniteBehavior.
isRepresentableByllvm::fltSemantics92 bool isRepresentableBy(const fltSemantics &S) const {
93 return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
94 precision <= S.precision;
95 }
96 };
97
98 static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
99 static const fltSemantics semBFloat = {127, -126, 8, 16};
100 static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
101 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
102 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
103 static const fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
104 static const fltSemantics semFloat8E4M3FN = {8, -6, 4, 8,
105 fltNonfiniteBehavior::NanOnly};
106 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
107 static const fltSemantics semBogus = {0, 0, 0, 0};
108
109 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
110 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
111 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
112 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
113 to each other, and two 11-bit exponents.
114
115 Note: we need to make the value different from semBogus as otherwise
116 an unsafe optimization may collapse both values to a single address,
117 and we heavily rely on them having distinct addresses. */
118 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
119
120 /* These are legacy semantics for the fallback, inaccrurate implementation of
121 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
122 operation. It's equivalent to having an IEEE number with consecutive 106
123 bits of mantissa and 11 bits of exponent.
124
125 It's not equivalent to IBM double-double. For example, a legit IBM
126 double-double, 1 + epsilon:
127
128 1 + epsilon = 1 + (1 >> 1076)
129
130 is not representable by a consecutive 106 bits of mantissa.
131
132 Currently, these semantics are used in the following way:
133
134 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
135 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
136 semPPCDoubleDoubleLegacy -> IEEE operations
137
138 We use bitcastToAPInt() to get the bit representation (in APInt) of the
139 underlying IEEEdouble, then use the APInt constructor to construct the
140 legacy IEEE float.
141
142 TODO: Implement all operations in semPPCDoubleDouble, and delete these
143 semantics. */
144 static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
145 53 + 53, 128};
146
EnumToSemantics(Semantics S)147 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
148 switch (S) {
149 case S_IEEEhalf:
150 return IEEEhalf();
151 case S_BFloat:
152 return BFloat();
153 case S_IEEEsingle:
154 return IEEEsingle();
155 case S_IEEEdouble:
156 return IEEEdouble();
157 case S_IEEEquad:
158 return IEEEquad();
159 case S_PPCDoubleDouble:
160 return PPCDoubleDouble();
161 case S_Float8E5M2:
162 return Float8E5M2();
163 case S_Float8E4M3FN:
164 return Float8E4M3FN();
165 case S_x87DoubleExtended:
166 return x87DoubleExtended();
167 }
168 llvm_unreachable("Unrecognised floating semantics");
169 }
170
171 APFloatBase::Semantics
SemanticsToEnum(const llvm::fltSemantics & Sem)172 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
173 if (&Sem == &llvm::APFloat::IEEEhalf())
174 return S_IEEEhalf;
175 else if (&Sem == &llvm::APFloat::BFloat())
176 return S_BFloat;
177 else if (&Sem == &llvm::APFloat::IEEEsingle())
178 return S_IEEEsingle;
179 else if (&Sem == &llvm::APFloat::IEEEdouble())
180 return S_IEEEdouble;
181 else if (&Sem == &llvm::APFloat::IEEEquad())
182 return S_IEEEquad;
183 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
184 return S_PPCDoubleDouble;
185 else if (&Sem == &llvm::APFloat::Float8E5M2())
186 return S_Float8E5M2;
187 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
188 return S_Float8E4M3FN;
189 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
190 return S_x87DoubleExtended;
191 else
192 llvm_unreachable("Unknown floating semantics");
193 }
194
IEEEhalf()195 const fltSemantics &APFloatBase::IEEEhalf() {
196 return semIEEEhalf;
197 }
BFloat()198 const fltSemantics &APFloatBase::BFloat() {
199 return semBFloat;
200 }
IEEEsingle()201 const fltSemantics &APFloatBase::IEEEsingle() {
202 return semIEEEsingle;
203 }
IEEEdouble()204 const fltSemantics &APFloatBase::IEEEdouble() {
205 return semIEEEdouble;
206 }
IEEEquad()207 const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
PPCDoubleDouble()208 const fltSemantics &APFloatBase::PPCDoubleDouble() {
209 return semPPCDoubleDouble;
210 }
Float8E5M2()211 const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
Float8E4M3FN()212 const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
x87DoubleExtended()213 const fltSemantics &APFloatBase::x87DoubleExtended() {
214 return semX87DoubleExtended;
215 }
Bogus()216 const fltSemantics &APFloatBase::Bogus() { return semBogus; }
217
218 constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
219 constexpr RoundingMode APFloatBase::rmTowardPositive;
220 constexpr RoundingMode APFloatBase::rmTowardNegative;
221 constexpr RoundingMode APFloatBase::rmTowardZero;
222 constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
223
224 /* A tight upper bound on number of parts required to hold the value
225 pow(5, power) is
226
227 power * 815 / (351 * integerPartWidth) + 1
228
229 However, whilst the result may require only this many parts,
230 because we are multiplying two values to get it, the
231 multiplication may require an extra part with the excess part
232 being zero (consider the trivial case of 1 * 1, tcFullMultiply
233 requires two parts to hold the single-part result). So we add an
234 extra one to guarantee enough space whilst multiplying. */
235 const unsigned int maxExponent = 16383;
236 const unsigned int maxPrecision = 113;
237 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
238 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
239
semanticsPrecision(const fltSemantics & semantics)240 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
241 return semantics.precision;
242 }
243 APFloatBase::ExponentType
semanticsMaxExponent(const fltSemantics & semantics)244 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
245 return semantics.maxExponent;
246 }
247 APFloatBase::ExponentType
semanticsMinExponent(const fltSemantics & semantics)248 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
249 return semantics.minExponent;
250 }
semanticsSizeInBits(const fltSemantics & semantics)251 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
252 return semantics.sizeInBits;
253 }
254
getSizeInBits(const fltSemantics & Sem)255 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
256 return Sem.sizeInBits;
257 }
258
259 /* A bunch of private, handy routines. */
260
createError(const Twine & Err)261 static inline Error createError(const Twine &Err) {
262 return make_error<StringError>(Err, inconvertibleErrorCode());
263 }
264
265 static inline unsigned int
partCountForBits(unsigned int bits)266 partCountForBits(unsigned int bits)
267 {
268 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
269 }
270
271 /* Returns 0U-9U. Return values >= 10U are not digits. */
272 static inline unsigned int
decDigitValue(unsigned int c)273 decDigitValue(unsigned int c)
274 {
275 return c - '0';
276 }
277
278 /* Return the value of a decimal exponent of the form
279 [+-]ddddddd.
280
281 If the exponent overflows, returns a large exponent with the
282 appropriate sign. */
readExponent(StringRef::iterator begin,StringRef::iterator end)283 static Expected<int> readExponent(StringRef::iterator begin,
284 StringRef::iterator end) {
285 bool isNegative;
286 unsigned int absExponent;
287 const unsigned int overlargeExponent = 24000; /* FIXME. */
288 StringRef::iterator p = begin;
289
290 // Treat no exponent as 0 to match binutils
291 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
292 return 0;
293 }
294
295 isNegative = (*p == '-');
296 if (*p == '-' || *p == '+') {
297 p++;
298 if (p == end)
299 return createError("Exponent has no digits");
300 }
301
302 absExponent = decDigitValue(*p++);
303 if (absExponent >= 10U)
304 return createError("Invalid character in exponent");
305
306 for (; p != end; ++p) {
307 unsigned int value;
308
309 value = decDigitValue(*p);
310 if (value >= 10U)
311 return createError("Invalid character in exponent");
312
313 absExponent = absExponent * 10U + value;
314 if (absExponent >= overlargeExponent) {
315 absExponent = overlargeExponent;
316 break;
317 }
318 }
319
320 if (isNegative)
321 return -(int) absExponent;
322 else
323 return (int) absExponent;
324 }
325
326 /* This is ugly and needs cleaning up, but I don't immediately see
327 how whilst remaining safe. */
totalExponent(StringRef::iterator p,StringRef::iterator end,int exponentAdjustment)328 static Expected<int> totalExponent(StringRef::iterator p,
329 StringRef::iterator end,
330 int exponentAdjustment) {
331 int unsignedExponent;
332 bool negative, overflow;
333 int exponent = 0;
334
335 if (p == end)
336 return createError("Exponent has no digits");
337
338 negative = *p == '-';
339 if (*p == '-' || *p == '+') {
340 p++;
341 if (p == end)
342 return createError("Exponent has no digits");
343 }
344
345 unsignedExponent = 0;
346 overflow = false;
347 for (; p != end; ++p) {
348 unsigned int value;
349
350 value = decDigitValue(*p);
351 if (value >= 10U)
352 return createError("Invalid character in exponent");
353
354 unsignedExponent = unsignedExponent * 10 + value;
355 if (unsignedExponent > 32767) {
356 overflow = true;
357 break;
358 }
359 }
360
361 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
362 overflow = true;
363
364 if (!overflow) {
365 exponent = unsignedExponent;
366 if (negative)
367 exponent = -exponent;
368 exponent += exponentAdjustment;
369 if (exponent > 32767 || exponent < -32768)
370 overflow = true;
371 }
372
373 if (overflow)
374 exponent = negative ? -32768: 32767;
375
376 return exponent;
377 }
378
379 static Expected<StringRef::iterator>
skipLeadingZeroesAndAnyDot(StringRef::iterator begin,StringRef::iterator end,StringRef::iterator * dot)380 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
381 StringRef::iterator *dot) {
382 StringRef::iterator p = begin;
383 *dot = end;
384 while (p != end && *p == '0')
385 p++;
386
387 if (p != end && *p == '.') {
388 *dot = p++;
389
390 if (end - begin == 1)
391 return createError("Significand has no digits");
392
393 while (p != end && *p == '0')
394 p++;
395 }
396
397 return p;
398 }
399
400 /* Given a normal decimal floating point number of the form
401
402 dddd.dddd[eE][+-]ddd
403
404 where the decimal point and exponent are optional, fill out the
405 structure D. Exponent is appropriate if the significand is
406 treated as an integer, and normalizedExponent if the significand
407 is taken to have the decimal point after a single leading
408 non-zero digit.
409
410 If the value is zero, V->firstSigDigit points to a non-digit, and
411 the return exponent is zero.
412 */
413 struct decimalInfo {
414 const char *firstSigDigit;
415 const char *lastSigDigit;
416 int exponent;
417 int normalizedExponent;
418 };
419
interpretDecimal(StringRef::iterator begin,StringRef::iterator end,decimalInfo * D)420 static Error interpretDecimal(StringRef::iterator begin,
421 StringRef::iterator end, decimalInfo *D) {
422 StringRef::iterator dot = end;
423
424 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
425 if (!PtrOrErr)
426 return PtrOrErr.takeError();
427 StringRef::iterator p = *PtrOrErr;
428
429 D->firstSigDigit = p;
430 D->exponent = 0;
431 D->normalizedExponent = 0;
432
433 for (; p != end; ++p) {
434 if (*p == '.') {
435 if (dot != end)
436 return createError("String contains multiple dots");
437 dot = p++;
438 if (p == end)
439 break;
440 }
441 if (decDigitValue(*p) >= 10U)
442 break;
443 }
444
445 if (p != end) {
446 if (*p != 'e' && *p != 'E')
447 return createError("Invalid character in significand");
448 if (p == begin)
449 return createError("Significand has no digits");
450 if (dot != end && p - begin == 1)
451 return createError("Significand has no digits");
452
453 /* p points to the first non-digit in the string */
454 auto ExpOrErr = readExponent(p + 1, end);
455 if (!ExpOrErr)
456 return ExpOrErr.takeError();
457 D->exponent = *ExpOrErr;
458
459 /* Implied decimal point? */
460 if (dot == end)
461 dot = p;
462 }
463
464 /* If number is all zeroes accept any exponent. */
465 if (p != D->firstSigDigit) {
466 /* Drop insignificant trailing zeroes. */
467 if (p != begin) {
468 do
469 do
470 p--;
471 while (p != begin && *p == '0');
472 while (p != begin && *p == '.');
473 }
474
475 /* Adjust the exponents for any decimal point. */
476 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
477 D->normalizedExponent = (D->exponent +
478 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
479 - (dot > D->firstSigDigit && dot < p)));
480 }
481
482 D->lastSigDigit = p;
483 return Error::success();
484 }
485
486 /* Return the trailing fraction of a hexadecimal number.
487 DIGITVALUE is the first hex digit of the fraction, P points to
488 the next digit. */
489 static Expected<lostFraction>
trailingHexadecimalFraction(StringRef::iterator p,StringRef::iterator end,unsigned int digitValue)490 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
491 unsigned int digitValue) {
492 unsigned int hexDigit;
493
494 /* If the first trailing digit isn't 0 or 8 we can work out the
495 fraction immediately. */
496 if (digitValue > 8)
497 return lfMoreThanHalf;
498 else if (digitValue < 8 && digitValue > 0)
499 return lfLessThanHalf;
500
501 // Otherwise we need to find the first non-zero digit.
502 while (p != end && (*p == '0' || *p == '.'))
503 p++;
504
505 if (p == end)
506 return createError("Invalid trailing hexadecimal fraction!");
507
508 hexDigit = hexDigitValue(*p);
509
510 /* If we ran off the end it is exactly zero or one-half, otherwise
511 a little more. */
512 if (hexDigit == -1U)
513 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
514 else
515 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
516 }
517
518 /* Return the fraction lost were a bignum truncated losing the least
519 significant BITS bits. */
520 static lostFraction
lostFractionThroughTruncation(const APFloatBase::integerPart * parts,unsigned int partCount,unsigned int bits)521 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
522 unsigned int partCount,
523 unsigned int bits)
524 {
525 unsigned int lsb;
526
527 lsb = APInt::tcLSB(parts, partCount);
528
529 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
530 if (bits <= lsb)
531 return lfExactlyZero;
532 if (bits == lsb + 1)
533 return lfExactlyHalf;
534 if (bits <= partCount * APFloatBase::integerPartWidth &&
535 APInt::tcExtractBit(parts, bits - 1))
536 return lfMoreThanHalf;
537
538 return lfLessThanHalf;
539 }
540
541 /* Shift DST right BITS bits noting lost fraction. */
542 static lostFraction
shiftRight(APFloatBase::integerPart * dst,unsigned int parts,unsigned int bits)543 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
544 {
545 lostFraction lost_fraction;
546
547 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
548
549 APInt::tcShiftRight(dst, parts, bits);
550
551 return lost_fraction;
552 }
553
554 /* Combine the effect of two lost fractions. */
555 static lostFraction
combineLostFractions(lostFraction moreSignificant,lostFraction lessSignificant)556 combineLostFractions(lostFraction moreSignificant,
557 lostFraction lessSignificant)
558 {
559 if (lessSignificant != lfExactlyZero) {
560 if (moreSignificant == lfExactlyZero)
561 moreSignificant = lfLessThanHalf;
562 else if (moreSignificant == lfExactlyHalf)
563 moreSignificant = lfMoreThanHalf;
564 }
565
566 return moreSignificant;
567 }
568
569 /* The error from the true value, in half-ulps, on multiplying two
570 floating point numbers, which differ from the value they
571 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
572 than the returned value.
573
574 See "How to Read Floating Point Numbers Accurately" by William D
575 Clinger. */
576 static unsigned int
HUerrBound(bool inexactMultiply,unsigned int HUerr1,unsigned int HUerr2)577 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
578 {
579 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
580
581 if (HUerr1 + HUerr2 == 0)
582 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
583 else
584 return inexactMultiply + 2 * (HUerr1 + HUerr2);
585 }
586
587 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
588 when the least significant BITS are truncated. BITS cannot be
589 zero. */
590 static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart * parts,unsigned int bits,bool isNearest)591 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
592 bool isNearest) {
593 unsigned int count, partBits;
594 APFloatBase::integerPart part, boundary;
595
596 assert(bits != 0);
597
598 bits--;
599 count = bits / APFloatBase::integerPartWidth;
600 partBits = bits % APFloatBase::integerPartWidth + 1;
601
602 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
603
604 if (isNearest)
605 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
606 else
607 boundary = 0;
608
609 if (count == 0) {
610 if (part - boundary <= boundary - part)
611 return part - boundary;
612 else
613 return boundary - part;
614 }
615
616 if (part == boundary) {
617 while (--count)
618 if (parts[count])
619 return ~(APFloatBase::integerPart) 0; /* A lot. */
620
621 return parts[0];
622 } else if (part == boundary - 1) {
623 while (--count)
624 if (~parts[count])
625 return ~(APFloatBase::integerPart) 0; /* A lot. */
626
627 return -parts[0];
628 }
629
630 return ~(APFloatBase::integerPart) 0; /* A lot. */
631 }
632
633 /* Place pow(5, power) in DST, and return the number of parts used.
634 DST must be at least one part larger than size of the answer. */
635 static unsigned int
powerOf5(APFloatBase::integerPart * dst,unsigned int power)636 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
637 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
638 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
639 pow5s[0] = 78125 * 5;
640
641 unsigned int partsCount[16] = { 1 };
642 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
643 unsigned int result;
644 assert(power <= maxExponent);
645
646 p1 = dst;
647 p2 = scratch;
648
649 *p1 = firstEightPowers[power & 7];
650 power >>= 3;
651
652 result = 1;
653 pow5 = pow5s;
654
655 for (unsigned int n = 0; power; power >>= 1, n++) {
656 unsigned int pc;
657
658 pc = partsCount[n];
659
660 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
661 if (pc == 0) {
662 pc = partsCount[n - 1];
663 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
664 pc *= 2;
665 if (pow5[pc - 1] == 0)
666 pc--;
667 partsCount[n] = pc;
668 }
669
670 if (power & 1) {
671 APFloatBase::integerPart *tmp;
672
673 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
674 result += pc;
675 if (p2[result - 1] == 0)
676 result--;
677
678 /* Now result is in p1 with partsCount parts and p2 is scratch
679 space. */
680 tmp = p1;
681 p1 = p2;
682 p2 = tmp;
683 }
684
685 pow5 += pc;
686 }
687
688 if (p1 != dst)
689 APInt::tcAssign(dst, p1, result);
690
691 return result;
692 }
693
694 /* Zero at the end to avoid modular arithmetic when adding one; used
695 when rounding up during hexadecimal output. */
696 static const char hexDigitsLower[] = "0123456789abcdef0";
697 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
698 static const char infinityL[] = "infinity";
699 static const char infinityU[] = "INFINITY";
700 static const char NaNL[] = "nan";
701 static const char NaNU[] = "NAN";
702
703 /* Write out an integerPart in hexadecimal, starting with the most
704 significant nibble. Write out exactly COUNT hexdigits, return
705 COUNT. */
706 static unsigned int
partAsHex(char * dst,APFloatBase::integerPart part,unsigned int count,const char * hexDigitChars)707 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
708 const char *hexDigitChars)
709 {
710 unsigned int result = count;
711
712 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
713
714 part >>= (APFloatBase::integerPartWidth - 4 * count);
715 while (count--) {
716 dst[count] = hexDigitChars[part & 0xf];
717 part >>= 4;
718 }
719
720 return result;
721 }
722
723 /* Write out an unsigned decimal integer. */
724 static char *
writeUnsignedDecimal(char * dst,unsigned int n)725 writeUnsignedDecimal (char *dst, unsigned int n)
726 {
727 char buff[40], *p;
728
729 p = buff;
730 do
731 *p++ = '0' + n % 10;
732 while (n /= 10);
733
734 do
735 *dst++ = *--p;
736 while (p != buff);
737
738 return dst;
739 }
740
741 /* Write out a signed decimal integer. */
742 static char *
writeSignedDecimal(char * dst,int value)743 writeSignedDecimal (char *dst, int value)
744 {
745 if (value < 0) {
746 *dst++ = '-';
747 dst = writeUnsignedDecimal(dst, -(unsigned) value);
748 } else
749 dst = writeUnsignedDecimal(dst, value);
750
751 return dst;
752 }
753
754 namespace detail {
755 /* Constructors. */
initialize(const fltSemantics * ourSemantics)756 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
757 unsigned int count;
758
759 semantics = ourSemantics;
760 count = partCount();
761 if (count > 1)
762 significand.parts = new integerPart[count];
763 }
764
freeSignificand()765 void IEEEFloat::freeSignificand() {
766 if (needsCleanup())
767 delete [] significand.parts;
768 }
769
assign(const IEEEFloat & rhs)770 void IEEEFloat::assign(const IEEEFloat &rhs) {
771 assert(semantics == rhs.semantics);
772
773 sign = rhs.sign;
774 category = rhs.category;
775 exponent = rhs.exponent;
776 if (isFiniteNonZero() || category == fcNaN)
777 copySignificand(rhs);
778 }
779
copySignificand(const IEEEFloat & rhs)780 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
781 assert(isFiniteNonZero() || category == fcNaN);
782 assert(rhs.partCount() >= partCount());
783
784 APInt::tcAssign(significandParts(), rhs.significandParts(),
785 partCount());
786 }
787
788 /* Make this number a NaN, with an arbitrary but deterministic value
789 for the significand. If double or longer, this is a signalling NaN,
790 which may not be ideal. If float, this is QNaN(0). */
makeNaN(bool SNaN,bool Negative,const APInt * fill)791 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
792 category = fcNaN;
793 sign = Negative;
794 exponent = exponentNaN();
795
796 integerPart *significand = significandParts();
797 unsigned numParts = partCount();
798
799 APInt fill_storage;
800 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
801 // The only NaN representation is where the mantissa is all 1s, which is
802 // non-signalling.
803 SNaN = false;
804 fill_storage = APInt::getAllOnes(semantics->precision - 1);
805 fill = &fill_storage;
806 }
807
808 // Set the significand bits to the fill.
809 if (!fill || fill->getNumWords() < numParts)
810 APInt::tcSet(significand, 0, numParts);
811 if (fill) {
812 APInt::tcAssign(significand, fill->getRawData(),
813 std::min(fill->getNumWords(), numParts));
814
815 // Zero out the excess bits of the significand.
816 unsigned bitsToPreserve = semantics->precision - 1;
817 unsigned part = bitsToPreserve / 64;
818 bitsToPreserve %= 64;
819 significand[part] &= ((1ULL << bitsToPreserve) - 1);
820 for (part++; part != numParts; ++part)
821 significand[part] = 0;
822 }
823
824 unsigned QNaNBit = semantics->precision - 2;
825
826 if (SNaN) {
827 // We always have to clear the QNaN bit to make it an SNaN.
828 APInt::tcClearBit(significand, QNaNBit);
829
830 // If there are no bits set in the payload, we have to set
831 // *something* to make it a NaN instead of an infinity;
832 // conventionally, this is the next bit down from the QNaN bit.
833 if (APInt::tcIsZero(significand, numParts))
834 APInt::tcSetBit(significand, QNaNBit - 1);
835 } else {
836 // We always have to set the QNaN bit to make it a QNaN.
837 APInt::tcSetBit(significand, QNaNBit);
838 }
839
840 // For x87 extended precision, we want to make a NaN, not a
841 // pseudo-NaN. Maybe we should expose the ability to make
842 // pseudo-NaNs?
843 if (semantics == &semX87DoubleExtended)
844 APInt::tcSetBit(significand, QNaNBit + 1);
845 }
846
operator =(const IEEEFloat & rhs)847 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
848 if (this != &rhs) {
849 if (semantics != rhs.semantics) {
850 freeSignificand();
851 initialize(rhs.semantics);
852 }
853 assign(rhs);
854 }
855
856 return *this;
857 }
858
operator =(IEEEFloat && rhs)859 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
860 freeSignificand();
861
862 semantics = rhs.semantics;
863 significand = rhs.significand;
864 exponent = rhs.exponent;
865 category = rhs.category;
866 sign = rhs.sign;
867
868 rhs.semantics = &semBogus;
869 return *this;
870 }
871
isDenormal() const872 bool IEEEFloat::isDenormal() const {
873 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
874 (APInt::tcExtractBit(significandParts(),
875 semantics->precision - 1) == 0);
876 }
877
isSmallest() const878 bool IEEEFloat::isSmallest() const {
879 // The smallest number by magnitude in our format will be the smallest
880 // denormal, i.e. the floating point number with exponent being minimum
881 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
882 return isFiniteNonZero() && exponent == semantics->minExponent &&
883 significandMSB() == 0;
884 }
885
isSmallestNormalized() const886 bool IEEEFloat::isSmallestNormalized() const {
887 return getCategory() == fcNormal && exponent == semantics->minExponent &&
888 isSignificandAllZerosExceptMSB();
889 }
890
isSignificandAllOnes() const891 bool IEEEFloat::isSignificandAllOnes() const {
892 // Test if the significand excluding the integral bit is all ones. This allows
893 // us to test for binade boundaries.
894 const integerPart *Parts = significandParts();
895 const unsigned PartCount = partCountForBits(semantics->precision);
896 for (unsigned i = 0; i < PartCount - 1; i++)
897 if (~Parts[i])
898 return false;
899
900 // Set the unused high bits to all ones when we compare.
901 const unsigned NumHighBits =
902 PartCount*integerPartWidth - semantics->precision + 1;
903 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
904 "Can not have more high bits to fill than integerPartWidth");
905 const integerPart HighBitFill =
906 ~integerPart(0) << (integerPartWidth - NumHighBits);
907 if (~(Parts[PartCount - 1] | HighBitFill))
908 return false;
909
910 return true;
911 }
912
isSignificandAllOnesExceptLSB() const913 bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
914 // Test if the significand excluding the integral bit is all ones except for
915 // the least significant bit.
916 const integerPart *Parts = significandParts();
917
918 if (Parts[0] & 1)
919 return false;
920
921 const unsigned PartCount = partCountForBits(semantics->precision);
922 for (unsigned i = 0; i < PartCount - 1; i++) {
923 if (~Parts[i] & ~unsigned{!i})
924 return false;
925 }
926
927 // Set the unused high bits to all ones when we compare.
928 const unsigned NumHighBits =
929 PartCount * integerPartWidth - semantics->precision + 1;
930 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
931 "Can not have more high bits to fill than integerPartWidth");
932 const integerPart HighBitFill = ~integerPart(0)
933 << (integerPartWidth - NumHighBits);
934 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
935 return false;
936
937 return true;
938 }
939
isSignificandAllZeros() const940 bool IEEEFloat::isSignificandAllZeros() const {
941 // Test if the significand excluding the integral bit is all zeros. This
942 // allows us to test for binade boundaries.
943 const integerPart *Parts = significandParts();
944 const unsigned PartCount = partCountForBits(semantics->precision);
945
946 for (unsigned i = 0; i < PartCount - 1; i++)
947 if (Parts[i])
948 return false;
949
950 // Compute how many bits are used in the final word.
951 const unsigned NumHighBits =
952 PartCount*integerPartWidth - semantics->precision + 1;
953 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
954 "clear than integerPartWidth");
955 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
956
957 if (Parts[PartCount - 1] & HighBitMask)
958 return false;
959
960 return true;
961 }
962
isSignificandAllZerosExceptMSB() const963 bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
964 const integerPart *Parts = significandParts();
965 const unsigned PartCount = partCountForBits(semantics->precision);
966
967 for (unsigned i = 0; i < PartCount - 1; i++) {
968 if (Parts[i])
969 return false;
970 }
971
972 const unsigned NumHighBits =
973 PartCount * integerPartWidth - semantics->precision + 1;
974 return Parts[PartCount - 1] == integerPart(1)
975 << (integerPartWidth - NumHighBits);
976 }
977
isLargest() const978 bool IEEEFloat::isLargest() const {
979 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
980 // The largest number by magnitude in our format will be the floating point
981 // number with maximum exponent and with significand that is all ones except
982 // the LSB.
983 return isFiniteNonZero() && exponent == semantics->maxExponent &&
984 isSignificandAllOnesExceptLSB();
985 } else {
986 // The largest number by magnitude in our format will be the floating point
987 // number with maximum exponent and with significand that is all ones.
988 return isFiniteNonZero() && exponent == semantics->maxExponent &&
989 isSignificandAllOnes();
990 }
991 }
992
isInteger() const993 bool IEEEFloat::isInteger() const {
994 // This could be made more efficient; I'm going for obviously correct.
995 if (!isFinite()) return false;
996 IEEEFloat truncated = *this;
997 truncated.roundToIntegral(rmTowardZero);
998 return compare(truncated) == cmpEqual;
999 }
1000
bitwiseIsEqual(const IEEEFloat & rhs) const1001 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1002 if (this == &rhs)
1003 return true;
1004 if (semantics != rhs.semantics ||
1005 category != rhs.category ||
1006 sign != rhs.sign)
1007 return false;
1008 if (category==fcZero || category==fcInfinity)
1009 return true;
1010
1011 if (isFiniteNonZero() && exponent != rhs.exponent)
1012 return false;
1013
1014 return std::equal(significandParts(), significandParts() + partCount(),
1015 rhs.significandParts());
1016 }
1017
IEEEFloat(const fltSemantics & ourSemantics,integerPart value)1018 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
1019 initialize(&ourSemantics);
1020 sign = 0;
1021 category = fcNormal;
1022 zeroSignificand();
1023 exponent = ourSemantics.precision - 1;
1024 significandParts()[0] = value;
1025 normalize(rmNearestTiesToEven, lfExactlyZero);
1026 }
1027
IEEEFloat(const fltSemantics & ourSemantics)1028 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
1029 initialize(&ourSemantics);
1030 makeZero(false);
1031 }
1032
1033 // Delegate to the previous constructor, because later copy constructor may
1034 // actually inspects category, which can't be garbage.
IEEEFloat(const fltSemantics & ourSemantics,uninitializedTag tag)1035 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
1036 : IEEEFloat(ourSemantics) {}
1037
IEEEFloat(const IEEEFloat & rhs)1038 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
1039 initialize(rhs.semantics);
1040 assign(rhs);
1041 }
1042
IEEEFloat(IEEEFloat && rhs)1043 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
1044 *this = std::move(rhs);
1045 }
1046
~IEEEFloat()1047 IEEEFloat::~IEEEFloat() { freeSignificand(); }
1048
partCount() const1049 unsigned int IEEEFloat::partCount() const {
1050 return partCountForBits(semantics->precision + 1);
1051 }
1052
significandParts() const1053 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
1054 return const_cast<IEEEFloat *>(this)->significandParts();
1055 }
1056
significandParts()1057 IEEEFloat::integerPart *IEEEFloat::significandParts() {
1058 if (partCount() > 1)
1059 return significand.parts;
1060 else
1061 return &significand.part;
1062 }
1063
zeroSignificand()1064 void IEEEFloat::zeroSignificand() {
1065 APInt::tcSet(significandParts(), 0, partCount());
1066 }
1067
1068 /* Increment an fcNormal floating point number's significand. */
incrementSignificand()1069 void IEEEFloat::incrementSignificand() {
1070 integerPart carry;
1071
1072 carry = APInt::tcIncrement(significandParts(), partCount());
1073
1074 /* Our callers should never cause us to overflow. */
1075 assert(carry == 0);
1076 (void)carry;
1077 }
1078
1079 /* Add the significand of the RHS. Returns the carry flag. */
addSignificand(const IEEEFloat & rhs)1080 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1081 integerPart *parts;
1082
1083 parts = significandParts();
1084
1085 assert(semantics == rhs.semantics);
1086 assert(exponent == rhs.exponent);
1087
1088 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1089 }
1090
1091 /* Subtract the significand of the RHS with a borrow flag. Returns
1092 the borrow flag. */
subtractSignificand(const IEEEFloat & rhs,integerPart borrow)1093 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1094 integerPart borrow) {
1095 integerPart *parts;
1096
1097 parts = significandParts();
1098
1099 assert(semantics == rhs.semantics);
1100 assert(exponent == rhs.exponent);
1101
1102 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1103 partCount());
1104 }
1105
1106 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1107 on to the full-precision result of the multiplication. Returns the
1108 lost fraction. */
multiplySignificand(const IEEEFloat & rhs,IEEEFloat addend)1109 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1110 IEEEFloat addend) {
1111 unsigned int omsb; // One, not zero, based MSB.
1112 unsigned int partsCount, newPartsCount, precision;
1113 integerPart *lhsSignificand;
1114 integerPart scratch[4];
1115 integerPart *fullSignificand;
1116 lostFraction lost_fraction;
1117 bool ignored;
1118
1119 assert(semantics == rhs.semantics);
1120
1121 precision = semantics->precision;
1122
1123 // Allocate space for twice as many bits as the original significand, plus one
1124 // extra bit for the addition to overflow into.
1125 newPartsCount = partCountForBits(precision * 2 + 1);
1126
1127 if (newPartsCount > 4)
1128 fullSignificand = new integerPart[newPartsCount];
1129 else
1130 fullSignificand = scratch;
1131
1132 lhsSignificand = significandParts();
1133 partsCount = partCount();
1134
1135 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1136 rhs.significandParts(), partsCount, partsCount);
1137
1138 lost_fraction = lfExactlyZero;
1139 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1140 exponent += rhs.exponent;
1141
1142 // Assume the operands involved in the multiplication are single-precision
1143 // FP, and the two multiplicants are:
1144 // *this = a23 . a22 ... a0 * 2^e1
1145 // rhs = b23 . b22 ... b0 * 2^e2
1146 // the result of multiplication is:
1147 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1148 // Note that there are three significant bits at the left-hand side of the
1149 // radix point: two for the multiplication, and an overflow bit for the
1150 // addition (that will always be zero at this point). Move the radix point
1151 // toward left by two bits, and adjust exponent accordingly.
1152 exponent += 2;
1153
1154 if (addend.isNonZero()) {
1155 // The intermediate result of the multiplication has "2 * precision"
1156 // signicant bit; adjust the addend to be consistent with mul result.
1157 //
1158 Significand savedSignificand = significand;
1159 const fltSemantics *savedSemantics = semantics;
1160 fltSemantics extendedSemantics;
1161 opStatus status;
1162 unsigned int extendedPrecision;
1163
1164 // Normalize our MSB to one below the top bit to allow for overflow.
1165 extendedPrecision = 2 * precision + 1;
1166 if (omsb != extendedPrecision - 1) {
1167 assert(extendedPrecision > omsb);
1168 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1169 (extendedPrecision - 1) - omsb);
1170 exponent -= (extendedPrecision - 1) - omsb;
1171 }
1172
1173 /* Create new semantics. */
1174 extendedSemantics = *semantics;
1175 extendedSemantics.precision = extendedPrecision;
1176
1177 if (newPartsCount == 1)
1178 significand.part = fullSignificand[0];
1179 else
1180 significand.parts = fullSignificand;
1181 semantics = &extendedSemantics;
1182
1183 // Make a copy so we can convert it to the extended semantics.
1184 // Note that we cannot convert the addend directly, as the extendedSemantics
1185 // is a local variable (which we take a reference to).
1186 IEEEFloat extendedAddend(addend);
1187 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1188 assert(status == opOK);
1189 (void)status;
1190
1191 // Shift the significand of the addend right by one bit. This guarantees
1192 // that the high bit of the significand is zero (same as fullSignificand),
1193 // so the addition will overflow (if it does overflow at all) into the top bit.
1194 lost_fraction = extendedAddend.shiftSignificandRight(1);
1195 assert(lost_fraction == lfExactlyZero &&
1196 "Lost precision while shifting addend for fused-multiply-add.");
1197
1198 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1199
1200 /* Restore our state. */
1201 if (newPartsCount == 1)
1202 fullSignificand[0] = significand.part;
1203 significand = savedSignificand;
1204 semantics = savedSemantics;
1205
1206 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1207 }
1208
1209 // Convert the result having "2 * precision" significant-bits back to the one
1210 // having "precision" significant-bits. First, move the radix point from
1211 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1212 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1213 exponent -= precision + 1;
1214
1215 // In case MSB resides at the left-hand side of radix point, shift the
1216 // mantissa right by some amount to make sure the MSB reside right before
1217 // the radix point (i.e. "MSB . rest-significant-bits").
1218 //
1219 // Note that the result is not normalized when "omsb < precision". So, the
1220 // caller needs to call IEEEFloat::normalize() if normalized value is
1221 // expected.
1222 if (omsb > precision) {
1223 unsigned int bits, significantParts;
1224 lostFraction lf;
1225
1226 bits = omsb - precision;
1227 significantParts = partCountForBits(omsb);
1228 lf = shiftRight(fullSignificand, significantParts, bits);
1229 lost_fraction = combineLostFractions(lf, lost_fraction);
1230 exponent += bits;
1231 }
1232
1233 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1234
1235 if (newPartsCount > 4)
1236 delete [] fullSignificand;
1237
1238 return lost_fraction;
1239 }
1240
multiplySignificand(const IEEEFloat & rhs)1241 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1242 return multiplySignificand(rhs, IEEEFloat(*semantics));
1243 }
1244
1245 /* Multiply the significands of LHS and RHS to DST. */
divideSignificand(const IEEEFloat & rhs)1246 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1247 unsigned int bit, i, partsCount;
1248 const integerPart *rhsSignificand;
1249 integerPart *lhsSignificand, *dividend, *divisor;
1250 integerPart scratch[4];
1251 lostFraction lost_fraction;
1252
1253 assert(semantics == rhs.semantics);
1254
1255 lhsSignificand = significandParts();
1256 rhsSignificand = rhs.significandParts();
1257 partsCount = partCount();
1258
1259 if (partsCount > 2)
1260 dividend = new integerPart[partsCount * 2];
1261 else
1262 dividend = scratch;
1263
1264 divisor = dividend + partsCount;
1265
1266 /* Copy the dividend and divisor as they will be modified in-place. */
1267 for (i = 0; i < partsCount; i++) {
1268 dividend[i] = lhsSignificand[i];
1269 divisor[i] = rhsSignificand[i];
1270 lhsSignificand[i] = 0;
1271 }
1272
1273 exponent -= rhs.exponent;
1274
1275 unsigned int precision = semantics->precision;
1276
1277 /* Normalize the divisor. */
1278 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1279 if (bit) {
1280 exponent += bit;
1281 APInt::tcShiftLeft(divisor, partsCount, bit);
1282 }
1283
1284 /* Normalize the dividend. */
1285 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1286 if (bit) {
1287 exponent -= bit;
1288 APInt::tcShiftLeft(dividend, partsCount, bit);
1289 }
1290
1291 /* Ensure the dividend >= divisor initially for the loop below.
1292 Incidentally, this means that the division loop below is
1293 guaranteed to set the integer bit to one. */
1294 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1295 exponent--;
1296 APInt::tcShiftLeft(dividend, partsCount, 1);
1297 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1298 }
1299
1300 /* Long division. */
1301 for (bit = precision; bit; bit -= 1) {
1302 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1303 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1304 APInt::tcSetBit(lhsSignificand, bit - 1);
1305 }
1306
1307 APInt::tcShiftLeft(dividend, partsCount, 1);
1308 }
1309
1310 /* Figure out the lost fraction. */
1311 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1312
1313 if (cmp > 0)
1314 lost_fraction = lfMoreThanHalf;
1315 else if (cmp == 0)
1316 lost_fraction = lfExactlyHalf;
1317 else if (APInt::tcIsZero(dividend, partsCount))
1318 lost_fraction = lfExactlyZero;
1319 else
1320 lost_fraction = lfLessThanHalf;
1321
1322 if (partsCount > 2)
1323 delete [] dividend;
1324
1325 return lost_fraction;
1326 }
1327
significandMSB() const1328 unsigned int IEEEFloat::significandMSB() const {
1329 return APInt::tcMSB(significandParts(), partCount());
1330 }
1331
significandLSB() const1332 unsigned int IEEEFloat::significandLSB() const {
1333 return APInt::tcLSB(significandParts(), partCount());
1334 }
1335
1336 /* Note that a zero result is NOT normalized to fcZero. */
shiftSignificandRight(unsigned int bits)1337 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1338 /* Our exponent should not overflow. */
1339 assert((ExponentType) (exponent + bits) >= exponent);
1340
1341 exponent += bits;
1342
1343 return shiftRight(significandParts(), partCount(), bits);
1344 }
1345
1346 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
shiftSignificandLeft(unsigned int bits)1347 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1348 assert(bits < semantics->precision);
1349
1350 if (bits) {
1351 unsigned int partsCount = partCount();
1352
1353 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1354 exponent -= bits;
1355
1356 assert(!APInt::tcIsZero(significandParts(), partsCount));
1357 }
1358 }
1359
1360 IEEEFloat::cmpResult
compareAbsoluteValue(const IEEEFloat & rhs) const1361 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1362 int compare;
1363
1364 assert(semantics == rhs.semantics);
1365 assert(isFiniteNonZero());
1366 assert(rhs.isFiniteNonZero());
1367
1368 compare = exponent - rhs.exponent;
1369
1370 /* If exponents are equal, do an unsigned bignum comparison of the
1371 significands. */
1372 if (compare == 0)
1373 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1374 partCount());
1375
1376 if (compare > 0)
1377 return cmpGreaterThan;
1378 else if (compare < 0)
1379 return cmpLessThan;
1380 else
1381 return cmpEqual;
1382 }
1383
1384 /* Set the least significant BITS bits of a bignum, clear the
1385 rest. */
tcSetLeastSignificantBits(APInt::WordType * dst,unsigned parts,unsigned bits)1386 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1387 unsigned bits) {
1388 unsigned i = 0;
1389 while (bits > APInt::APINT_BITS_PER_WORD) {
1390 dst[i++] = ~(APInt::WordType)0;
1391 bits -= APInt::APINT_BITS_PER_WORD;
1392 }
1393
1394 if (bits)
1395 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1396
1397 while (i < parts)
1398 dst[i++] = 0;
1399 }
1400
1401 /* Handle overflow. Sign is preserved. We either become infinity or
1402 the largest finite number. */
handleOverflow(roundingMode rounding_mode)1403 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1404 /* Infinity? */
1405 if (rounding_mode == rmNearestTiesToEven ||
1406 rounding_mode == rmNearestTiesToAway ||
1407 (rounding_mode == rmTowardPositive && !sign) ||
1408 (rounding_mode == rmTowardNegative && sign)) {
1409 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1410 makeNaN(false, sign);
1411 else
1412 category = fcInfinity;
1413 return (opStatus) (opOverflow | opInexact);
1414 }
1415
1416 /* Otherwise we become the largest finite number. */
1417 category = fcNormal;
1418 exponent = semantics->maxExponent;
1419 tcSetLeastSignificantBits(significandParts(), partCount(),
1420 semantics->precision);
1421 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1422 APInt::tcClearBit(significandParts(), 0);
1423
1424 return opInexact;
1425 }
1426
1427 /* Returns TRUE if, when truncating the current number, with BIT the
1428 new LSB, with the given lost fraction and rounding mode, the result
1429 would need to be rounded away from zero (i.e., by increasing the
1430 signficand). This routine must work for fcZero of both signs, and
1431 fcNormal numbers. */
roundAwayFromZero(roundingMode rounding_mode,lostFraction lost_fraction,unsigned int bit) const1432 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1433 lostFraction lost_fraction,
1434 unsigned int bit) const {
1435 /* NaNs and infinities should not have lost fractions. */
1436 assert(isFiniteNonZero() || category == fcZero);
1437
1438 /* Current callers never pass this so we don't handle it. */
1439 assert(lost_fraction != lfExactlyZero);
1440
1441 switch (rounding_mode) {
1442 case rmNearestTiesToAway:
1443 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1444
1445 case rmNearestTiesToEven:
1446 if (lost_fraction == lfMoreThanHalf)
1447 return true;
1448
1449 /* Our zeroes don't have a significand to test. */
1450 if (lost_fraction == lfExactlyHalf && category != fcZero)
1451 return APInt::tcExtractBit(significandParts(), bit);
1452
1453 return false;
1454
1455 case rmTowardZero:
1456 return false;
1457
1458 case rmTowardPositive:
1459 return !sign;
1460
1461 case rmTowardNegative:
1462 return sign;
1463
1464 default:
1465 break;
1466 }
1467 llvm_unreachable("Invalid rounding mode found");
1468 }
1469
normalize(roundingMode rounding_mode,lostFraction lost_fraction)1470 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1471 lostFraction lost_fraction) {
1472 unsigned int omsb; /* One, not zero, based MSB. */
1473 int exponentChange;
1474
1475 if (!isFiniteNonZero())
1476 return opOK;
1477
1478 /* Before rounding normalize the exponent of fcNormal numbers. */
1479 omsb = significandMSB() + 1;
1480
1481 if (omsb) {
1482 /* OMSB is numbered from 1. We want to place it in the integer
1483 bit numbered PRECISION if possible, with a compensating change in
1484 the exponent. */
1485 exponentChange = omsb - semantics->precision;
1486
1487 /* If the resulting exponent is too high, overflow according to
1488 the rounding mode. */
1489 if (exponent + exponentChange > semantics->maxExponent)
1490 return handleOverflow(rounding_mode);
1491
1492 /* Subnormal numbers have exponent minExponent, and their MSB
1493 is forced based on that. */
1494 if (exponent + exponentChange < semantics->minExponent)
1495 exponentChange = semantics->minExponent - exponent;
1496
1497 /* Shifting left is easy as we don't lose precision. */
1498 if (exponentChange < 0) {
1499 assert(lost_fraction == lfExactlyZero);
1500
1501 shiftSignificandLeft(-exponentChange);
1502
1503 return opOK;
1504 }
1505
1506 if (exponentChange > 0) {
1507 lostFraction lf;
1508
1509 /* Shift right and capture any new lost fraction. */
1510 lf = shiftSignificandRight(exponentChange);
1511
1512 lost_fraction = combineLostFractions(lf, lost_fraction);
1513
1514 /* Keep OMSB up-to-date. */
1515 if (omsb > (unsigned) exponentChange)
1516 omsb -= exponentChange;
1517 else
1518 omsb = 0;
1519 }
1520 }
1521
1522 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1523 exponent == semantics->maxExponent && isSignificandAllOnes())
1524 return handleOverflow(rounding_mode);
1525
1526 /* Now round the number according to rounding_mode given the lost
1527 fraction. */
1528
1529 /* As specified in IEEE 754, since we do not trap we do not report
1530 underflow for exact results. */
1531 if (lost_fraction == lfExactlyZero) {
1532 /* Canonicalize zeroes. */
1533 if (omsb == 0)
1534 category = fcZero;
1535
1536 return opOK;
1537 }
1538
1539 /* Increment the significand if we're rounding away from zero. */
1540 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1541 if (omsb == 0)
1542 exponent = semantics->minExponent;
1543
1544 incrementSignificand();
1545 omsb = significandMSB() + 1;
1546
1547 /* Did the significand increment overflow? */
1548 if (omsb == (unsigned) semantics->precision + 1) {
1549 /* Renormalize by incrementing the exponent and shifting our
1550 significand right one. However if we already have the
1551 maximum exponent we overflow to infinity. */
1552 if (exponent == semantics->maxExponent) {
1553 category = fcInfinity;
1554
1555 return (opStatus) (opOverflow | opInexact);
1556 }
1557
1558 shiftSignificandRight(1);
1559
1560 return opInexact;
1561 }
1562
1563 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1564 exponent == semantics->maxExponent && isSignificandAllOnes())
1565 return handleOverflow(rounding_mode);
1566 }
1567
1568 /* The normal case - we were and are not denormal, and any
1569 significand increment above didn't overflow. */
1570 if (omsb == semantics->precision)
1571 return opInexact;
1572
1573 /* We have a non-zero denormal. */
1574 assert(omsb < semantics->precision);
1575
1576 /* Canonicalize zeroes. */
1577 if (omsb == 0)
1578 category = fcZero;
1579
1580 /* The fcZero case is a denormal that underflowed to zero. */
1581 return (opStatus) (opUnderflow | opInexact);
1582 }
1583
addOrSubtractSpecials(const IEEEFloat & rhs,bool subtract)1584 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1585 bool subtract) {
1586 switch (PackCategoriesIntoKey(category, rhs.category)) {
1587 default:
1588 llvm_unreachable(nullptr);
1589
1590 case PackCategoriesIntoKey(fcZero, fcNaN):
1591 case PackCategoriesIntoKey(fcNormal, fcNaN):
1592 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1593 assign(rhs);
1594 [[fallthrough]];
1595 case PackCategoriesIntoKey(fcNaN, fcZero):
1596 case PackCategoriesIntoKey(fcNaN, fcNormal):
1597 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1598 case PackCategoriesIntoKey(fcNaN, fcNaN):
1599 if (isSignaling()) {
1600 makeQuiet();
1601 return opInvalidOp;
1602 }
1603 return rhs.isSignaling() ? opInvalidOp : opOK;
1604
1605 case PackCategoriesIntoKey(fcNormal, fcZero):
1606 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1607 case PackCategoriesIntoKey(fcInfinity, fcZero):
1608 return opOK;
1609
1610 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1611 case PackCategoriesIntoKey(fcZero, fcInfinity):
1612 category = fcInfinity;
1613 sign = rhs.sign ^ subtract;
1614 return opOK;
1615
1616 case PackCategoriesIntoKey(fcZero, fcNormal):
1617 assign(rhs);
1618 sign = rhs.sign ^ subtract;
1619 return opOK;
1620
1621 case PackCategoriesIntoKey(fcZero, fcZero):
1622 /* Sign depends on rounding mode; handled by caller. */
1623 return opOK;
1624
1625 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1626 /* Differently signed infinities can only be validly
1627 subtracted. */
1628 if (((sign ^ rhs.sign)!=0) != subtract) {
1629 makeNaN();
1630 return opInvalidOp;
1631 }
1632
1633 return opOK;
1634
1635 case PackCategoriesIntoKey(fcNormal, fcNormal):
1636 return opDivByZero;
1637 }
1638 }
1639
1640 /* Add or subtract two normal numbers. */
addOrSubtractSignificand(const IEEEFloat & rhs,bool subtract)1641 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1642 bool subtract) {
1643 integerPart carry;
1644 lostFraction lost_fraction;
1645 int bits;
1646
1647 /* Determine if the operation on the absolute values is effectively
1648 an addition or subtraction. */
1649 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1650
1651 /* Are we bigger exponent-wise than the RHS? */
1652 bits = exponent - rhs.exponent;
1653
1654 /* Subtraction is more subtle than one might naively expect. */
1655 if (subtract) {
1656 IEEEFloat temp_rhs(rhs);
1657
1658 if (bits == 0)
1659 lost_fraction = lfExactlyZero;
1660 else if (bits > 0) {
1661 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1662 shiftSignificandLeft(1);
1663 } else {
1664 lost_fraction = shiftSignificandRight(-bits - 1);
1665 temp_rhs.shiftSignificandLeft(1);
1666 }
1667
1668 // Should we reverse the subtraction.
1669 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1670 carry = temp_rhs.subtractSignificand
1671 (*this, lost_fraction != lfExactlyZero);
1672 copySignificand(temp_rhs);
1673 sign = !sign;
1674 } else {
1675 carry = subtractSignificand
1676 (temp_rhs, lost_fraction != lfExactlyZero);
1677 }
1678
1679 /* Invert the lost fraction - it was on the RHS and
1680 subtracted. */
1681 if (lost_fraction == lfLessThanHalf)
1682 lost_fraction = lfMoreThanHalf;
1683 else if (lost_fraction == lfMoreThanHalf)
1684 lost_fraction = lfLessThanHalf;
1685
1686 /* The code above is intended to ensure that no borrow is
1687 necessary. */
1688 assert(!carry);
1689 (void)carry;
1690 } else {
1691 if (bits > 0) {
1692 IEEEFloat temp_rhs(rhs);
1693
1694 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1695 carry = addSignificand(temp_rhs);
1696 } else {
1697 lost_fraction = shiftSignificandRight(-bits);
1698 carry = addSignificand(rhs);
1699 }
1700
1701 /* We have a guard bit; generating a carry cannot happen. */
1702 assert(!carry);
1703 (void)carry;
1704 }
1705
1706 return lost_fraction;
1707 }
1708
multiplySpecials(const IEEEFloat & rhs)1709 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1710 switch (PackCategoriesIntoKey(category, rhs.category)) {
1711 default:
1712 llvm_unreachable(nullptr);
1713
1714 case PackCategoriesIntoKey(fcZero, fcNaN):
1715 case PackCategoriesIntoKey(fcNormal, fcNaN):
1716 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1717 assign(rhs);
1718 sign = false;
1719 [[fallthrough]];
1720 case PackCategoriesIntoKey(fcNaN, fcZero):
1721 case PackCategoriesIntoKey(fcNaN, fcNormal):
1722 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1723 case PackCategoriesIntoKey(fcNaN, fcNaN):
1724 sign ^= rhs.sign; // restore the original sign
1725 if (isSignaling()) {
1726 makeQuiet();
1727 return opInvalidOp;
1728 }
1729 return rhs.isSignaling() ? opInvalidOp : opOK;
1730
1731 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1732 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1733 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1734 category = fcInfinity;
1735 return opOK;
1736
1737 case PackCategoriesIntoKey(fcZero, fcNormal):
1738 case PackCategoriesIntoKey(fcNormal, fcZero):
1739 case PackCategoriesIntoKey(fcZero, fcZero):
1740 category = fcZero;
1741 return opOK;
1742
1743 case PackCategoriesIntoKey(fcZero, fcInfinity):
1744 case PackCategoriesIntoKey(fcInfinity, fcZero):
1745 makeNaN();
1746 return opInvalidOp;
1747
1748 case PackCategoriesIntoKey(fcNormal, fcNormal):
1749 return opOK;
1750 }
1751 }
1752
divideSpecials(const IEEEFloat & rhs)1753 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1754 switch (PackCategoriesIntoKey(category, rhs.category)) {
1755 default:
1756 llvm_unreachable(nullptr);
1757
1758 case PackCategoriesIntoKey(fcZero, fcNaN):
1759 case PackCategoriesIntoKey(fcNormal, fcNaN):
1760 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1761 assign(rhs);
1762 sign = false;
1763 [[fallthrough]];
1764 case PackCategoriesIntoKey(fcNaN, fcZero):
1765 case PackCategoriesIntoKey(fcNaN, fcNormal):
1766 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1767 case PackCategoriesIntoKey(fcNaN, fcNaN):
1768 sign ^= rhs.sign; // restore the original sign
1769 if (isSignaling()) {
1770 makeQuiet();
1771 return opInvalidOp;
1772 }
1773 return rhs.isSignaling() ? opInvalidOp : opOK;
1774
1775 case PackCategoriesIntoKey(fcInfinity, fcZero):
1776 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1777 case PackCategoriesIntoKey(fcZero, fcInfinity):
1778 case PackCategoriesIntoKey(fcZero, fcNormal):
1779 return opOK;
1780
1781 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1782 category = fcZero;
1783 return opOK;
1784
1785 case PackCategoriesIntoKey(fcNormal, fcZero):
1786 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1787 makeNaN(false, sign);
1788 else
1789 category = fcInfinity;
1790 return opDivByZero;
1791
1792 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1793 case PackCategoriesIntoKey(fcZero, fcZero):
1794 makeNaN();
1795 return opInvalidOp;
1796
1797 case PackCategoriesIntoKey(fcNormal, fcNormal):
1798 return opOK;
1799 }
1800 }
1801
modSpecials(const IEEEFloat & rhs)1802 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1803 switch (PackCategoriesIntoKey(category, rhs.category)) {
1804 default:
1805 llvm_unreachable(nullptr);
1806
1807 case PackCategoriesIntoKey(fcZero, fcNaN):
1808 case PackCategoriesIntoKey(fcNormal, fcNaN):
1809 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1810 assign(rhs);
1811 [[fallthrough]];
1812 case PackCategoriesIntoKey(fcNaN, fcZero):
1813 case PackCategoriesIntoKey(fcNaN, fcNormal):
1814 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1815 case PackCategoriesIntoKey(fcNaN, fcNaN):
1816 if (isSignaling()) {
1817 makeQuiet();
1818 return opInvalidOp;
1819 }
1820 return rhs.isSignaling() ? opInvalidOp : opOK;
1821
1822 case PackCategoriesIntoKey(fcZero, fcInfinity):
1823 case PackCategoriesIntoKey(fcZero, fcNormal):
1824 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1825 return opOK;
1826
1827 case PackCategoriesIntoKey(fcNormal, fcZero):
1828 case PackCategoriesIntoKey(fcInfinity, fcZero):
1829 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1830 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1831 case PackCategoriesIntoKey(fcZero, fcZero):
1832 makeNaN();
1833 return opInvalidOp;
1834
1835 case PackCategoriesIntoKey(fcNormal, fcNormal):
1836 return opOK;
1837 }
1838 }
1839
remainderSpecials(const IEEEFloat & rhs)1840 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1841 switch (PackCategoriesIntoKey(category, rhs.category)) {
1842 default:
1843 llvm_unreachable(nullptr);
1844
1845 case PackCategoriesIntoKey(fcZero, fcNaN):
1846 case PackCategoriesIntoKey(fcNormal, fcNaN):
1847 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1848 assign(rhs);
1849 [[fallthrough]];
1850 case PackCategoriesIntoKey(fcNaN, fcZero):
1851 case PackCategoriesIntoKey(fcNaN, fcNormal):
1852 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1853 case PackCategoriesIntoKey(fcNaN, fcNaN):
1854 if (isSignaling()) {
1855 makeQuiet();
1856 return opInvalidOp;
1857 }
1858 return rhs.isSignaling() ? opInvalidOp : opOK;
1859
1860 case PackCategoriesIntoKey(fcZero, fcInfinity):
1861 case PackCategoriesIntoKey(fcZero, fcNormal):
1862 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1863 return opOK;
1864
1865 case PackCategoriesIntoKey(fcNormal, fcZero):
1866 case PackCategoriesIntoKey(fcInfinity, fcZero):
1867 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1868 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1869 case PackCategoriesIntoKey(fcZero, fcZero):
1870 makeNaN();
1871 return opInvalidOp;
1872
1873 case PackCategoriesIntoKey(fcNormal, fcNormal):
1874 return opDivByZero; // fake status, indicating this is not a special case
1875 }
1876 }
1877
1878 /* Change sign. */
changeSign()1879 void IEEEFloat::changeSign() {
1880 /* Look mummy, this one's easy. */
1881 sign = !sign;
1882 }
1883
1884 /* Normalized addition or subtraction. */
addOrSubtract(const IEEEFloat & rhs,roundingMode rounding_mode,bool subtract)1885 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1886 roundingMode rounding_mode,
1887 bool subtract) {
1888 opStatus fs;
1889
1890 fs = addOrSubtractSpecials(rhs, subtract);
1891
1892 /* This return code means it was not a simple case. */
1893 if (fs == opDivByZero) {
1894 lostFraction lost_fraction;
1895
1896 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1897 fs = normalize(rounding_mode, lost_fraction);
1898
1899 /* Can only be zero if we lost no fraction. */
1900 assert(category != fcZero || lost_fraction == lfExactlyZero);
1901 }
1902
1903 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1904 positive zero unless rounding to minus infinity, except that
1905 adding two like-signed zeroes gives that zero. */
1906 if (category == fcZero) {
1907 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1908 sign = (rounding_mode == rmTowardNegative);
1909 }
1910
1911 return fs;
1912 }
1913
1914 /* Normalized addition. */
add(const IEEEFloat & rhs,roundingMode rounding_mode)1915 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
1916 roundingMode rounding_mode) {
1917 return addOrSubtract(rhs, rounding_mode, false);
1918 }
1919
1920 /* Normalized subtraction. */
subtract(const IEEEFloat & rhs,roundingMode rounding_mode)1921 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
1922 roundingMode rounding_mode) {
1923 return addOrSubtract(rhs, rounding_mode, true);
1924 }
1925
1926 /* Normalized multiply. */
multiply(const IEEEFloat & rhs,roundingMode rounding_mode)1927 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
1928 roundingMode rounding_mode) {
1929 opStatus fs;
1930
1931 sign ^= rhs.sign;
1932 fs = multiplySpecials(rhs);
1933
1934 if (isFiniteNonZero()) {
1935 lostFraction lost_fraction = multiplySignificand(rhs);
1936 fs = normalize(rounding_mode, lost_fraction);
1937 if (lost_fraction != lfExactlyZero)
1938 fs = (opStatus) (fs | opInexact);
1939 }
1940
1941 return fs;
1942 }
1943
1944 /* Normalized divide. */
divide(const IEEEFloat & rhs,roundingMode rounding_mode)1945 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
1946 roundingMode rounding_mode) {
1947 opStatus fs;
1948
1949 sign ^= rhs.sign;
1950 fs = divideSpecials(rhs);
1951
1952 if (isFiniteNonZero()) {
1953 lostFraction lost_fraction = divideSignificand(rhs);
1954 fs = normalize(rounding_mode, lost_fraction);
1955 if (lost_fraction != lfExactlyZero)
1956 fs = (opStatus) (fs | opInexact);
1957 }
1958
1959 return fs;
1960 }
1961
1962 /* Normalized remainder. */
remainder(const IEEEFloat & rhs)1963 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
1964 opStatus fs;
1965 unsigned int origSign = sign;
1966
1967 // First handle the special cases.
1968 fs = remainderSpecials(rhs);
1969 if (fs != opDivByZero)
1970 return fs;
1971
1972 fs = opOK;
1973
1974 // Make sure the current value is less than twice the denom. If the addition
1975 // did not succeed (an overflow has happened), which means that the finite
1976 // value we currently posses must be less than twice the denom (as we are
1977 // using the same semantics).
1978 IEEEFloat P2 = rhs;
1979 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
1980 fs = mod(P2);
1981 assert(fs == opOK);
1982 }
1983
1984 // Lets work with absolute numbers.
1985 IEEEFloat P = rhs;
1986 P.sign = false;
1987 sign = false;
1988
1989 //
1990 // To calculate the remainder we use the following scheme.
1991 //
1992 // The remainder is defained as follows:
1993 //
1994 // remainder = numer - rquot * denom = x - r * p
1995 //
1996 // Where r is the result of: x/p, rounded toward the nearest integral value
1997 // (with halfway cases rounded toward the even number).
1998 //
1999 // Currently, (after x mod 2p):
2000 // r is the number of 2p's present inside x, which is inherently, an even
2001 // number of p's.
2002 //
2003 // We may split the remaining calculation into 4 options:
2004 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2005 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2006 // are done as well.
2007 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2008 // to subtract 1p at least once.
2009 // - if x >= p then we must subtract p at least once, as x must be a
2010 // remainder.
2011 //
2012 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2013 //
2014 // We can now split the remaining calculation to the following 3 options:
2015 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2016 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2017 // must round up to the next even number. so we must subtract p once more.
2018 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2019 // integral, and subtract p once more.
2020 //
2021
2022 // Extend the semantics to prevent an overflow/underflow or inexact result.
2023 bool losesInfo;
2024 fltSemantics extendedSemantics = *semantics;
2025 extendedSemantics.maxExponent++;
2026 extendedSemantics.minExponent--;
2027 extendedSemantics.precision += 2;
2028
2029 IEEEFloat VEx = *this;
2030 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2031 assert(fs == opOK && !losesInfo);
2032 IEEEFloat PEx = P;
2033 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2034 assert(fs == opOK && !losesInfo);
2035
2036 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2037 // any fraction.
2038 fs = VEx.add(VEx, rmNearestTiesToEven);
2039 assert(fs == opOK);
2040
2041 if (VEx.compare(PEx) == cmpGreaterThan) {
2042 fs = subtract(P, rmNearestTiesToEven);
2043 assert(fs == opOK);
2044
2045 // Make VEx = this.add(this), but because we have different semantics, we do
2046 // not want to `convert` again, so we just subtract PEx twice (which equals
2047 // to the desired value).
2048 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2049 assert(fs == opOK);
2050 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2051 assert(fs == opOK);
2052
2053 cmpResult result = VEx.compare(PEx);
2054 if (result == cmpGreaterThan || result == cmpEqual) {
2055 fs = subtract(P, rmNearestTiesToEven);
2056 assert(fs == opOK);
2057 }
2058 }
2059
2060 if (isZero())
2061 sign = origSign; // IEEE754 requires this
2062 else
2063 sign ^= origSign;
2064 return fs;
2065 }
2066
2067 /* Normalized llvm frem (C fmod). */
mod(const IEEEFloat & rhs)2068 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
2069 opStatus fs;
2070 fs = modSpecials(rhs);
2071 unsigned int origSign = sign;
2072
2073 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2074 compareAbsoluteValue(rhs) != cmpLessThan) {
2075 int Exp = ilogb(*this) - ilogb(rhs);
2076 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2077 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2078 // check for it.
2079 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2080 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2081 V.sign = sign;
2082
2083 fs = subtract(V, rmNearestTiesToEven);
2084 assert(fs==opOK);
2085 }
2086 if (isZero())
2087 sign = origSign; // fmod requires this
2088 return fs;
2089 }
2090
2091 /* Normalized fused-multiply-add. */
fusedMultiplyAdd(const IEEEFloat & multiplicand,const IEEEFloat & addend,roundingMode rounding_mode)2092 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
2093 const IEEEFloat &addend,
2094 roundingMode rounding_mode) {
2095 opStatus fs;
2096
2097 /* Post-multiplication sign, before addition. */
2098 sign ^= multiplicand.sign;
2099
2100 /* If and only if all arguments are normal do we need to do an
2101 extended-precision calculation. */
2102 if (isFiniteNonZero() &&
2103 multiplicand.isFiniteNonZero() &&
2104 addend.isFinite()) {
2105 lostFraction lost_fraction;
2106
2107 lost_fraction = multiplySignificand(multiplicand, addend);
2108 fs = normalize(rounding_mode, lost_fraction);
2109 if (lost_fraction != lfExactlyZero)
2110 fs = (opStatus) (fs | opInexact);
2111
2112 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2113 positive zero unless rounding to minus infinity, except that
2114 adding two like-signed zeroes gives that zero. */
2115 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
2116 sign = (rounding_mode == rmTowardNegative);
2117 } else {
2118 fs = multiplySpecials(multiplicand);
2119
2120 /* FS can only be opOK or opInvalidOp. There is no more work
2121 to do in the latter case. The IEEE-754R standard says it is
2122 implementation-defined in this case whether, if ADDEND is a
2123 quiet NaN, we raise invalid op; this implementation does so.
2124
2125 If we need to do the addition we can do so with normal
2126 precision. */
2127 if (fs == opOK)
2128 fs = addOrSubtract(addend, rounding_mode, false);
2129 }
2130
2131 return fs;
2132 }
2133
2134 /* Rounding-mode correct round to integral value. */
roundToIntegral(roundingMode rounding_mode)2135 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
2136 opStatus fs;
2137
2138 if (isInfinity())
2139 // [IEEE Std 754-2008 6.1]:
2140 // The behavior of infinity in floating-point arithmetic is derived from the
2141 // limiting cases of real arithmetic with operands of arbitrarily
2142 // large magnitude, when such a limit exists.
2143 // ...
2144 // Operations on infinite operands are usually exact and therefore signal no
2145 // exceptions ...
2146 return opOK;
2147
2148 if (isNaN()) {
2149 if (isSignaling()) {
2150 // [IEEE Std 754-2008 6.2]:
2151 // Under default exception handling, any operation signaling an invalid
2152 // operation exception and for which a floating-point result is to be
2153 // delivered shall deliver a quiet NaN.
2154 makeQuiet();
2155 // [IEEE Std 754-2008 6.2]:
2156 // Signaling NaNs shall be reserved operands that, under default exception
2157 // handling, signal the invalid operation exception(see 7.2) for every
2158 // general-computational and signaling-computational operation except for
2159 // the conversions described in 5.12.
2160 return opInvalidOp;
2161 } else {
2162 // [IEEE Std 754-2008 6.2]:
2163 // For an operation with quiet NaN inputs, other than maximum and minimum
2164 // operations, if a floating-point result is to be delivered the result
2165 // shall be a quiet NaN which should be one of the input NaNs.
2166 // ...
2167 // Every general-computational and quiet-computational operation involving
2168 // one or more input NaNs, none of them signaling, shall signal no
2169 // exception, except fusedMultiplyAdd might signal the invalid operation
2170 // exception(see 7.2).
2171 return opOK;
2172 }
2173 }
2174
2175 if (isZero()) {
2176 // [IEEE Std 754-2008 6.3]:
2177 // ... the sign of the result of conversions, the quantize operation, the
2178 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2179 // the sign of the first or only operand.
2180 return opOK;
2181 }
2182
2183 // If the exponent is large enough, we know that this value is already
2184 // integral, and the arithmetic below would potentially cause it to saturate
2185 // to +/-Inf. Bail out early instead.
2186 if (exponent+1 >= (int)semanticsPrecision(*semantics))
2187 return opOK;
2188
2189 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2190 // precision of our format, and then subtract it back off again. The choice
2191 // of rounding modes for the addition/subtraction determines the rounding mode
2192 // for our integral rounding as well.
2193 // NOTE: When the input value is negative, we do subtraction followed by
2194 // addition instead.
2195 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2196 IntegerConstant <<= semanticsPrecision(*semantics)-1;
2197 IEEEFloat MagicConstant(*semantics);
2198 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2199 rmNearestTiesToEven);
2200 assert(fs == opOK);
2201 MagicConstant.sign = sign;
2202
2203 // Preserve the input sign so that we can handle the case of zero result
2204 // correctly.
2205 bool inputSign = isNegative();
2206
2207 fs = add(MagicConstant, rounding_mode);
2208
2209 // Current value and 'MagicConstant' are both integers, so the result of the
2210 // subtraction is always exact according to Sterbenz' lemma.
2211 subtract(MagicConstant, rounding_mode);
2212
2213 // Restore the input sign.
2214 if (inputSign != isNegative())
2215 changeSign();
2216
2217 return fs;
2218 }
2219
2220
2221 /* Comparison requires normalized numbers. */
compare(const IEEEFloat & rhs) const2222 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2223 cmpResult result;
2224
2225 assert(semantics == rhs.semantics);
2226
2227 switch (PackCategoriesIntoKey(category, rhs.category)) {
2228 default:
2229 llvm_unreachable(nullptr);
2230
2231 case PackCategoriesIntoKey(fcNaN, fcZero):
2232 case PackCategoriesIntoKey(fcNaN, fcNormal):
2233 case PackCategoriesIntoKey(fcNaN, fcInfinity):
2234 case PackCategoriesIntoKey(fcNaN, fcNaN):
2235 case PackCategoriesIntoKey(fcZero, fcNaN):
2236 case PackCategoriesIntoKey(fcNormal, fcNaN):
2237 case PackCategoriesIntoKey(fcInfinity, fcNaN):
2238 return cmpUnordered;
2239
2240 case PackCategoriesIntoKey(fcInfinity, fcNormal):
2241 case PackCategoriesIntoKey(fcInfinity, fcZero):
2242 case PackCategoriesIntoKey(fcNormal, fcZero):
2243 if (sign)
2244 return cmpLessThan;
2245 else
2246 return cmpGreaterThan;
2247
2248 case PackCategoriesIntoKey(fcNormal, fcInfinity):
2249 case PackCategoriesIntoKey(fcZero, fcInfinity):
2250 case PackCategoriesIntoKey(fcZero, fcNormal):
2251 if (rhs.sign)
2252 return cmpGreaterThan;
2253 else
2254 return cmpLessThan;
2255
2256 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2257 if (sign == rhs.sign)
2258 return cmpEqual;
2259 else if (sign)
2260 return cmpLessThan;
2261 else
2262 return cmpGreaterThan;
2263
2264 case PackCategoriesIntoKey(fcZero, fcZero):
2265 return cmpEqual;
2266
2267 case PackCategoriesIntoKey(fcNormal, fcNormal):
2268 break;
2269 }
2270
2271 /* Two normal numbers. Do they have the same sign? */
2272 if (sign != rhs.sign) {
2273 if (sign)
2274 result = cmpLessThan;
2275 else
2276 result = cmpGreaterThan;
2277 } else {
2278 /* Compare absolute values; invert result if negative. */
2279 result = compareAbsoluteValue(rhs);
2280
2281 if (sign) {
2282 if (result == cmpLessThan)
2283 result = cmpGreaterThan;
2284 else if (result == cmpGreaterThan)
2285 result = cmpLessThan;
2286 }
2287 }
2288
2289 return result;
2290 }
2291
2292 /// IEEEFloat::convert - convert a value of one floating point type to another.
2293 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
2294 /// records whether the transformation lost information, i.e. whether
2295 /// converting the result back to the original type will produce the
2296 /// original value (this is almost the same as return value==fsOK, but there
2297 /// are edge cases where this is not so).
2298
convert(const fltSemantics & toSemantics,roundingMode rounding_mode,bool * losesInfo)2299 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2300 roundingMode rounding_mode,
2301 bool *losesInfo) {
2302 lostFraction lostFraction;
2303 unsigned int newPartCount, oldPartCount;
2304 opStatus fs;
2305 int shift;
2306 const fltSemantics &fromSemantics = *semantics;
2307 bool is_signaling = isSignaling();
2308
2309 lostFraction = lfExactlyZero;
2310 newPartCount = partCountForBits(toSemantics.precision + 1);
2311 oldPartCount = partCount();
2312 shift = toSemantics.precision - fromSemantics.precision;
2313
2314 bool X86SpecialNan = false;
2315 if (&fromSemantics == &semX87DoubleExtended &&
2316 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2317 (!(*significandParts() & 0x8000000000000000ULL) ||
2318 !(*significandParts() & 0x4000000000000000ULL))) {
2319 // x86 has some unusual NaNs which cannot be represented in any other
2320 // format; note them here.
2321 X86SpecialNan = true;
2322 }
2323
2324 // If this is a truncation of a denormal number, and the target semantics
2325 // has larger exponent range than the source semantics (this can happen
2326 // when truncating from PowerPC double-double to double format), the
2327 // right shift could lose result mantissa bits. Adjust exponent instead
2328 // of performing excessive shift.
2329 // Also do a similar trick in case shifting denormal would produce zero
2330 // significand as this case isn't handled correctly by normalize.
2331 if (shift < 0 && isFiniteNonZero()) {
2332 int omsb = significandMSB() + 1;
2333 int exponentChange = omsb - fromSemantics.precision;
2334 if (exponent + exponentChange < toSemantics.minExponent)
2335 exponentChange = toSemantics.minExponent - exponent;
2336 if (exponentChange < shift)
2337 exponentChange = shift;
2338 if (exponentChange < 0) {
2339 shift -= exponentChange;
2340 exponent += exponentChange;
2341 } else if (omsb <= -shift) {
2342 exponentChange = omsb + shift - 1; // leave at least one bit set
2343 shift -= exponentChange;
2344 exponent += exponentChange;
2345 }
2346 }
2347
2348 // If this is a truncation, perform the shift before we narrow the storage.
2349 if (shift < 0 && (isFiniteNonZero() ||
2350 (category == fcNaN && semantics->nonFiniteBehavior !=
2351 fltNonfiniteBehavior::NanOnly)))
2352 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2353
2354 // Fix the storage so it can hold to new value.
2355 if (newPartCount > oldPartCount) {
2356 // The new type requires more storage; make it available.
2357 integerPart *newParts;
2358 newParts = new integerPart[newPartCount];
2359 APInt::tcSet(newParts, 0, newPartCount);
2360 if (isFiniteNonZero() || category==fcNaN)
2361 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2362 freeSignificand();
2363 significand.parts = newParts;
2364 } else if (newPartCount == 1 && oldPartCount != 1) {
2365 // Switch to built-in storage for a single part.
2366 integerPart newPart = 0;
2367 if (isFiniteNonZero() || category==fcNaN)
2368 newPart = significandParts()[0];
2369 freeSignificand();
2370 significand.part = newPart;
2371 }
2372
2373 // Now that we have the right storage, switch the semantics.
2374 semantics = &toSemantics;
2375
2376 // If this is an extension, perform the shift now that the storage is
2377 // available.
2378 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2379 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2380
2381 if (isFiniteNonZero()) {
2382 fs = normalize(rounding_mode, lostFraction);
2383 *losesInfo = (fs != opOK);
2384 } else if (category == fcNaN) {
2385 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2386 *losesInfo =
2387 fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
2388 makeNaN(false, sign);
2389 return is_signaling ? opInvalidOp : opOK;
2390 }
2391
2392 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2393
2394 // For x87 extended precision, we want to make a NaN, not a special NaN if
2395 // the input wasn't special either.
2396 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2397 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2398
2399 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2400 // This also guarantees that a sNaN does not become Inf on a truncation
2401 // that loses all payload bits.
2402 if (is_signaling) {
2403 makeQuiet();
2404 fs = opInvalidOp;
2405 } else {
2406 fs = opOK;
2407 }
2408 } else if (category == fcInfinity &&
2409 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2410 makeNaN(false, sign);
2411 *losesInfo = true;
2412 fs = opInexact;
2413 } else {
2414 *losesInfo = false;
2415 fs = opOK;
2416 }
2417
2418 return fs;
2419 }
2420
2421 /* Convert a floating point number to an integer according to the
2422 rounding mode. If the rounded integer value is out of range this
2423 returns an invalid operation exception and the contents of the
2424 destination parts are unspecified. If the rounded value is in
2425 range but the floating point number is not the exact integer, the C
2426 standard doesn't require an inexact exception to be raised. IEEE
2427 854 does require it so we do that.
2428
2429 Note that for conversions to integer type the C standard requires
2430 round-to-zero to always be used. */
convertToSignExtendedInteger(MutableArrayRef<integerPart> parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2431 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2432 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2433 roundingMode rounding_mode, bool *isExact) const {
2434 lostFraction lost_fraction;
2435 const integerPart *src;
2436 unsigned int dstPartsCount, truncatedBits;
2437
2438 *isExact = false;
2439
2440 /* Handle the three special cases first. */
2441 if (category == fcInfinity || category == fcNaN)
2442 return opInvalidOp;
2443
2444 dstPartsCount = partCountForBits(width);
2445 assert(dstPartsCount <= parts.size() && "Integer too big");
2446
2447 if (category == fcZero) {
2448 APInt::tcSet(parts.data(), 0, dstPartsCount);
2449 // Negative zero can't be represented as an int.
2450 *isExact = !sign;
2451 return opOK;
2452 }
2453
2454 src = significandParts();
2455
2456 /* Step 1: place our absolute value, with any fraction truncated, in
2457 the destination. */
2458 if (exponent < 0) {
2459 /* Our absolute value is less than one; truncate everything. */
2460 APInt::tcSet(parts.data(), 0, dstPartsCount);
2461 /* For exponent -1 the integer bit represents .5, look at that.
2462 For smaller exponents leftmost truncated bit is 0. */
2463 truncatedBits = semantics->precision -1U - exponent;
2464 } else {
2465 /* We want the most significant (exponent + 1) bits; the rest are
2466 truncated. */
2467 unsigned int bits = exponent + 1U;
2468
2469 /* Hopelessly large in magnitude? */
2470 if (bits > width)
2471 return opInvalidOp;
2472
2473 if (bits < semantics->precision) {
2474 /* We truncate (semantics->precision - bits) bits. */
2475 truncatedBits = semantics->precision - bits;
2476 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2477 } else {
2478 /* We want at least as many bits as are available. */
2479 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2480 0);
2481 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2482 bits - semantics->precision);
2483 truncatedBits = 0;
2484 }
2485 }
2486
2487 /* Step 2: work out any lost fraction, and increment the absolute
2488 value if we would round away from zero. */
2489 if (truncatedBits) {
2490 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2491 truncatedBits);
2492 if (lost_fraction != lfExactlyZero &&
2493 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2494 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2495 return opInvalidOp; /* Overflow. */
2496 }
2497 } else {
2498 lost_fraction = lfExactlyZero;
2499 }
2500
2501 /* Step 3: check if we fit in the destination. */
2502 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2503
2504 if (sign) {
2505 if (!isSigned) {
2506 /* Negative numbers cannot be represented as unsigned. */
2507 if (omsb != 0)
2508 return opInvalidOp;
2509 } else {
2510 /* It takes omsb bits to represent the unsigned integer value.
2511 We lose a bit for the sign, but care is needed as the
2512 maximally negative integer is a special case. */
2513 if (omsb == width &&
2514 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2515 return opInvalidOp;
2516
2517 /* This case can happen because of rounding. */
2518 if (omsb > width)
2519 return opInvalidOp;
2520 }
2521
2522 APInt::tcNegate (parts.data(), dstPartsCount);
2523 } else {
2524 if (omsb >= width + !isSigned)
2525 return opInvalidOp;
2526 }
2527
2528 if (lost_fraction == lfExactlyZero) {
2529 *isExact = true;
2530 return opOK;
2531 } else
2532 return opInexact;
2533 }
2534
2535 /* Same as convertToSignExtendedInteger, except we provide
2536 deterministic values in case of an invalid operation exception,
2537 namely zero for NaNs and the minimal or maximal value respectively
2538 for underflow or overflow.
2539 The *isExact output tells whether the result is exact, in the sense
2540 that converting it back to the original floating point type produces
2541 the original value. This is almost equivalent to result==opOK,
2542 except for negative zeroes.
2543 */
2544 IEEEFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2545 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2546 unsigned int width, bool isSigned,
2547 roundingMode rounding_mode, bool *isExact) const {
2548 opStatus fs;
2549
2550 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2551 isExact);
2552
2553 if (fs == opInvalidOp) {
2554 unsigned int bits, dstPartsCount;
2555
2556 dstPartsCount = partCountForBits(width);
2557 assert(dstPartsCount <= parts.size() && "Integer too big");
2558
2559 if (category == fcNaN)
2560 bits = 0;
2561 else if (sign)
2562 bits = isSigned;
2563 else
2564 bits = width - isSigned;
2565
2566 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2567 if (sign && isSigned)
2568 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2569 }
2570
2571 return fs;
2572 }
2573
2574 /* Convert an unsigned integer SRC to a floating point number,
2575 rounding according to ROUNDING_MODE. The sign of the floating
2576 point number is not modified. */
convertFromUnsignedParts(const integerPart * src,unsigned int srcCount,roundingMode rounding_mode)2577 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2578 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2579 unsigned int omsb, precision, dstCount;
2580 integerPart *dst;
2581 lostFraction lost_fraction;
2582
2583 category = fcNormal;
2584 omsb = APInt::tcMSB(src, srcCount) + 1;
2585 dst = significandParts();
2586 dstCount = partCount();
2587 precision = semantics->precision;
2588
2589 /* We want the most significant PRECISION bits of SRC. There may not
2590 be that many; extract what we can. */
2591 if (precision <= omsb) {
2592 exponent = omsb - 1;
2593 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2594 omsb - precision);
2595 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2596 } else {
2597 exponent = precision - 1;
2598 lost_fraction = lfExactlyZero;
2599 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2600 }
2601
2602 return normalize(rounding_mode, lost_fraction);
2603 }
2604
convertFromAPInt(const APInt & Val,bool isSigned,roundingMode rounding_mode)2605 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2606 roundingMode rounding_mode) {
2607 unsigned int partCount = Val.getNumWords();
2608 APInt api = Val;
2609
2610 sign = false;
2611 if (isSigned && api.isNegative()) {
2612 sign = true;
2613 api = -api;
2614 }
2615
2616 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2617 }
2618
2619 /* Convert a two's complement integer SRC to a floating point number,
2620 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2621 integer is signed, in which case it must be sign-extended. */
2622 IEEEFloat::opStatus
convertFromSignExtendedInteger(const integerPart * src,unsigned int srcCount,bool isSigned,roundingMode rounding_mode)2623 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2624 unsigned int srcCount, bool isSigned,
2625 roundingMode rounding_mode) {
2626 opStatus status;
2627
2628 if (isSigned &&
2629 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2630 integerPart *copy;
2631
2632 /* If we're signed and negative negate a copy. */
2633 sign = true;
2634 copy = new integerPart[srcCount];
2635 APInt::tcAssign(copy, src, srcCount);
2636 APInt::tcNegate(copy, srcCount);
2637 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2638 delete [] copy;
2639 } else {
2640 sign = false;
2641 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2642 }
2643
2644 return status;
2645 }
2646
2647 /* FIXME: should this just take a const APInt reference? */
2648 IEEEFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode)2649 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2650 unsigned int width, bool isSigned,
2651 roundingMode rounding_mode) {
2652 unsigned int partCount = partCountForBits(width);
2653 APInt api = APInt(width, ArrayRef(parts, partCount));
2654
2655 sign = false;
2656 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2657 sign = true;
2658 api = -api;
2659 }
2660
2661 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2662 }
2663
2664 Expected<IEEEFloat::opStatus>
convertFromHexadecimalString(StringRef s,roundingMode rounding_mode)2665 IEEEFloat::convertFromHexadecimalString(StringRef s,
2666 roundingMode rounding_mode) {
2667 lostFraction lost_fraction = lfExactlyZero;
2668
2669 category = fcNormal;
2670 zeroSignificand();
2671 exponent = 0;
2672
2673 integerPart *significand = significandParts();
2674 unsigned partsCount = partCount();
2675 unsigned bitPos = partsCount * integerPartWidth;
2676 bool computedTrailingFraction = false;
2677
2678 // Skip leading zeroes and any (hexa)decimal point.
2679 StringRef::iterator begin = s.begin();
2680 StringRef::iterator end = s.end();
2681 StringRef::iterator dot;
2682 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2683 if (!PtrOrErr)
2684 return PtrOrErr.takeError();
2685 StringRef::iterator p = *PtrOrErr;
2686 StringRef::iterator firstSignificantDigit = p;
2687
2688 while (p != end) {
2689 integerPart hex_value;
2690
2691 if (*p == '.') {
2692 if (dot != end)
2693 return createError("String contains multiple dots");
2694 dot = p++;
2695 continue;
2696 }
2697
2698 hex_value = hexDigitValue(*p);
2699 if (hex_value == -1U)
2700 break;
2701
2702 p++;
2703
2704 // Store the number while we have space.
2705 if (bitPos) {
2706 bitPos -= 4;
2707 hex_value <<= bitPos % integerPartWidth;
2708 significand[bitPos / integerPartWidth] |= hex_value;
2709 } else if (!computedTrailingFraction) {
2710 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2711 if (!FractOrErr)
2712 return FractOrErr.takeError();
2713 lost_fraction = *FractOrErr;
2714 computedTrailingFraction = true;
2715 }
2716 }
2717
2718 /* Hex floats require an exponent but not a hexadecimal point. */
2719 if (p == end)
2720 return createError("Hex strings require an exponent");
2721 if (*p != 'p' && *p != 'P')
2722 return createError("Invalid character in significand");
2723 if (p == begin)
2724 return createError("Significand has no digits");
2725 if (dot != end && p - begin == 1)
2726 return createError("Significand has no digits");
2727
2728 /* Ignore the exponent if we are zero. */
2729 if (p != firstSignificantDigit) {
2730 int expAdjustment;
2731
2732 /* Implicit hexadecimal point? */
2733 if (dot == end)
2734 dot = p;
2735
2736 /* Calculate the exponent adjustment implicit in the number of
2737 significant digits. */
2738 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2739 if (expAdjustment < 0)
2740 expAdjustment++;
2741 expAdjustment = expAdjustment * 4 - 1;
2742
2743 /* Adjust for writing the significand starting at the most
2744 significant nibble. */
2745 expAdjustment += semantics->precision;
2746 expAdjustment -= partsCount * integerPartWidth;
2747
2748 /* Adjust for the given exponent. */
2749 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2750 if (!ExpOrErr)
2751 return ExpOrErr.takeError();
2752 exponent = *ExpOrErr;
2753 }
2754
2755 return normalize(rounding_mode, lost_fraction);
2756 }
2757
2758 IEEEFloat::opStatus
roundSignificandWithExponent(const integerPart * decSigParts,unsigned sigPartCount,int exp,roundingMode rounding_mode)2759 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2760 unsigned sigPartCount, int exp,
2761 roundingMode rounding_mode) {
2762 unsigned int parts, pow5PartCount;
2763 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2764 integerPart pow5Parts[maxPowerOfFiveParts];
2765 bool isNearest;
2766
2767 isNearest = (rounding_mode == rmNearestTiesToEven ||
2768 rounding_mode == rmNearestTiesToAway);
2769
2770 parts = partCountForBits(semantics->precision + 11);
2771
2772 /* Calculate pow(5, abs(exp)). */
2773 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2774
2775 for (;; parts *= 2) {
2776 opStatus sigStatus, powStatus;
2777 unsigned int excessPrecision, truncatedBits;
2778
2779 calcSemantics.precision = parts * integerPartWidth - 1;
2780 excessPrecision = calcSemantics.precision - semantics->precision;
2781 truncatedBits = excessPrecision;
2782
2783 IEEEFloat decSig(calcSemantics, uninitialized);
2784 decSig.makeZero(sign);
2785 IEEEFloat pow5(calcSemantics);
2786
2787 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2788 rmNearestTiesToEven);
2789 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2790 rmNearestTiesToEven);
2791 /* Add exp, as 10^n = 5^n * 2^n. */
2792 decSig.exponent += exp;
2793
2794 lostFraction calcLostFraction;
2795 integerPart HUerr, HUdistance;
2796 unsigned int powHUerr;
2797
2798 if (exp >= 0) {
2799 /* multiplySignificand leaves the precision-th bit set to 1. */
2800 calcLostFraction = decSig.multiplySignificand(pow5);
2801 powHUerr = powStatus != opOK;
2802 } else {
2803 calcLostFraction = decSig.divideSignificand(pow5);
2804 /* Denormal numbers have less precision. */
2805 if (decSig.exponent < semantics->minExponent) {
2806 excessPrecision += (semantics->minExponent - decSig.exponent);
2807 truncatedBits = excessPrecision;
2808 if (excessPrecision > calcSemantics.precision)
2809 excessPrecision = calcSemantics.precision;
2810 }
2811 /* Extra half-ulp lost in reciprocal of exponent. */
2812 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2813 }
2814
2815 /* Both multiplySignificand and divideSignificand return the
2816 result with the integer bit set. */
2817 assert(APInt::tcExtractBit
2818 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2819
2820 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2821 powHUerr);
2822 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2823 excessPrecision, isNearest);
2824
2825 /* Are we guaranteed to round correctly if we truncate? */
2826 if (HUdistance >= HUerr) {
2827 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2828 calcSemantics.precision - excessPrecision,
2829 excessPrecision);
2830 /* Take the exponent of decSig. If we tcExtract-ed less bits
2831 above we must adjust our exponent to compensate for the
2832 implicit right shift. */
2833 exponent = (decSig.exponent + semantics->precision
2834 - (calcSemantics.precision - excessPrecision));
2835 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2836 decSig.partCount(),
2837 truncatedBits);
2838 return normalize(rounding_mode, calcLostFraction);
2839 }
2840 }
2841 }
2842
2843 Expected<IEEEFloat::opStatus>
convertFromDecimalString(StringRef str,roundingMode rounding_mode)2844 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2845 decimalInfo D;
2846 opStatus fs;
2847
2848 /* Scan the text. */
2849 StringRef::iterator p = str.begin();
2850 if (Error Err = interpretDecimal(p, str.end(), &D))
2851 return std::move(Err);
2852
2853 /* Handle the quick cases. First the case of no significant digits,
2854 i.e. zero, and then exponents that are obviously too large or too
2855 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2856 definitely overflows if
2857
2858 (exp - 1) * L >= maxExponent
2859
2860 and definitely underflows to zero where
2861
2862 (exp + 1) * L <= minExponent - precision
2863
2864 With integer arithmetic the tightest bounds for L are
2865
2866 93/28 < L < 196/59 [ numerator <= 256 ]
2867 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2868 */
2869
2870 // Test if we have a zero number allowing for strings with no null terminators
2871 // and zero decimals with non-zero exponents.
2872 //
2873 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2874 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2875 // be at most one dot. On the other hand, if we have a zero with a non-zero
2876 // exponent, then we know that D.firstSigDigit will be non-numeric.
2877 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2878 category = fcZero;
2879 fs = opOK;
2880
2881 /* Check whether the normalized exponent is high enough to overflow
2882 max during the log-rebasing in the max-exponent check below. */
2883 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2884 fs = handleOverflow(rounding_mode);
2885
2886 /* If it wasn't, then it also wasn't high enough to overflow max
2887 during the log-rebasing in the min-exponent check. Check that it
2888 won't overflow min in either check, then perform the min-exponent
2889 check. */
2890 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2891 (D.normalizedExponent + 1) * 28738 <=
2892 8651 * (semantics->minExponent - (int) semantics->precision)) {
2893 /* Underflow to zero and round. */
2894 category = fcNormal;
2895 zeroSignificand();
2896 fs = normalize(rounding_mode, lfLessThanHalf);
2897
2898 /* We can finally safely perform the max-exponent check. */
2899 } else if ((D.normalizedExponent - 1) * 42039
2900 >= 12655 * semantics->maxExponent) {
2901 /* Overflow and round. */
2902 fs = handleOverflow(rounding_mode);
2903 } else {
2904 integerPart *decSignificand;
2905 unsigned int partCount;
2906
2907 /* A tight upper bound on number of bits required to hold an
2908 N-digit decimal integer is N * 196 / 59. Allocate enough space
2909 to hold the full significand, and an extra part required by
2910 tcMultiplyPart. */
2911 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2912 partCount = partCountForBits(1 + 196 * partCount / 59);
2913 decSignificand = new integerPart[partCount + 1];
2914 partCount = 0;
2915
2916 /* Convert to binary efficiently - we do almost all multiplication
2917 in an integerPart. When this would overflow do we do a single
2918 bignum multiplication, and then revert again to multiplication
2919 in an integerPart. */
2920 do {
2921 integerPart decValue, val, multiplier;
2922
2923 val = 0;
2924 multiplier = 1;
2925
2926 do {
2927 if (*p == '.') {
2928 p++;
2929 if (p == str.end()) {
2930 break;
2931 }
2932 }
2933 decValue = decDigitValue(*p++);
2934 if (decValue >= 10U) {
2935 delete[] decSignificand;
2936 return createError("Invalid character in significand");
2937 }
2938 multiplier *= 10;
2939 val = val * 10 + decValue;
2940 /* The maximum number that can be multiplied by ten with any
2941 digit added without overflowing an integerPart. */
2942 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2943
2944 /* Multiply out the current part. */
2945 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2946 partCount, partCount + 1, false);
2947
2948 /* If we used another part (likely but not guaranteed), increase
2949 the count. */
2950 if (decSignificand[partCount])
2951 partCount++;
2952 } while (p <= D.lastSigDigit);
2953
2954 category = fcNormal;
2955 fs = roundSignificandWithExponent(decSignificand, partCount,
2956 D.exponent, rounding_mode);
2957
2958 delete [] decSignificand;
2959 }
2960
2961 return fs;
2962 }
2963
convertFromStringSpecials(StringRef str)2964 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2965 const size_t MIN_NAME_SIZE = 3;
2966
2967 if (str.size() < MIN_NAME_SIZE)
2968 return false;
2969
2970 if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
2971 makeInf(false);
2972 return true;
2973 }
2974
2975 bool IsNegative = str.front() == '-';
2976 if (IsNegative) {
2977 str = str.drop_front();
2978 if (str.size() < MIN_NAME_SIZE)
2979 return false;
2980
2981 if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) {
2982 makeInf(true);
2983 return true;
2984 }
2985 }
2986
2987 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
2988 bool IsSignaling = str.front() == 's' || str.front() == 'S';
2989 if (IsSignaling) {
2990 str = str.drop_front();
2991 if (str.size() < MIN_NAME_SIZE)
2992 return false;
2993 }
2994
2995 if (str.startswith("nan") || str.startswith("NaN")) {
2996 str = str.drop_front(3);
2997
2998 // A NaN without payload.
2999 if (str.empty()) {
3000 makeNaN(IsSignaling, IsNegative);
3001 return true;
3002 }
3003
3004 // Allow the payload to be inside parentheses.
3005 if (str.front() == '(') {
3006 // Parentheses should be balanced (and not empty).
3007 if (str.size() <= 2 || str.back() != ')')
3008 return false;
3009
3010 str = str.slice(1, str.size() - 1);
3011 }
3012
3013 // Determine the payload number's radix.
3014 unsigned Radix = 10;
3015 if (str[0] == '0') {
3016 if (str.size() > 1 && tolower(str[1]) == 'x') {
3017 str = str.drop_front(2);
3018 Radix = 16;
3019 } else
3020 Radix = 8;
3021 }
3022
3023 // Parse the payload and make the NaN.
3024 APInt Payload;
3025 if (!str.getAsInteger(Radix, Payload)) {
3026 makeNaN(IsSignaling, IsNegative, &Payload);
3027 return true;
3028 }
3029 }
3030
3031 return false;
3032 }
3033
3034 Expected<IEEEFloat::opStatus>
convertFromString(StringRef str,roundingMode rounding_mode)3035 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
3036 if (str.empty())
3037 return createError("Invalid string length");
3038
3039 // Handle special cases.
3040 if (convertFromStringSpecials(str))
3041 return opOK;
3042
3043 /* Handle a leading minus sign. */
3044 StringRef::iterator p = str.begin();
3045 size_t slen = str.size();
3046 sign = *p == '-' ? 1 : 0;
3047 if (*p == '-' || *p == '+') {
3048 p++;
3049 slen--;
3050 if (!slen)
3051 return createError("String has no digits");
3052 }
3053
3054 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3055 if (slen == 2)
3056 return createError("Invalid string");
3057 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3058 rounding_mode);
3059 }
3060
3061 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3062 }
3063
3064 /* Write out a hexadecimal representation of the floating point value
3065 to DST, which must be of sufficient size, in the C99 form
3066 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3067 excluding the terminating NUL.
3068
3069 If UPPERCASE, the output is in upper case, otherwise in lower case.
3070
3071 HEXDIGITS digits appear altogether, rounding the value if
3072 necessary. If HEXDIGITS is 0, the minimal precision to display the
3073 number precisely is used instead. If nothing would appear after
3074 the decimal point it is suppressed.
3075
3076 The decimal exponent is always printed and has at least one digit.
3077 Zero values display an exponent of zero. Infinities and NaNs
3078 appear as "infinity" or "nan" respectively.
3079
3080 The above rules are as specified by C99. There is ambiguity about
3081 what the leading hexadecimal digit should be. This implementation
3082 uses whatever is necessary so that the exponent is displayed as
3083 stored. This implies the exponent will fall within the IEEE format
3084 range, and the leading hexadecimal digit will be 0 (for denormals),
3085 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3086 any other digits zero).
3087 */
convertToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const3088 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3089 bool upperCase,
3090 roundingMode rounding_mode) const {
3091 char *p;
3092
3093 p = dst;
3094 if (sign)
3095 *dst++ = '-';
3096
3097 switch (category) {
3098 case fcInfinity:
3099 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3100 dst += sizeof infinityL - 1;
3101 break;
3102
3103 case fcNaN:
3104 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3105 dst += sizeof NaNU - 1;
3106 break;
3107
3108 case fcZero:
3109 *dst++ = '0';
3110 *dst++ = upperCase ? 'X': 'x';
3111 *dst++ = '0';
3112 if (hexDigits > 1) {
3113 *dst++ = '.';
3114 memset (dst, '0', hexDigits - 1);
3115 dst += hexDigits - 1;
3116 }
3117 *dst++ = upperCase ? 'P': 'p';
3118 *dst++ = '0';
3119 break;
3120
3121 case fcNormal:
3122 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3123 break;
3124 }
3125
3126 *dst = 0;
3127
3128 return static_cast<unsigned int>(dst - p);
3129 }
3130
3131 /* Does the hard work of outputting the correctly rounded hexadecimal
3132 form of a normal floating point number with the specified number of
3133 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3134 digits necessary to print the value precisely is output. */
convertNormalToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const3135 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3136 bool upperCase,
3137 roundingMode rounding_mode) const {
3138 unsigned int count, valueBits, shift, partsCount, outputDigits;
3139 const char *hexDigitChars;
3140 const integerPart *significand;
3141 char *p;
3142 bool roundUp;
3143
3144 *dst++ = '0';
3145 *dst++ = upperCase ? 'X': 'x';
3146
3147 roundUp = false;
3148 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3149
3150 significand = significandParts();
3151 partsCount = partCount();
3152
3153 /* +3 because the first digit only uses the single integer bit, so
3154 we have 3 virtual zero most-significant-bits. */
3155 valueBits = semantics->precision + 3;
3156 shift = integerPartWidth - valueBits % integerPartWidth;
3157
3158 /* The natural number of digits required ignoring trailing
3159 insignificant zeroes. */
3160 outputDigits = (valueBits - significandLSB () + 3) / 4;
3161
3162 /* hexDigits of zero means use the required number for the
3163 precision. Otherwise, see if we are truncating. If we are,
3164 find out if we need to round away from zero. */
3165 if (hexDigits) {
3166 if (hexDigits < outputDigits) {
3167 /* We are dropping non-zero bits, so need to check how to round.
3168 "bits" is the number of dropped bits. */
3169 unsigned int bits;
3170 lostFraction fraction;
3171
3172 bits = valueBits - hexDigits * 4;
3173 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3174 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3175 }
3176 outputDigits = hexDigits;
3177 }
3178
3179 /* Write the digits consecutively, and start writing in the location
3180 of the hexadecimal point. We move the most significant digit
3181 left and add the hexadecimal point later. */
3182 p = ++dst;
3183
3184 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3185
3186 while (outputDigits && count) {
3187 integerPart part;
3188
3189 /* Put the most significant integerPartWidth bits in "part". */
3190 if (--count == partsCount)
3191 part = 0; /* An imaginary higher zero part. */
3192 else
3193 part = significand[count] << shift;
3194
3195 if (count && shift)
3196 part |= significand[count - 1] >> (integerPartWidth - shift);
3197
3198 /* Convert as much of "part" to hexdigits as we can. */
3199 unsigned int curDigits = integerPartWidth / 4;
3200
3201 if (curDigits > outputDigits)
3202 curDigits = outputDigits;
3203 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3204 outputDigits -= curDigits;
3205 }
3206
3207 if (roundUp) {
3208 char *q = dst;
3209
3210 /* Note that hexDigitChars has a trailing '0'. */
3211 do {
3212 q--;
3213 *q = hexDigitChars[hexDigitValue (*q) + 1];
3214 } while (*q == '0');
3215 assert(q >= p);
3216 } else {
3217 /* Add trailing zeroes. */
3218 memset (dst, '0', outputDigits);
3219 dst += outputDigits;
3220 }
3221
3222 /* Move the most significant digit to before the point, and if there
3223 is something after the decimal point add it. This must come
3224 after rounding above. */
3225 p[-1] = p[0];
3226 if (dst -1 == p)
3227 dst--;
3228 else
3229 p[0] = '.';
3230
3231 /* Finally output the exponent. */
3232 *dst++ = upperCase ? 'P': 'p';
3233
3234 return writeSignedDecimal (dst, exponent);
3235 }
3236
hash_value(const IEEEFloat & Arg)3237 hash_code hash_value(const IEEEFloat &Arg) {
3238 if (!Arg.isFiniteNonZero())
3239 return hash_combine((uint8_t)Arg.category,
3240 // NaN has no sign, fix it at zero.
3241 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3242 Arg.semantics->precision);
3243
3244 // Normal floats need their exponent and significand hashed.
3245 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3246 Arg.semantics->precision, Arg.exponent,
3247 hash_combine_range(
3248 Arg.significandParts(),
3249 Arg.significandParts() + Arg.partCount()));
3250 }
3251
3252 // Conversion from APFloat to/from host float/double. It may eventually be
3253 // possible to eliminate these and have everybody deal with APFloats, but that
3254 // will take a while. This approach will not easily extend to long double.
3255 // Current implementation requires integerPartWidth==64, which is correct at
3256 // the moment but could be made more general.
3257
3258 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
3259 // the actual IEEE respresentations. We compensate for that here.
3260
convertF80LongDoubleAPFloatToAPInt() const3261 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3262 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3263 assert(partCount()==2);
3264
3265 uint64_t myexponent, mysignificand;
3266
3267 if (isFiniteNonZero()) {
3268 myexponent = exponent+16383; //bias
3269 mysignificand = significandParts()[0];
3270 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3271 myexponent = 0; // denormal
3272 } else if (category==fcZero) {
3273 myexponent = 0;
3274 mysignificand = 0;
3275 } else if (category==fcInfinity) {
3276 myexponent = 0x7fff;
3277 mysignificand = 0x8000000000000000ULL;
3278 } else {
3279 assert(category == fcNaN && "Unknown category");
3280 myexponent = 0x7fff;
3281 mysignificand = significandParts()[0];
3282 }
3283
3284 uint64_t words[2];
3285 words[0] = mysignificand;
3286 words[1] = ((uint64_t)(sign & 1) << 15) |
3287 (myexponent & 0x7fffLL);
3288 return APInt(80, words);
3289 }
3290
convertPPCDoubleDoubleAPFloatToAPInt() const3291 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3292 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3293 assert(partCount()==2);
3294
3295 uint64_t words[2];
3296 opStatus fs;
3297 bool losesInfo;
3298
3299 // Convert number to double. To avoid spurious underflows, we re-
3300 // normalize against the "double" minExponent first, and only *then*
3301 // truncate the mantissa. The result of that second conversion
3302 // may be inexact, but should never underflow.
3303 // Declare fltSemantics before APFloat that uses it (and
3304 // saves pointer to it) to ensure correct destruction order.
3305 fltSemantics extendedSemantics = *semantics;
3306 extendedSemantics.minExponent = semIEEEdouble.minExponent;
3307 IEEEFloat extended(*this);
3308 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3309 assert(fs == opOK && !losesInfo);
3310 (void)fs;
3311
3312 IEEEFloat u(extended);
3313 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3314 assert(fs == opOK || fs == opInexact);
3315 (void)fs;
3316 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3317
3318 // If conversion was exact or resulted in a special case, we're done;
3319 // just set the second double to zero. Otherwise, re-convert back to
3320 // the extended format and compute the difference. This now should
3321 // convert exactly to double.
3322 if (u.isFiniteNonZero() && losesInfo) {
3323 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3324 assert(fs == opOK && !losesInfo);
3325 (void)fs;
3326
3327 IEEEFloat v(extended);
3328 v.subtract(u, rmNearestTiesToEven);
3329 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3330 assert(fs == opOK && !losesInfo);
3331 (void)fs;
3332 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3333 } else {
3334 words[1] = 0;
3335 }
3336
3337 return APInt(128, words);
3338 }
3339
convertQuadrupleAPFloatToAPInt() const3340 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3341 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
3342 assert(partCount()==2);
3343
3344 uint64_t myexponent, mysignificand, mysignificand2;
3345
3346 if (isFiniteNonZero()) {
3347 myexponent = exponent+16383; //bias
3348 mysignificand = significandParts()[0];
3349 mysignificand2 = significandParts()[1];
3350 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
3351 myexponent = 0; // denormal
3352 } else if (category==fcZero) {
3353 myexponent = 0;
3354 mysignificand = mysignificand2 = 0;
3355 } else if (category==fcInfinity) {
3356 myexponent = 0x7fff;
3357 mysignificand = mysignificand2 = 0;
3358 } else {
3359 assert(category == fcNaN && "Unknown category!");
3360 myexponent = 0x7fff;
3361 mysignificand = significandParts()[0];
3362 mysignificand2 = significandParts()[1];
3363 }
3364
3365 uint64_t words[2];
3366 words[0] = mysignificand;
3367 words[1] = ((uint64_t)(sign & 1) << 63) |
3368 ((myexponent & 0x7fff) << 48) |
3369 (mysignificand2 & 0xffffffffffffLL);
3370
3371 return APInt(128, words);
3372 }
3373
convertDoubleAPFloatToAPInt() const3374 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3375 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
3376 assert(partCount()==1);
3377
3378 uint64_t myexponent, mysignificand;
3379
3380 if (isFiniteNonZero()) {
3381 myexponent = exponent+1023; //bias
3382 mysignificand = *significandParts();
3383 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
3384 myexponent = 0; // denormal
3385 } else if (category==fcZero) {
3386 myexponent = 0;
3387 mysignificand = 0;
3388 } else if (category==fcInfinity) {
3389 myexponent = 0x7ff;
3390 mysignificand = 0;
3391 } else {
3392 assert(category == fcNaN && "Unknown category!");
3393 myexponent = 0x7ff;
3394 mysignificand = *significandParts();
3395 }
3396
3397 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
3398 ((myexponent & 0x7ff) << 52) |
3399 (mysignificand & 0xfffffffffffffLL))));
3400 }
3401
convertFloatAPFloatToAPInt() const3402 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3403 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
3404 assert(partCount()==1);
3405
3406 uint32_t myexponent, mysignificand;
3407
3408 if (isFiniteNonZero()) {
3409 myexponent = exponent+127; //bias
3410 mysignificand = (uint32_t)*significandParts();
3411 if (myexponent == 1 && !(mysignificand & 0x800000))
3412 myexponent = 0; // denormal
3413 } else if (category==fcZero) {
3414 myexponent = 0;
3415 mysignificand = 0;
3416 } else if (category==fcInfinity) {
3417 myexponent = 0xff;
3418 mysignificand = 0;
3419 } else {
3420 assert(category == fcNaN && "Unknown category!");
3421 myexponent = 0xff;
3422 mysignificand = (uint32_t)*significandParts();
3423 }
3424
3425 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3426 (mysignificand & 0x7fffff)));
3427 }
3428
convertBFloatAPFloatToAPInt() const3429 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3430 assert(semantics == (const llvm::fltSemantics *)&semBFloat);
3431 assert(partCount() == 1);
3432
3433 uint32_t myexponent, mysignificand;
3434
3435 if (isFiniteNonZero()) {
3436 myexponent = exponent + 127; // bias
3437 mysignificand = (uint32_t)*significandParts();
3438 if (myexponent == 1 && !(mysignificand & 0x80))
3439 myexponent = 0; // denormal
3440 } else if (category == fcZero) {
3441 myexponent = 0;
3442 mysignificand = 0;
3443 } else if (category == fcInfinity) {
3444 myexponent = 0xff;
3445 mysignificand = 0;
3446 } else {
3447 assert(category == fcNaN && "Unknown category!");
3448 myexponent = 0xff;
3449 mysignificand = (uint32_t)*significandParts();
3450 }
3451
3452 return APInt(16, (((sign & 1) << 15) | ((myexponent & 0xff) << 7) |
3453 (mysignificand & 0x7f)));
3454 }
3455
convertHalfAPFloatToAPInt() const3456 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3457 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
3458 assert(partCount()==1);
3459
3460 uint32_t myexponent, mysignificand;
3461
3462 if (isFiniteNonZero()) {
3463 myexponent = exponent+15; //bias
3464 mysignificand = (uint32_t)*significandParts();
3465 if (myexponent == 1 && !(mysignificand & 0x400))
3466 myexponent = 0; // denormal
3467 } else if (category==fcZero) {
3468 myexponent = 0;
3469 mysignificand = 0;
3470 } else if (category==fcInfinity) {
3471 myexponent = 0x1f;
3472 mysignificand = 0;
3473 } else {
3474 assert(category == fcNaN && "Unknown category!");
3475 myexponent = 0x1f;
3476 mysignificand = (uint32_t)*significandParts();
3477 }
3478
3479 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3480 (mysignificand & 0x3ff)));
3481 }
3482
convertFloat8E5M2APFloatToAPInt() const3483 APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3484 assert(semantics == (const llvm::fltSemantics *)&semFloat8E5M2);
3485 assert(partCount() == 1);
3486
3487 uint32_t myexponent, mysignificand;
3488
3489 if (isFiniteNonZero()) {
3490 myexponent = exponent + 15; // bias
3491 mysignificand = (uint32_t)*significandParts();
3492 if (myexponent == 1 && !(mysignificand & 0x4))
3493 myexponent = 0; // denormal
3494 } else if (category == fcZero) {
3495 myexponent = 0;
3496 mysignificand = 0;
3497 } else if (category == fcInfinity) {
3498 myexponent = 0x1f;
3499 mysignificand = 0;
3500 } else {
3501 assert(category == fcNaN && "Unknown category!");
3502 myexponent = 0x1f;
3503 mysignificand = (uint32_t)*significandParts();
3504 }
3505
3506 return APInt(8, (((sign & 1) << 7) | ((myexponent & 0x1f) << 2) |
3507 (mysignificand & 0x3)));
3508 }
3509
convertFloat8E4M3FNAPFloatToAPInt() const3510 APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3511 assert(semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN);
3512 assert(partCount() == 1);
3513
3514 uint32_t myexponent, mysignificand;
3515
3516 if (isFiniteNonZero()) {
3517 myexponent = exponent + 7; // bias
3518 mysignificand = (uint32_t)*significandParts();
3519 if (myexponent == 1 && !(mysignificand & 0x8))
3520 myexponent = 0; // denormal
3521 } else if (category == fcZero) {
3522 myexponent = 0;
3523 mysignificand = 0;
3524 } else if (category == fcInfinity) {
3525 myexponent = 0xf;
3526 mysignificand = 0;
3527 } else {
3528 assert(category == fcNaN && "Unknown category!");
3529 myexponent = 0xf;
3530 mysignificand = (uint32_t)*significandParts();
3531 }
3532
3533 return APInt(8, (((sign & 1) << 7) | ((myexponent & 0xf) << 3) |
3534 (mysignificand & 0x7)));
3535 }
3536
3537 // This function creates an APInt that is just a bit map of the floating
3538 // point constant as it would appear in memory. It is not a conversion,
3539 // and treating the result as a normal integer is unlikely to be useful.
3540
bitcastToAPInt() const3541 APInt IEEEFloat::bitcastToAPInt() const {
3542 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3543 return convertHalfAPFloatToAPInt();
3544
3545 if (semantics == (const llvm::fltSemantics *)&semBFloat)
3546 return convertBFloatAPFloatToAPInt();
3547
3548 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3549 return convertFloatAPFloatToAPInt();
3550
3551 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3552 return convertDoubleAPFloatToAPInt();
3553
3554 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3555 return convertQuadrupleAPFloatToAPInt();
3556
3557 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3558 return convertPPCDoubleDoubleAPFloatToAPInt();
3559
3560 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3561 return convertFloat8E5M2APFloatToAPInt();
3562
3563 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3564 return convertFloat8E4M3FNAPFloatToAPInt();
3565
3566 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3567 "unknown format!");
3568 return convertF80LongDoubleAPFloatToAPInt();
3569 }
3570
convertToFloat() const3571 float IEEEFloat::convertToFloat() const {
3572 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3573 "Float semantics are not IEEEsingle");
3574 APInt api = bitcastToAPInt();
3575 return api.bitsToFloat();
3576 }
3577
convertToDouble() const3578 double IEEEFloat::convertToDouble() const {
3579 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3580 "Float semantics are not IEEEdouble");
3581 APInt api = bitcastToAPInt();
3582 return api.bitsToDouble();
3583 }
3584
3585 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3586 /// does not support these bit patterns:
3587 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3588 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3589 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3590 /// exponent = 0, integer bit 1 ("pseudodenormal")
3591 /// At the moment, the first three are treated as NaNs, the last one as Normal.
initFromF80LongDoubleAPInt(const APInt & api)3592 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3593 uint64_t i1 = api.getRawData()[0];
3594 uint64_t i2 = api.getRawData()[1];
3595 uint64_t myexponent = (i2 & 0x7fff);
3596 uint64_t mysignificand = i1;
3597 uint8_t myintegerbit = mysignificand >> 63;
3598
3599 initialize(&semX87DoubleExtended);
3600 assert(partCount()==2);
3601
3602 sign = static_cast<unsigned int>(i2>>15);
3603 if (myexponent == 0 && mysignificand == 0) {
3604 makeZero(sign);
3605 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3606 makeInf(sign);
3607 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3608 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3609 category = fcNaN;
3610 exponent = exponentNaN();
3611 significandParts()[0] = mysignificand;
3612 significandParts()[1] = 0;
3613 } else {
3614 category = fcNormal;
3615 exponent = myexponent - 16383;
3616 significandParts()[0] = mysignificand;
3617 significandParts()[1] = 0;
3618 if (myexponent==0) // denormal
3619 exponent = -16382;
3620 }
3621 }
3622
initFromPPCDoubleDoubleAPInt(const APInt & api)3623 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3624 uint64_t i1 = api.getRawData()[0];
3625 uint64_t i2 = api.getRawData()[1];
3626 opStatus fs;
3627 bool losesInfo;
3628
3629 // Get the first double and convert to our format.
3630 initFromDoubleAPInt(APInt(64, i1));
3631 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3632 assert(fs == opOK && !losesInfo);
3633 (void)fs;
3634
3635 // Unless we have a special case, add in second double.
3636 if (isFiniteNonZero()) {
3637 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3638 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3639 assert(fs == opOK && !losesInfo);
3640 (void)fs;
3641
3642 add(v, rmNearestTiesToEven);
3643 }
3644 }
3645
initFromQuadrupleAPInt(const APInt & api)3646 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3647 uint64_t i1 = api.getRawData()[0];
3648 uint64_t i2 = api.getRawData()[1];
3649 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3650 uint64_t mysignificand = i1;
3651 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3652
3653 initialize(&semIEEEquad);
3654 assert(partCount()==2);
3655
3656 sign = static_cast<unsigned int>(i2>>63);
3657 if (myexponent==0 &&
3658 (mysignificand==0 && mysignificand2==0)) {
3659 makeZero(sign);
3660 } else if (myexponent==0x7fff &&
3661 (mysignificand==0 && mysignificand2==0)) {
3662 makeInf(sign);
3663 } else if (myexponent==0x7fff &&
3664 (mysignificand!=0 || mysignificand2 !=0)) {
3665 category = fcNaN;
3666 exponent = exponentNaN();
3667 significandParts()[0] = mysignificand;
3668 significandParts()[1] = mysignificand2;
3669 } else {
3670 category = fcNormal;
3671 exponent = myexponent - 16383;
3672 significandParts()[0] = mysignificand;
3673 significandParts()[1] = mysignificand2;
3674 if (myexponent==0) // denormal
3675 exponent = -16382;
3676 else
3677 significandParts()[1] |= 0x1000000000000LL; // integer bit
3678 }
3679 }
3680
initFromDoubleAPInt(const APInt & api)3681 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3682 uint64_t i = *api.getRawData();
3683 uint64_t myexponent = (i >> 52) & 0x7ff;
3684 uint64_t mysignificand = i & 0xfffffffffffffLL;
3685
3686 initialize(&semIEEEdouble);
3687 assert(partCount()==1);
3688
3689 sign = static_cast<unsigned int>(i>>63);
3690 if (myexponent==0 && mysignificand==0) {
3691 makeZero(sign);
3692 } else if (myexponent==0x7ff && mysignificand==0) {
3693 makeInf(sign);
3694 } else if (myexponent==0x7ff && mysignificand!=0) {
3695 category = fcNaN;
3696 exponent = exponentNaN();
3697 *significandParts() = mysignificand;
3698 } else {
3699 category = fcNormal;
3700 exponent = myexponent - 1023;
3701 *significandParts() = mysignificand;
3702 if (myexponent==0) // denormal
3703 exponent = -1022;
3704 else
3705 *significandParts() |= 0x10000000000000LL; // integer bit
3706 }
3707 }
3708
initFromFloatAPInt(const APInt & api)3709 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3710 uint32_t i = (uint32_t)*api.getRawData();
3711 uint32_t myexponent = (i >> 23) & 0xff;
3712 uint32_t mysignificand = i & 0x7fffff;
3713
3714 initialize(&semIEEEsingle);
3715 assert(partCount()==1);
3716
3717 sign = i >> 31;
3718 if (myexponent==0 && mysignificand==0) {
3719 makeZero(sign);
3720 } else if (myexponent==0xff && mysignificand==0) {
3721 makeInf(sign);
3722 } else if (myexponent==0xff && mysignificand!=0) {
3723 category = fcNaN;
3724 exponent = exponentNaN();
3725 *significandParts() = mysignificand;
3726 } else {
3727 category = fcNormal;
3728 exponent = myexponent - 127; //bias
3729 *significandParts() = mysignificand;
3730 if (myexponent==0) // denormal
3731 exponent = -126;
3732 else
3733 *significandParts() |= 0x800000; // integer bit
3734 }
3735 }
3736
initFromBFloatAPInt(const APInt & api)3737 void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3738 uint32_t i = (uint32_t)*api.getRawData();
3739 uint32_t myexponent = (i >> 7) & 0xff;
3740 uint32_t mysignificand = i & 0x7f;
3741
3742 initialize(&semBFloat);
3743 assert(partCount() == 1);
3744
3745 sign = i >> 15;
3746 if (myexponent == 0 && mysignificand == 0) {
3747 makeZero(sign);
3748 } else if (myexponent == 0xff && mysignificand == 0) {
3749 makeInf(sign);
3750 } else if (myexponent == 0xff && mysignificand != 0) {
3751 category = fcNaN;
3752 exponent = exponentNaN();
3753 *significandParts() = mysignificand;
3754 } else {
3755 category = fcNormal;
3756 exponent = myexponent - 127; // bias
3757 *significandParts() = mysignificand;
3758 if (myexponent == 0) // denormal
3759 exponent = -126;
3760 else
3761 *significandParts() |= 0x80; // integer bit
3762 }
3763 }
3764
initFromHalfAPInt(const APInt & api)3765 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3766 uint32_t i = (uint32_t)*api.getRawData();
3767 uint32_t myexponent = (i >> 10) & 0x1f;
3768 uint32_t mysignificand = i & 0x3ff;
3769
3770 initialize(&semIEEEhalf);
3771 assert(partCount()==1);
3772
3773 sign = i >> 15;
3774 if (myexponent==0 && mysignificand==0) {
3775 makeZero(sign);
3776 } else if (myexponent==0x1f && mysignificand==0) {
3777 makeInf(sign);
3778 } else if (myexponent==0x1f && mysignificand!=0) {
3779 category = fcNaN;
3780 exponent = exponentNaN();
3781 *significandParts() = mysignificand;
3782 } else {
3783 category = fcNormal;
3784 exponent = myexponent - 15; //bias
3785 *significandParts() = mysignificand;
3786 if (myexponent==0) // denormal
3787 exponent = -14;
3788 else
3789 *significandParts() |= 0x400; // integer bit
3790 }
3791 }
3792
initFromFloat8E5M2APInt(const APInt & api)3793 void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3794 uint32_t i = (uint32_t)*api.getRawData();
3795 uint32_t myexponent = (i >> 2) & 0x1f;
3796 uint32_t mysignificand = i & 0x3;
3797
3798 initialize(&semFloat8E5M2);
3799 assert(partCount() == 1);
3800
3801 sign = i >> 7;
3802 if (myexponent == 0 && mysignificand == 0) {
3803 makeZero(sign);
3804 } else if (myexponent == 0x1f && mysignificand == 0) {
3805 makeInf(sign);
3806 } else if (myexponent == 0x1f && mysignificand != 0) {
3807 category = fcNaN;
3808 exponent = exponentNaN();
3809 *significandParts() = mysignificand;
3810 } else {
3811 category = fcNormal;
3812 exponent = myexponent - 15; // bias
3813 *significandParts() = mysignificand;
3814 if (myexponent == 0) // denormal
3815 exponent = -14;
3816 else
3817 *significandParts() |= 0x4; // integer bit
3818 }
3819 }
3820
initFromFloat8E4M3FNAPInt(const APInt & api)3821 void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3822 uint32_t i = (uint32_t)*api.getRawData();
3823 uint32_t myexponent = (i >> 3) & 0xf;
3824 uint32_t mysignificand = i & 0x7;
3825
3826 initialize(&semFloat8E4M3FN);
3827 assert(partCount() == 1);
3828
3829 sign = i >> 7;
3830 if (myexponent == 0 && mysignificand == 0) {
3831 makeZero(sign);
3832 } else if (myexponent == 0xf && mysignificand == 7) {
3833 category = fcNaN;
3834 exponent = exponentNaN();
3835 *significandParts() = mysignificand;
3836 } else {
3837 category = fcNormal;
3838 exponent = myexponent - 7; // bias
3839 *significandParts() = mysignificand;
3840 if (myexponent == 0) // denormal
3841 exponent = -6;
3842 else
3843 *significandParts() |= 0x8; // integer bit
3844 }
3845 }
3846
3847 /// Treat api as containing the bits of a floating point number.
initFromAPInt(const fltSemantics * Sem,const APInt & api)3848 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3849 assert(api.getBitWidth() == Sem->sizeInBits);
3850 if (Sem == &semIEEEhalf)
3851 return initFromHalfAPInt(api);
3852 if (Sem == &semBFloat)
3853 return initFromBFloatAPInt(api);
3854 if (Sem == &semIEEEsingle)
3855 return initFromFloatAPInt(api);
3856 if (Sem == &semIEEEdouble)
3857 return initFromDoubleAPInt(api);
3858 if (Sem == &semX87DoubleExtended)
3859 return initFromF80LongDoubleAPInt(api);
3860 if (Sem == &semIEEEquad)
3861 return initFromQuadrupleAPInt(api);
3862 if (Sem == &semPPCDoubleDoubleLegacy)
3863 return initFromPPCDoubleDoubleAPInt(api);
3864 if (Sem == &semFloat8E5M2)
3865 return initFromFloat8E5M2APInt(api);
3866 if (Sem == &semFloat8E4M3FN)
3867 return initFromFloat8E4M3FNAPInt(api);
3868
3869 llvm_unreachable(nullptr);
3870 }
3871
3872 /// Make this number the largest magnitude normal number in the given
3873 /// semantics.
makeLargest(bool Negative)3874 void IEEEFloat::makeLargest(bool Negative) {
3875 // We want (in interchange format):
3876 // sign = {Negative}
3877 // exponent = 1..10
3878 // significand = 1..1
3879 category = fcNormal;
3880 sign = Negative;
3881 exponent = semantics->maxExponent;
3882
3883 // Use memset to set all but the highest integerPart to all ones.
3884 integerPart *significand = significandParts();
3885 unsigned PartCount = partCount();
3886 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3887
3888 // Set the high integerPart especially setting all unused top bits for
3889 // internal consistency.
3890 const unsigned NumUnusedHighBits =
3891 PartCount*integerPartWidth - semantics->precision;
3892 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3893 ? (~integerPart(0) >> NumUnusedHighBits)
3894 : 0;
3895
3896 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
3897 significand[0] &= ~integerPart(1);
3898 }
3899
3900 /// Make this number the smallest magnitude denormal number in the given
3901 /// semantics.
makeSmallest(bool Negative)3902 void IEEEFloat::makeSmallest(bool Negative) {
3903 // We want (in interchange format):
3904 // sign = {Negative}
3905 // exponent = 0..0
3906 // significand = 0..01
3907 category = fcNormal;
3908 sign = Negative;
3909 exponent = semantics->minExponent;
3910 APInt::tcSet(significandParts(), 1, partCount());
3911 }
3912
makeSmallestNormalized(bool Negative)3913 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3914 // We want (in interchange format):
3915 // sign = {Negative}
3916 // exponent = 0..0
3917 // significand = 10..0
3918
3919 category = fcNormal;
3920 zeroSignificand();
3921 sign = Negative;
3922 exponent = semantics->minExponent;
3923 APInt::tcSetBit(significandParts(), semantics->precision - 1);
3924 }
3925
IEEEFloat(const fltSemantics & Sem,const APInt & API)3926 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3927 initFromAPInt(&Sem, API);
3928 }
3929
IEEEFloat(float f)3930 IEEEFloat::IEEEFloat(float f) {
3931 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3932 }
3933
IEEEFloat(double d)3934 IEEEFloat::IEEEFloat(double d) {
3935 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3936 }
3937
3938 namespace {
append(SmallVectorImpl<char> & Buffer,StringRef Str)3939 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3940 Buffer.append(Str.begin(), Str.end());
3941 }
3942
3943 /// Removes data from the given significand until it is no more
3944 /// precise than is required for the desired precision.
AdjustToPrecision(APInt & significand,int & exp,unsigned FormatPrecision)3945 void AdjustToPrecision(APInt &significand,
3946 int &exp, unsigned FormatPrecision) {
3947 unsigned bits = significand.getActiveBits();
3948
3949 // 196/59 is a very slight overestimate of lg_2(10).
3950 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3951
3952 if (bits <= bitsRequired) return;
3953
3954 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3955 if (!tensRemovable) return;
3956
3957 exp += tensRemovable;
3958
3959 APInt divisor(significand.getBitWidth(), 1);
3960 APInt powten(significand.getBitWidth(), 10);
3961 while (true) {
3962 if (tensRemovable & 1)
3963 divisor *= powten;
3964 tensRemovable >>= 1;
3965 if (!tensRemovable) break;
3966 powten *= powten;
3967 }
3968
3969 significand = significand.udiv(divisor);
3970
3971 // Truncate the significand down to its active bit count.
3972 significand = significand.trunc(significand.getActiveBits());
3973 }
3974
3975
AdjustToPrecision(SmallVectorImpl<char> & buffer,int & exp,unsigned FormatPrecision)3976 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3977 int &exp, unsigned FormatPrecision) {
3978 unsigned N = buffer.size();
3979 if (N <= FormatPrecision) return;
3980
3981 // The most significant figures are the last ones in the buffer.
3982 unsigned FirstSignificant = N - FormatPrecision;
3983
3984 // Round.
3985 // FIXME: this probably shouldn't use 'round half up'.
3986
3987 // Rounding down is just a truncation, except we also want to drop
3988 // trailing zeros from the new result.
3989 if (buffer[FirstSignificant - 1] < '5') {
3990 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3991 FirstSignificant++;
3992
3993 exp += FirstSignificant;
3994 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3995 return;
3996 }
3997
3998 // Rounding up requires a decimal add-with-carry. If we continue
3999 // the carry, the newly-introduced zeros will just be truncated.
4000 for (unsigned I = FirstSignificant; I != N; ++I) {
4001 if (buffer[I] == '9') {
4002 FirstSignificant++;
4003 } else {
4004 buffer[I]++;
4005 break;
4006 }
4007 }
4008
4009 // If we carried through, we have exactly one digit of precision.
4010 if (FirstSignificant == N) {
4011 exp += FirstSignificant;
4012 buffer.clear();
4013 buffer.push_back('1');
4014 return;
4015 }
4016
4017 exp += FirstSignificant;
4018 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4019 }
4020 } // namespace
4021
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const4022 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4023 unsigned FormatMaxPadding, bool TruncateZero) const {
4024 switch (category) {
4025 case fcInfinity:
4026 if (isNegative())
4027 return append(Str, "-Inf");
4028 else
4029 return append(Str, "+Inf");
4030
4031 case fcNaN: return append(Str, "NaN");
4032
4033 case fcZero:
4034 if (isNegative())
4035 Str.push_back('-');
4036
4037 if (!FormatMaxPadding) {
4038 if (TruncateZero)
4039 append(Str, "0.0E+0");
4040 else {
4041 append(Str, "0.0");
4042 if (FormatPrecision > 1)
4043 Str.append(FormatPrecision - 1, '0');
4044 append(Str, "e+00");
4045 }
4046 } else
4047 Str.push_back('0');
4048 return;
4049
4050 case fcNormal:
4051 break;
4052 }
4053
4054 if (isNegative())
4055 Str.push_back('-');
4056
4057 // Decompose the number into an APInt and an exponent.
4058 int exp = exponent - ((int) semantics->precision - 1);
4059 APInt significand(
4060 semantics->precision,
4061 ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4062
4063 // Set FormatPrecision if zero. We want to do this before we
4064 // truncate trailing zeros, as those are part of the precision.
4065 if (!FormatPrecision) {
4066 // We use enough digits so the number can be round-tripped back to an
4067 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4068 // Accurately" by Steele and White.
4069 // FIXME: Using a formula based purely on the precision is conservative;
4070 // we can print fewer digits depending on the actual value being printed.
4071
4072 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4073 FormatPrecision = 2 + semantics->precision * 59 / 196;
4074 }
4075
4076 // Ignore trailing binary zeros.
4077 int trailingZeros = significand.countTrailingZeros();
4078 exp += trailingZeros;
4079 significand.lshrInPlace(trailingZeros);
4080
4081 // Change the exponent from 2^e to 10^e.
4082 if (exp == 0) {
4083 // Nothing to do.
4084 } else if (exp > 0) {
4085 // Just shift left.
4086 significand = significand.zext(semantics->precision + exp);
4087 significand <<= exp;
4088 exp = 0;
4089 } else { /* exp < 0 */
4090 int texp = -exp;
4091
4092 // We transform this using the identity:
4093 // (N)(2^-e) == (N)(5^e)(10^-e)
4094 // This means we have to multiply N (the significand) by 5^e.
4095 // To avoid overflow, we have to operate on numbers large
4096 // enough to store N * 5^e:
4097 // log2(N * 5^e) == log2(N) + e * log2(5)
4098 // <= semantics->precision + e * 137 / 59
4099 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4100
4101 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
4102
4103 // Multiply significand by 5^e.
4104 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4105 significand = significand.zext(precision);
4106 APInt five_to_the_i(precision, 5);
4107 while (true) {
4108 if (texp & 1) significand *= five_to_the_i;
4109
4110 texp >>= 1;
4111 if (!texp) break;
4112 five_to_the_i *= five_to_the_i;
4113 }
4114 }
4115
4116 AdjustToPrecision(significand, exp, FormatPrecision);
4117
4118 SmallVector<char, 256> buffer;
4119
4120 // Fill the buffer.
4121 unsigned precision = significand.getBitWidth();
4122 if (precision < 4) {
4123 // We need enough precision to store the value 10.
4124 precision = 4;
4125 significand = significand.zext(precision);
4126 }
4127 APInt ten(precision, 10);
4128 APInt digit(precision, 0);
4129
4130 bool inTrail = true;
4131 while (significand != 0) {
4132 // digit <- significand % 10
4133 // significand <- significand / 10
4134 APInt::udivrem(significand, ten, significand, digit);
4135
4136 unsigned d = digit.getZExtValue();
4137
4138 // Drop trailing zeros.
4139 if (inTrail && !d) exp++;
4140 else {
4141 buffer.push_back((char) ('0' + d));
4142 inTrail = false;
4143 }
4144 }
4145
4146 assert(!buffer.empty() && "no characters in buffer!");
4147
4148 // Drop down to FormatPrecision.
4149 // TODO: don't do more precise calculations above than are required.
4150 AdjustToPrecision(buffer, exp, FormatPrecision);
4151
4152 unsigned NDigits = buffer.size();
4153
4154 // Check whether we should use scientific notation.
4155 bool FormatScientific;
4156 if (!FormatMaxPadding)
4157 FormatScientific = true;
4158 else {
4159 if (exp >= 0) {
4160 // 765e3 --> 765000
4161 // ^^^
4162 // But we shouldn't make the number look more precise than it is.
4163 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4164 NDigits + (unsigned) exp > FormatPrecision);
4165 } else {
4166 // Power of the most significant digit.
4167 int MSD = exp + (int) (NDigits - 1);
4168 if (MSD >= 0) {
4169 // 765e-2 == 7.65
4170 FormatScientific = false;
4171 } else {
4172 // 765e-5 == 0.00765
4173 // ^ ^^
4174 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4175 }
4176 }
4177 }
4178
4179 // Scientific formatting is pretty straightforward.
4180 if (FormatScientific) {
4181 exp += (NDigits - 1);
4182
4183 Str.push_back(buffer[NDigits-1]);
4184 Str.push_back('.');
4185 if (NDigits == 1 && TruncateZero)
4186 Str.push_back('0');
4187 else
4188 for (unsigned I = 1; I != NDigits; ++I)
4189 Str.push_back(buffer[NDigits-1-I]);
4190 // Fill with zeros up to FormatPrecision.
4191 if (!TruncateZero && FormatPrecision > NDigits - 1)
4192 Str.append(FormatPrecision - NDigits + 1, '0');
4193 // For !TruncateZero we use lower 'e'.
4194 Str.push_back(TruncateZero ? 'E' : 'e');
4195
4196 Str.push_back(exp >= 0 ? '+' : '-');
4197 if (exp < 0) exp = -exp;
4198 SmallVector<char, 6> expbuf;
4199 do {
4200 expbuf.push_back((char) ('0' + (exp % 10)));
4201 exp /= 10;
4202 } while (exp);
4203 // Exponent always at least two digits if we do not truncate zeros.
4204 if (!TruncateZero && expbuf.size() < 2)
4205 expbuf.push_back('0');
4206 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4207 Str.push_back(expbuf[E-1-I]);
4208 return;
4209 }
4210
4211 // Non-scientific, positive exponents.
4212 if (exp >= 0) {
4213 for (unsigned I = 0; I != NDigits; ++I)
4214 Str.push_back(buffer[NDigits-1-I]);
4215 for (unsigned I = 0; I != (unsigned) exp; ++I)
4216 Str.push_back('0');
4217 return;
4218 }
4219
4220 // Non-scientific, negative exponents.
4221
4222 // The number of digits to the left of the decimal point.
4223 int NWholeDigits = exp + (int) NDigits;
4224
4225 unsigned I = 0;
4226 if (NWholeDigits > 0) {
4227 for (; I != (unsigned) NWholeDigits; ++I)
4228 Str.push_back(buffer[NDigits-I-1]);
4229 Str.push_back('.');
4230 } else {
4231 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4232
4233 Str.push_back('0');
4234 Str.push_back('.');
4235 for (unsigned Z = 1; Z != NZeros; ++Z)
4236 Str.push_back('0');
4237 }
4238
4239 for (; I != NDigits; ++I)
4240 Str.push_back(buffer[NDigits-I-1]);
4241 }
4242
getExactInverse(APFloat * inv) const4243 bool IEEEFloat::getExactInverse(APFloat *inv) const {
4244 // Special floats and denormals have no exact inverse.
4245 if (!isFiniteNonZero())
4246 return false;
4247
4248 // Check that the number is a power of two by making sure that only the
4249 // integer bit is set in the significand.
4250 if (significandLSB() != semantics->precision - 1)
4251 return false;
4252
4253 // Get the inverse.
4254 IEEEFloat reciprocal(*semantics, 1ULL);
4255 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4256 return false;
4257
4258 // Avoid multiplication with a denormal, it is not safe on all platforms and
4259 // may be slower than a normal division.
4260 if (reciprocal.isDenormal())
4261 return false;
4262
4263 assert(reciprocal.isFiniteNonZero() &&
4264 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4265
4266 if (inv)
4267 *inv = APFloat(reciprocal, *semantics);
4268
4269 return true;
4270 }
4271
isSignaling() const4272 bool IEEEFloat::isSignaling() const {
4273 if (!isNaN())
4274 return false;
4275 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
4276 return false;
4277
4278 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4279 // first bit of the trailing significand being 0.
4280 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4281 }
4282
4283 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4284 ///
4285 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4286 /// appropriate sign switching before/after the computation.
next(bool nextDown)4287 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
4288 // If we are performing nextDown, swap sign so we have -x.
4289 if (nextDown)
4290 changeSign();
4291
4292 // Compute nextUp(x)
4293 opStatus result = opOK;
4294
4295 // Handle each float category separately.
4296 switch (category) {
4297 case fcInfinity:
4298 // nextUp(+inf) = +inf
4299 if (!isNegative())
4300 break;
4301 // nextUp(-inf) = -getLargest()
4302 makeLargest(true);
4303 break;
4304 case fcNaN:
4305 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4306 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4307 // change the payload.
4308 if (isSignaling()) {
4309 result = opInvalidOp;
4310 // For consistency, propagate the sign of the sNaN to the qNaN.
4311 makeNaN(false, isNegative(), nullptr);
4312 }
4313 break;
4314 case fcZero:
4315 // nextUp(pm 0) = +getSmallest()
4316 makeSmallest(false);
4317 break;
4318 case fcNormal:
4319 // nextUp(-getSmallest()) = -0
4320 if (isSmallest() && isNegative()) {
4321 APInt::tcSet(significandParts(), 0, partCount());
4322 category = fcZero;
4323 exponent = 0;
4324 break;
4325 }
4326
4327 if (isLargest() && !isNegative()) {
4328 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4329 // nextUp(getLargest()) == NAN
4330 makeNaN();
4331 break;
4332 } else {
4333 // nextUp(getLargest()) == INFINITY
4334 APInt::tcSet(significandParts(), 0, partCount());
4335 category = fcInfinity;
4336 exponent = semantics->maxExponent + 1;
4337 break;
4338 }
4339 }
4340
4341 // nextUp(normal) == normal + inc.
4342 if (isNegative()) {
4343 // If we are negative, we need to decrement the significand.
4344
4345 // We only cross a binade boundary that requires adjusting the exponent
4346 // if:
4347 // 1. exponent != semantics->minExponent. This implies we are not in the
4348 // smallest binade or are dealing with denormals.
4349 // 2. Our significand excluding the integral bit is all zeros.
4350 bool WillCrossBinadeBoundary =
4351 exponent != semantics->minExponent && isSignificandAllZeros();
4352
4353 // Decrement the significand.
4354 //
4355 // We always do this since:
4356 // 1. If we are dealing with a non-binade decrement, by definition we
4357 // just decrement the significand.
4358 // 2. If we are dealing with a normal -> normal binade decrement, since
4359 // we have an explicit integral bit the fact that all bits but the
4360 // integral bit are zero implies that subtracting one will yield a
4361 // significand with 0 integral bit and 1 in all other spots. Thus we
4362 // must just adjust the exponent and set the integral bit to 1.
4363 // 3. If we are dealing with a normal -> denormal binade decrement,
4364 // since we set the integral bit to 0 when we represent denormals, we
4365 // just decrement the significand.
4366 integerPart *Parts = significandParts();
4367 APInt::tcDecrement(Parts, partCount());
4368
4369 if (WillCrossBinadeBoundary) {
4370 // Our result is a normal number. Do the following:
4371 // 1. Set the integral bit to 1.
4372 // 2. Decrement the exponent.
4373 APInt::tcSetBit(Parts, semantics->precision - 1);
4374 exponent--;
4375 }
4376 } else {
4377 // If we are positive, we need to increment the significand.
4378
4379 // We only cross a binade boundary that requires adjusting the exponent if
4380 // the input is not a denormal and all of said input's significand bits
4381 // are set. If all of said conditions are true: clear the significand, set
4382 // the integral bit to 1, and increment the exponent. If we have a
4383 // denormal always increment since moving denormals and the numbers in the
4384 // smallest normal binade have the same exponent in our representation.
4385 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
4386
4387 if (WillCrossBinadeBoundary) {
4388 integerPart *Parts = significandParts();
4389 APInt::tcSet(Parts, 0, partCount());
4390 APInt::tcSetBit(Parts, semantics->precision - 1);
4391 assert(exponent != semantics->maxExponent &&
4392 "We can not increment an exponent beyond the maxExponent allowed"
4393 " by the given floating point semantics.");
4394 exponent++;
4395 } else {
4396 incrementSignificand();
4397 }
4398 }
4399 break;
4400 }
4401
4402 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4403 if (nextDown)
4404 changeSign();
4405
4406 return result;
4407 }
4408
exponentNaN() const4409 APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4410 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
4411 return semantics->maxExponent;
4412 return semantics->maxExponent + 1;
4413 }
4414
exponentInf() const4415 APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4416 return semantics->maxExponent + 1;
4417 }
4418
exponentZero() const4419 APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4420 return semantics->minExponent - 1;
4421 }
4422
makeInf(bool Negative)4423 void IEEEFloat::makeInf(bool Negative) {
4424 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4425 // There is no Inf, so make NaN instead.
4426 makeNaN(false, Negative);
4427 return;
4428 }
4429 category = fcInfinity;
4430 sign = Negative;
4431 exponent = exponentInf();
4432 APInt::tcSet(significandParts(), 0, partCount());
4433 }
4434
makeZero(bool Negative)4435 void IEEEFloat::makeZero(bool Negative) {
4436 category = fcZero;
4437 sign = Negative;
4438 exponent = exponentZero();
4439 APInt::tcSet(significandParts(), 0, partCount());
4440 }
4441
makeQuiet()4442 void IEEEFloat::makeQuiet() {
4443 assert(isNaN());
4444 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4445 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4446 }
4447
ilogb(const IEEEFloat & Arg)4448 int ilogb(const IEEEFloat &Arg) {
4449 if (Arg.isNaN())
4450 return IEEEFloat::IEK_NaN;
4451 if (Arg.isZero())
4452 return IEEEFloat::IEK_Zero;
4453 if (Arg.isInfinity())
4454 return IEEEFloat::IEK_Inf;
4455 if (!Arg.isDenormal())
4456 return Arg.exponent;
4457
4458 IEEEFloat Normalized(Arg);
4459 int SignificandBits = Arg.getSemantics().precision - 1;
4460
4461 Normalized.exponent += SignificandBits;
4462 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4463 return Normalized.exponent - SignificandBits;
4464 }
4465
scalbn(IEEEFloat X,int Exp,IEEEFloat::roundingMode RoundingMode)4466 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
4467 auto MaxExp = X.getSemantics().maxExponent;
4468 auto MinExp = X.getSemantics().minExponent;
4469
4470 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4471 // overflow; clamp it to a safe range before adding, but ensure that the range
4472 // is large enough that the clamp does not change the result. The range we
4473 // need to support is the difference between the largest possible exponent and
4474 // the normalized exponent of half the smallest denormal.
4475
4476 int SignificandBits = X.getSemantics().precision - 1;
4477 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4478
4479 // Clamp to one past the range ends to let normalize handle overlflow.
4480 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
4481 X.normalize(RoundingMode, lfExactlyZero);
4482 if (X.isNaN())
4483 X.makeQuiet();
4484 return X;
4485 }
4486
frexp(const IEEEFloat & Val,int & Exp,IEEEFloat::roundingMode RM)4487 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
4488 Exp = ilogb(Val);
4489
4490 // Quiet signalling nans.
4491 if (Exp == IEEEFloat::IEK_NaN) {
4492 IEEEFloat Quiet(Val);
4493 Quiet.makeQuiet();
4494 return Quiet;
4495 }
4496
4497 if (Exp == IEEEFloat::IEK_Inf)
4498 return Val;
4499
4500 // 1 is added because frexp is defined to return a normalized fraction in
4501 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4502 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
4503 return scalbn(Val, -Exp, RM);
4504 }
4505
DoubleAPFloat(const fltSemantics & S)4506 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4507 : Semantics(&S),
4508 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
4509 assert(Semantics == &semPPCDoubleDouble);
4510 }
4511
DoubleAPFloat(const fltSemantics & S,uninitializedTag)4512 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
4513 : Semantics(&S),
4514 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
4515 APFloat(semIEEEdouble, uninitialized)}) {
4516 assert(Semantics == &semPPCDoubleDouble);
4517 }
4518
DoubleAPFloat(const fltSemantics & S,integerPart I)4519 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
4520 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4521 APFloat(semIEEEdouble)}) {
4522 assert(Semantics == &semPPCDoubleDouble);
4523 }
4524
DoubleAPFloat(const fltSemantics & S,const APInt & I)4525 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
4526 : Semantics(&S),
4527 Floats(new APFloat[2]{
4528 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4529 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4530 assert(Semantics == &semPPCDoubleDouble);
4531 }
4532
DoubleAPFloat(const fltSemantics & S,APFloat && First,APFloat && Second)4533 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
4534 APFloat &&Second)
4535 : Semantics(&S),
4536 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4537 assert(Semantics == &semPPCDoubleDouble);
4538 assert(&Floats[0].getSemantics() == &semIEEEdouble);
4539 assert(&Floats[1].getSemantics() == &semIEEEdouble);
4540 }
4541
DoubleAPFloat(const DoubleAPFloat & RHS)4542 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
4543 : Semantics(RHS.Semantics),
4544 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4545 APFloat(RHS.Floats[1])}
4546 : nullptr) {
4547 assert(Semantics == &semPPCDoubleDouble);
4548 }
4549
DoubleAPFloat(DoubleAPFloat && RHS)4550 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
4551 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
4552 RHS.Semantics = &semBogus;
4553 assert(Semantics == &semPPCDoubleDouble);
4554 }
4555
operator =(const DoubleAPFloat & RHS)4556 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
4557 if (Semantics == RHS.Semantics && RHS.Floats) {
4558 Floats[0] = RHS.Floats[0];
4559 Floats[1] = RHS.Floats[1];
4560 } else if (this != &RHS) {
4561 this->~DoubleAPFloat();
4562 new (this) DoubleAPFloat(RHS);
4563 }
4564 return *this;
4565 }
4566
4567 // Implement addition, subtraction, multiplication and division based on:
4568 // "Software for Doubled-Precision Floating-Point Computations",
4569 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
addImpl(const APFloat & a,const APFloat & aa,const APFloat & c,const APFloat & cc,roundingMode RM)4570 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4571 const APFloat &c, const APFloat &cc,
4572 roundingMode RM) {
4573 int Status = opOK;
4574 APFloat z = a;
4575 Status |= z.add(c, RM);
4576 if (!z.isFinite()) {
4577 if (!z.isInfinity()) {
4578 Floats[0] = std::move(z);
4579 Floats[1].makeZero(/* Neg = */ false);
4580 return (opStatus)Status;
4581 }
4582 Status = opOK;
4583 auto AComparedToC = a.compareAbsoluteValue(c);
4584 z = cc;
4585 Status |= z.add(aa, RM);
4586 if (AComparedToC == APFloat::cmpGreaterThan) {
4587 // z = cc + aa + c + a;
4588 Status |= z.add(c, RM);
4589 Status |= z.add(a, RM);
4590 } else {
4591 // z = cc + aa + a + c;
4592 Status |= z.add(a, RM);
4593 Status |= z.add(c, RM);
4594 }
4595 if (!z.isFinite()) {
4596 Floats[0] = std::move(z);
4597 Floats[1].makeZero(/* Neg = */ false);
4598 return (opStatus)Status;
4599 }
4600 Floats[0] = z;
4601 APFloat zz = aa;
4602 Status |= zz.add(cc, RM);
4603 if (AComparedToC == APFloat::cmpGreaterThan) {
4604 // Floats[1] = a - z + c + zz;
4605 Floats[1] = a;
4606 Status |= Floats[1].subtract(z, RM);
4607 Status |= Floats[1].add(c, RM);
4608 Status |= Floats[1].add(zz, RM);
4609 } else {
4610 // Floats[1] = c - z + a + zz;
4611 Floats[1] = c;
4612 Status |= Floats[1].subtract(z, RM);
4613 Status |= Floats[1].add(a, RM);
4614 Status |= Floats[1].add(zz, RM);
4615 }
4616 } else {
4617 // q = a - z;
4618 APFloat q = a;
4619 Status |= q.subtract(z, RM);
4620
4621 // zz = q + c + (a - (q + z)) + aa + cc;
4622 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4623 auto zz = q;
4624 Status |= zz.add(c, RM);
4625 Status |= q.add(z, RM);
4626 Status |= q.subtract(a, RM);
4627 q.changeSign();
4628 Status |= zz.add(q, RM);
4629 Status |= zz.add(aa, RM);
4630 Status |= zz.add(cc, RM);
4631 if (zz.isZero() && !zz.isNegative()) {
4632 Floats[0] = std::move(z);
4633 Floats[1].makeZero(/* Neg = */ false);
4634 return opOK;
4635 }
4636 Floats[0] = z;
4637 Status |= Floats[0].add(zz, RM);
4638 if (!Floats[0].isFinite()) {
4639 Floats[1].makeZero(/* Neg = */ false);
4640 return (opStatus)Status;
4641 }
4642 Floats[1] = std::move(z);
4643 Status |= Floats[1].subtract(Floats[0], RM);
4644 Status |= Floats[1].add(zz, RM);
4645 }
4646 return (opStatus)Status;
4647 }
4648
addWithSpecial(const DoubleAPFloat & LHS,const DoubleAPFloat & RHS,DoubleAPFloat & Out,roundingMode RM)4649 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4650 const DoubleAPFloat &RHS,
4651 DoubleAPFloat &Out,
4652 roundingMode RM) {
4653 if (LHS.getCategory() == fcNaN) {
4654 Out = LHS;
4655 return opOK;
4656 }
4657 if (RHS.getCategory() == fcNaN) {
4658 Out = RHS;
4659 return opOK;
4660 }
4661 if (LHS.getCategory() == fcZero) {
4662 Out = RHS;
4663 return opOK;
4664 }
4665 if (RHS.getCategory() == fcZero) {
4666 Out = LHS;
4667 return opOK;
4668 }
4669 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4670 LHS.isNegative() != RHS.isNegative()) {
4671 Out.makeNaN(false, Out.isNegative(), nullptr);
4672 return opInvalidOp;
4673 }
4674 if (LHS.getCategory() == fcInfinity) {
4675 Out = LHS;
4676 return opOK;
4677 }
4678 if (RHS.getCategory() == fcInfinity) {
4679 Out = RHS;
4680 return opOK;
4681 }
4682 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4683
4684 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4685 CC(RHS.Floats[1]);
4686 assert(&A.getSemantics() == &semIEEEdouble);
4687 assert(&AA.getSemantics() == &semIEEEdouble);
4688 assert(&C.getSemantics() == &semIEEEdouble);
4689 assert(&CC.getSemantics() == &semIEEEdouble);
4690 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4691 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4692 return Out.addImpl(A, AA, C, CC, RM);
4693 }
4694
add(const DoubleAPFloat & RHS,roundingMode RM)4695 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4696 roundingMode RM) {
4697 return addWithSpecial(*this, RHS, *this, RM);
4698 }
4699
subtract(const DoubleAPFloat & RHS,roundingMode RM)4700 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4701 roundingMode RM) {
4702 changeSign();
4703 auto Ret = add(RHS, RM);
4704 changeSign();
4705 return Ret;
4706 }
4707
multiply(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4708 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4709 APFloat::roundingMode RM) {
4710 const auto &LHS = *this;
4711 auto &Out = *this;
4712 /* Interesting observation: For special categories, finding the lowest
4713 common ancestor of the following layered graph gives the correct
4714 return category:
4715
4716 NaN
4717 / \
4718 Zero Inf
4719 \ /
4720 Normal
4721
4722 e.g. NaN * NaN = NaN
4723 Zero * Inf = NaN
4724 Normal * Zero = Zero
4725 Normal * Inf = Inf
4726 */
4727 if (LHS.getCategory() == fcNaN) {
4728 Out = LHS;
4729 return opOK;
4730 }
4731 if (RHS.getCategory() == fcNaN) {
4732 Out = RHS;
4733 return opOK;
4734 }
4735 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4736 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4737 Out.makeNaN(false, false, nullptr);
4738 return opOK;
4739 }
4740 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4741 Out = LHS;
4742 return opOK;
4743 }
4744 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4745 Out = RHS;
4746 return opOK;
4747 }
4748 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4749 "Special cases not handled exhaustively");
4750
4751 int Status = opOK;
4752 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4753 // t = a * c
4754 APFloat T = A;
4755 Status |= T.multiply(C, RM);
4756 if (!T.isFiniteNonZero()) {
4757 Floats[0] = T;
4758 Floats[1].makeZero(/* Neg = */ false);
4759 return (opStatus)Status;
4760 }
4761
4762 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4763 APFloat Tau = A;
4764 T.changeSign();
4765 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4766 T.changeSign();
4767 {
4768 // v = a * d
4769 APFloat V = A;
4770 Status |= V.multiply(D, RM);
4771 // w = b * c
4772 APFloat W = B;
4773 Status |= W.multiply(C, RM);
4774 Status |= V.add(W, RM);
4775 // tau += v + w
4776 Status |= Tau.add(V, RM);
4777 }
4778 // u = t + tau
4779 APFloat U = T;
4780 Status |= U.add(Tau, RM);
4781
4782 Floats[0] = U;
4783 if (!U.isFinite()) {
4784 Floats[1].makeZero(/* Neg = */ false);
4785 } else {
4786 // Floats[1] = (t - u) + tau
4787 Status |= T.subtract(U, RM);
4788 Status |= T.add(Tau, RM);
4789 Floats[1] = T;
4790 }
4791 return (opStatus)Status;
4792 }
4793
divide(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4794 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4795 APFloat::roundingMode RM) {
4796 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4797 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4798 auto Ret =
4799 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4800 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4801 return Ret;
4802 }
4803
remainder(const DoubleAPFloat & RHS)4804 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4805 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4806 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4807 auto Ret =
4808 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4809 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4810 return Ret;
4811 }
4812
mod(const DoubleAPFloat & RHS)4813 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4814 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4815 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4816 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4817 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4818 return Ret;
4819 }
4820
4821 APFloat::opStatus
fusedMultiplyAdd(const DoubleAPFloat & Multiplicand,const DoubleAPFloat & Addend,APFloat::roundingMode RM)4822 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4823 const DoubleAPFloat &Addend,
4824 APFloat::roundingMode RM) {
4825 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4826 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4827 auto Ret = Tmp.fusedMultiplyAdd(
4828 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4829 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4830 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4831 return Ret;
4832 }
4833
roundToIntegral(APFloat::roundingMode RM)4834 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4835 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4836 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4837 auto Ret = Tmp.roundToIntegral(RM);
4838 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4839 return Ret;
4840 }
4841
changeSign()4842 void DoubleAPFloat::changeSign() {
4843 Floats[0].changeSign();
4844 Floats[1].changeSign();
4845 }
4846
4847 APFloat::cmpResult
compareAbsoluteValue(const DoubleAPFloat & RHS) const4848 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4849 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4850 if (Result != cmpEqual)
4851 return Result;
4852 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4853 if (Result == cmpLessThan || Result == cmpGreaterThan) {
4854 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4855 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4856 if (Against && !RHSAgainst)
4857 return cmpLessThan;
4858 if (!Against && RHSAgainst)
4859 return cmpGreaterThan;
4860 if (!Against && !RHSAgainst)
4861 return Result;
4862 if (Against && RHSAgainst)
4863 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4864 }
4865 return Result;
4866 }
4867
getCategory() const4868 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4869 return Floats[0].getCategory();
4870 }
4871
isNegative() const4872 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4873
makeInf(bool Neg)4874 void DoubleAPFloat::makeInf(bool Neg) {
4875 Floats[0].makeInf(Neg);
4876 Floats[1].makeZero(/* Neg = */ false);
4877 }
4878
makeZero(bool Neg)4879 void DoubleAPFloat::makeZero(bool Neg) {
4880 Floats[0].makeZero(Neg);
4881 Floats[1].makeZero(/* Neg = */ false);
4882 }
4883
makeLargest(bool Neg)4884 void DoubleAPFloat::makeLargest(bool Neg) {
4885 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4886 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4887 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4888 if (Neg)
4889 changeSign();
4890 }
4891
makeSmallest(bool Neg)4892 void DoubleAPFloat::makeSmallest(bool Neg) {
4893 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4894 Floats[0].makeSmallest(Neg);
4895 Floats[1].makeZero(/* Neg = */ false);
4896 }
4897
makeSmallestNormalized(bool Neg)4898 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4899 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4900 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4901 if (Neg)
4902 Floats[0].changeSign();
4903 Floats[1].makeZero(/* Neg = */ false);
4904 }
4905
makeNaN(bool SNaN,bool Neg,const APInt * fill)4906 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4907 Floats[0].makeNaN(SNaN, Neg, fill);
4908 Floats[1].makeZero(/* Neg = */ false);
4909 }
4910
compare(const DoubleAPFloat & RHS) const4911 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4912 auto Result = Floats[0].compare(RHS.Floats[0]);
4913 // |Float[0]| > |Float[1]|
4914 if (Result == APFloat::cmpEqual)
4915 return Floats[1].compare(RHS.Floats[1]);
4916 return Result;
4917 }
4918
bitwiseIsEqual(const DoubleAPFloat & RHS) const4919 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4920 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4921 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4922 }
4923
hash_value(const DoubleAPFloat & Arg)4924 hash_code hash_value(const DoubleAPFloat &Arg) {
4925 if (Arg.Floats)
4926 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4927 return hash_combine(Arg.Semantics);
4928 }
4929
bitcastToAPInt() const4930 APInt DoubleAPFloat::bitcastToAPInt() const {
4931 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4932 uint64_t Data[] = {
4933 Floats[0].bitcastToAPInt().getRawData()[0],
4934 Floats[1].bitcastToAPInt().getRawData()[0],
4935 };
4936 return APInt(128, 2, Data);
4937 }
4938
convertFromString(StringRef S,roundingMode RM)4939 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
4940 roundingMode RM) {
4941 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4942 APFloat Tmp(semPPCDoubleDoubleLegacy);
4943 auto Ret = Tmp.convertFromString(S, RM);
4944 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4945 return Ret;
4946 }
4947
next(bool nextDown)4948 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
4949 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4950 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4951 auto Ret = Tmp.next(nextDown);
4952 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4953 return Ret;
4954 }
4955
4956 APFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> Input,unsigned int Width,bool IsSigned,roundingMode RM,bool * IsExact) const4957 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
4958 unsigned int Width, bool IsSigned,
4959 roundingMode RM, bool *IsExact) const {
4960 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4961 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4962 .convertToInteger(Input, Width, IsSigned, RM, IsExact);
4963 }
4964
convertFromAPInt(const APInt & Input,bool IsSigned,roundingMode RM)4965 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
4966 bool IsSigned,
4967 roundingMode RM) {
4968 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4969 APFloat Tmp(semPPCDoubleDoubleLegacy);
4970 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
4971 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4972 return Ret;
4973 }
4974
4975 APFloat::opStatus
convertFromSignExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)4976 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
4977 unsigned int InputSize,
4978 bool IsSigned, roundingMode RM) {
4979 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4980 APFloat Tmp(semPPCDoubleDoubleLegacy);
4981 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
4982 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4983 return Ret;
4984 }
4985
4986 APFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)4987 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
4988 unsigned int InputSize,
4989 bool IsSigned, roundingMode RM) {
4990 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4991 APFloat Tmp(semPPCDoubleDoubleLegacy);
4992 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
4993 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4994 return Ret;
4995 }
4996
convertToHexString(char * DST,unsigned int HexDigits,bool UpperCase,roundingMode RM) const4997 unsigned int DoubleAPFloat::convertToHexString(char *DST,
4998 unsigned int HexDigits,
4999 bool UpperCase,
5000 roundingMode RM) const {
5001 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5002 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5003 .convertToHexString(DST, HexDigits, UpperCase, RM);
5004 }
5005
isDenormal() const5006 bool DoubleAPFloat::isDenormal() const {
5007 return getCategory() == fcNormal &&
5008 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5009 // (double)(Hi + Lo) == Hi defines a normal number.
5010 Floats[0] != Floats[0] + Floats[1]);
5011 }
5012
isSmallest() const5013 bool DoubleAPFloat::isSmallest() const {
5014 if (getCategory() != fcNormal)
5015 return false;
5016 DoubleAPFloat Tmp(*this);
5017 Tmp.makeSmallest(this->isNegative());
5018 return Tmp.compare(*this) == cmpEqual;
5019 }
5020
isSmallestNormalized() const5021 bool DoubleAPFloat::isSmallestNormalized() const {
5022 if (getCategory() != fcNormal)
5023 return false;
5024
5025 DoubleAPFloat Tmp(*this);
5026 Tmp.makeSmallestNormalized(this->isNegative());
5027 return Tmp.compare(*this) == cmpEqual;
5028 }
5029
isLargest() const5030 bool DoubleAPFloat::isLargest() const {
5031 if (getCategory() != fcNormal)
5032 return false;
5033 DoubleAPFloat Tmp(*this);
5034 Tmp.makeLargest(this->isNegative());
5035 return Tmp.compare(*this) == cmpEqual;
5036 }
5037
isInteger() const5038 bool DoubleAPFloat::isInteger() const {
5039 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5040 return Floats[0].isInteger() && Floats[1].isInteger();
5041 }
5042
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const5043 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
5044 unsigned FormatPrecision,
5045 unsigned FormatMaxPadding,
5046 bool TruncateZero) const {
5047 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5048 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5049 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5050 }
5051
getExactInverse(APFloat * inv) const5052 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
5053 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5054 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5055 if (!inv)
5056 return Tmp.getExactInverse(nullptr);
5057 APFloat Inv(semPPCDoubleDoubleLegacy);
5058 auto Ret = Tmp.getExactInverse(&Inv);
5059 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
5060 return Ret;
5061 }
5062
scalbn(const DoubleAPFloat & Arg,int Exp,APFloat::roundingMode RM)5063 DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
5064 APFloat::roundingMode RM) {
5065 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5066 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
5067 scalbn(Arg.Floats[1], Exp, RM));
5068 }
5069
frexp(const DoubleAPFloat & Arg,int & Exp,APFloat::roundingMode RM)5070 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5071 APFloat::roundingMode RM) {
5072 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5073 APFloat First = frexp(Arg.Floats[0], Exp, RM);
5074 APFloat Second = Arg.Floats[1];
5075 if (Arg.getCategory() == APFloat::fcNormal)
5076 Second = scalbn(Second, -Exp, RM);
5077 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
5078 }
5079
5080 } // namespace detail
5081
Storage(IEEEFloat F,const fltSemantics & Semantics)5082 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5083 if (usesLayout<IEEEFloat>(Semantics)) {
5084 new (&IEEE) IEEEFloat(std::move(F));
5085 return;
5086 }
5087 if (usesLayout<DoubleAPFloat>(Semantics)) {
5088 const fltSemantics& S = F.getSemantics();
5089 new (&Double)
5090 DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5091 APFloat(semIEEEdouble));
5092 return;
5093 }
5094 llvm_unreachable("Unexpected semantics");
5095 }
5096
convertFromString(StringRef Str,roundingMode RM)5097 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
5098 roundingMode RM) {
5099 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
5100 }
5101
hash_value(const APFloat & Arg)5102 hash_code hash_value(const APFloat &Arg) {
5103 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5104 return hash_value(Arg.U.IEEE);
5105 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5106 return hash_value(Arg.U.Double);
5107 llvm_unreachable("Unexpected semantics");
5108 }
5109
APFloat(const fltSemantics & Semantics,StringRef S)5110 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
5111 : APFloat(Semantics) {
5112 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5113 assert(StatusOrErr && "Invalid floating point representation");
5114 consumeError(StatusOrErr.takeError());
5115 }
5116
convert(const fltSemantics & ToSemantics,roundingMode RM,bool * losesInfo)5117 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
5118 roundingMode RM, bool *losesInfo) {
5119 if (&getSemantics() == &ToSemantics) {
5120 *losesInfo = false;
5121 return opOK;
5122 }
5123 if (usesLayout<IEEEFloat>(getSemantics()) &&
5124 usesLayout<IEEEFloat>(ToSemantics))
5125 return U.IEEE.convert(ToSemantics, RM, losesInfo);
5126 if (usesLayout<IEEEFloat>(getSemantics()) &&
5127 usesLayout<DoubleAPFloat>(ToSemantics)) {
5128 assert(&ToSemantics == &semPPCDoubleDouble);
5129 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
5130 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5131 return Ret;
5132 }
5133 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5134 usesLayout<IEEEFloat>(ToSemantics)) {
5135 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5136 *this = APFloat(std::move(getIEEE()), ToSemantics);
5137 return Ret;
5138 }
5139 llvm_unreachable("Unexpected semantics");
5140 }
5141
getAllOnesValue(const fltSemantics & Semantics)5142 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
5143 return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));
5144 }
5145
print(raw_ostream & OS) const5146 void APFloat::print(raw_ostream &OS) const {
5147 SmallVector<char, 16> Buffer;
5148 toString(Buffer);
5149 OS << Buffer << "\n";
5150 }
5151
5152 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump() const5153 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
5154 #endif
5155
Profile(FoldingSetNodeID & NID) const5156 void APFloat::Profile(FoldingSetNodeID &NID) const {
5157 NID.Add(bitcastToAPInt());
5158 }
5159
5160 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
5161 an APSInt, whose initial bit-width and signed-ness are used to determine the
5162 precision of the conversion.
5163 */
convertToInteger(APSInt & result,roundingMode rounding_mode,bool * isExact) const5164 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
5165 roundingMode rounding_mode,
5166 bool *isExact) const {
5167 unsigned bitWidth = result.getBitWidth();
5168 SmallVector<uint64_t, 4> parts(result.getNumWords());
5169 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5170 rounding_mode, isExact);
5171 // Keeps the original signed-ness.
5172 result = APInt(bitWidth, parts);
5173 return status;
5174 }
5175
convertToDouble() const5176 double APFloat::convertToDouble() const {
5177 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
5178 return getIEEE().convertToDouble();
5179 assert(getSemantics().isRepresentableBy(semIEEEdouble) &&
5180 "Float semantics is not representable by IEEEdouble");
5181 APFloat Temp = *this;
5182 bool LosesInfo;
5183 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5184 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5185 (void)St;
5186 return Temp.getIEEE().convertToDouble();
5187 }
5188
convertToFloat() const5189 float APFloat::convertToFloat() const {
5190 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
5191 return getIEEE().convertToFloat();
5192 assert(getSemantics().isRepresentableBy(semIEEEsingle) &&
5193 "Float semantics is not representable by IEEEsingle");
5194 APFloat Temp = *this;
5195 bool LosesInfo;
5196 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
5197 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5198 (void)St;
5199 return Temp.getIEEE().convertToFloat();
5200 }
5201
5202 } // namespace llvm
5203
5204 #undef APFLOAT_DISPATCH_ON_SEMANTICS
5205