1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/llvm-config.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <cstring>
27 #include <limits.h>
28
29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
30 do { \
31 if (usesLayout<IEEEFloat>(getSemantics())) \
32 return U.IEEE.METHOD_CALL; \
33 if (usesLayout<DoubleAPFloat>(getSemantics())) \
34 return U.Double.METHOD_CALL; \
35 llvm_unreachable("Unexpected semantics"); \
36 } while (false)
37
38 using namespace llvm;
39
40 /// A macro used to combine two fcCategory enums into one key which can be used
41 /// in a switch statement to classify how the interaction of two APFloat's
42 /// categories affects an operation.
43 ///
44 /// TODO: If clang source code is ever allowed to use constexpr in its own
45 /// codebase, change this into a static inline function.
46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
47
48 /* Assumed in hexadecimal significand parsing, and conversion to
49 hexadecimal strings. */
50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
51
52 namespace llvm {
53 /* Represents floating point arithmetic semantics. */
54 struct fltSemantics {
55 /* The largest E such that 2^E is representable; this matches the
56 definition of IEEE 754. */
57 APFloatBase::ExponentType maxExponent;
58
59 /* The smallest E such that 2^E is a normalized number; this
60 matches the definition of IEEE 754. */
61 APFloatBase::ExponentType minExponent;
62
63 /* Number of bits in the significand. This includes the integer
64 bit. */
65 unsigned int precision;
66
67 /* Number of bits actually used in the semantics. */
68 unsigned int sizeInBits;
69 };
70
71 static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
72 static const fltSemantics semBFloat = {127, -126, 8, 16};
73 static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
74 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
75 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
76 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
77 static const fltSemantics semBogus = {0, 0, 0, 0};
78
79 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
80 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
81 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
82 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
83 to each other, and two 11-bit exponents.
84
85 Note: we need to make the value different from semBogus as otherwise
86 an unsafe optimization may collapse both values to a single address,
87 and we heavily rely on them having distinct addresses. */
88 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
89
90 /* These are legacy semantics for the fallback, inaccrurate implementation of
91 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
92 operation. It's equivalent to having an IEEE number with consecutive 106
93 bits of mantissa and 11 bits of exponent.
94
95 It's not equivalent to IBM double-double. For example, a legit IBM
96 double-double, 1 + epsilon:
97
98 1 + epsilon = 1 + (1 >> 1076)
99
100 is not representable by a consecutive 106 bits of mantissa.
101
102 Currently, these semantics are used in the following way:
103
104 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
105 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
106 semPPCDoubleDoubleLegacy -> IEEE operations
107
108 We use bitcastToAPInt() to get the bit representation (in APInt) of the
109 underlying IEEEdouble, then use the APInt constructor to construct the
110 legacy IEEE float.
111
112 TODO: Implement all operations in semPPCDoubleDouble, and delete these
113 semantics. */
114 static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
115 53 + 53, 128};
116
EnumToSemantics(Semantics S)117 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
118 switch (S) {
119 case S_IEEEhalf:
120 return IEEEhalf();
121 case S_BFloat:
122 return BFloat();
123 case S_IEEEsingle:
124 return IEEEsingle();
125 case S_IEEEdouble:
126 return IEEEdouble();
127 case S_x87DoubleExtended:
128 return x87DoubleExtended();
129 case S_IEEEquad:
130 return IEEEquad();
131 case S_PPCDoubleDouble:
132 return PPCDoubleDouble();
133 }
134 llvm_unreachable("Unrecognised floating semantics");
135 }
136
137 APFloatBase::Semantics
SemanticsToEnum(const llvm::fltSemantics & Sem)138 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
139 if (&Sem == &llvm::APFloat::IEEEhalf())
140 return S_IEEEhalf;
141 else if (&Sem == &llvm::APFloat::BFloat())
142 return S_BFloat;
143 else if (&Sem == &llvm::APFloat::IEEEsingle())
144 return S_IEEEsingle;
145 else if (&Sem == &llvm::APFloat::IEEEdouble())
146 return S_IEEEdouble;
147 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
148 return S_x87DoubleExtended;
149 else if (&Sem == &llvm::APFloat::IEEEquad())
150 return S_IEEEquad;
151 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
152 return S_PPCDoubleDouble;
153 else
154 llvm_unreachable("Unknown floating semantics");
155 }
156
IEEEhalf()157 const fltSemantics &APFloatBase::IEEEhalf() {
158 return semIEEEhalf;
159 }
BFloat()160 const fltSemantics &APFloatBase::BFloat() {
161 return semBFloat;
162 }
IEEEsingle()163 const fltSemantics &APFloatBase::IEEEsingle() {
164 return semIEEEsingle;
165 }
IEEEdouble()166 const fltSemantics &APFloatBase::IEEEdouble() {
167 return semIEEEdouble;
168 }
IEEEquad()169 const fltSemantics &APFloatBase::IEEEquad() {
170 return semIEEEquad;
171 }
x87DoubleExtended()172 const fltSemantics &APFloatBase::x87DoubleExtended() {
173 return semX87DoubleExtended;
174 }
Bogus()175 const fltSemantics &APFloatBase::Bogus() {
176 return semBogus;
177 }
PPCDoubleDouble()178 const fltSemantics &APFloatBase::PPCDoubleDouble() {
179 return semPPCDoubleDouble;
180 }
181
182 constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
183 constexpr RoundingMode APFloatBase::rmTowardPositive;
184 constexpr RoundingMode APFloatBase::rmTowardNegative;
185 constexpr RoundingMode APFloatBase::rmTowardZero;
186 constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
187
188 /* A tight upper bound on number of parts required to hold the value
189 pow(5, power) is
190
191 power * 815 / (351 * integerPartWidth) + 1
192
193 However, whilst the result may require only this many parts,
194 because we are multiplying two values to get it, the
195 multiplication may require an extra part with the excess part
196 being zero (consider the trivial case of 1 * 1, tcFullMultiply
197 requires two parts to hold the single-part result). So we add an
198 extra one to guarantee enough space whilst multiplying. */
199 const unsigned int maxExponent = 16383;
200 const unsigned int maxPrecision = 113;
201 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
202 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
203
semanticsPrecision(const fltSemantics & semantics)204 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
205 return semantics.precision;
206 }
207 APFloatBase::ExponentType
semanticsMaxExponent(const fltSemantics & semantics)208 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
209 return semantics.maxExponent;
210 }
211 APFloatBase::ExponentType
semanticsMinExponent(const fltSemantics & semantics)212 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
213 return semantics.minExponent;
214 }
semanticsSizeInBits(const fltSemantics & semantics)215 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
216 return semantics.sizeInBits;
217 }
218
getSizeInBits(const fltSemantics & Sem)219 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
220 return Sem.sizeInBits;
221 }
222
223 /* A bunch of private, handy routines. */
224
createError(const Twine & Err)225 static inline Error createError(const Twine &Err) {
226 return make_error<StringError>(Err, inconvertibleErrorCode());
227 }
228
229 static inline unsigned int
partCountForBits(unsigned int bits)230 partCountForBits(unsigned int bits)
231 {
232 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
233 }
234
235 /* Returns 0U-9U. Return values >= 10U are not digits. */
236 static inline unsigned int
decDigitValue(unsigned int c)237 decDigitValue(unsigned int c)
238 {
239 return c - '0';
240 }
241
242 /* Return the value of a decimal exponent of the form
243 [+-]ddddddd.
244
245 If the exponent overflows, returns a large exponent with the
246 appropriate sign. */
readExponent(StringRef::iterator begin,StringRef::iterator end)247 static Expected<int> readExponent(StringRef::iterator begin,
248 StringRef::iterator end) {
249 bool isNegative;
250 unsigned int absExponent;
251 const unsigned int overlargeExponent = 24000; /* FIXME. */
252 StringRef::iterator p = begin;
253
254 // Treat no exponent as 0 to match binutils
255 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
256 return 0;
257 }
258
259 isNegative = (*p == '-');
260 if (*p == '-' || *p == '+') {
261 p++;
262 if (p == end)
263 return createError("Exponent has no digits");
264 }
265
266 absExponent = decDigitValue(*p++);
267 if (absExponent >= 10U)
268 return createError("Invalid character in exponent");
269
270 for (; p != end; ++p) {
271 unsigned int value;
272
273 value = decDigitValue(*p);
274 if (value >= 10U)
275 return createError("Invalid character in exponent");
276
277 absExponent = absExponent * 10U + value;
278 if (absExponent >= overlargeExponent) {
279 absExponent = overlargeExponent;
280 break;
281 }
282 }
283
284 if (isNegative)
285 return -(int) absExponent;
286 else
287 return (int) absExponent;
288 }
289
290 /* This is ugly and needs cleaning up, but I don't immediately see
291 how whilst remaining safe. */
totalExponent(StringRef::iterator p,StringRef::iterator end,int exponentAdjustment)292 static Expected<int> totalExponent(StringRef::iterator p,
293 StringRef::iterator end,
294 int exponentAdjustment) {
295 int unsignedExponent;
296 bool negative, overflow;
297 int exponent = 0;
298
299 if (p == end)
300 return createError("Exponent has no digits");
301
302 negative = *p == '-';
303 if (*p == '-' || *p == '+') {
304 p++;
305 if (p == end)
306 return createError("Exponent has no digits");
307 }
308
309 unsignedExponent = 0;
310 overflow = false;
311 for (; p != end; ++p) {
312 unsigned int value;
313
314 value = decDigitValue(*p);
315 if (value >= 10U)
316 return createError("Invalid character in exponent");
317
318 unsignedExponent = unsignedExponent * 10 + value;
319 if (unsignedExponent > 32767) {
320 overflow = true;
321 break;
322 }
323 }
324
325 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
326 overflow = true;
327
328 if (!overflow) {
329 exponent = unsignedExponent;
330 if (negative)
331 exponent = -exponent;
332 exponent += exponentAdjustment;
333 if (exponent > 32767 || exponent < -32768)
334 overflow = true;
335 }
336
337 if (overflow)
338 exponent = negative ? -32768: 32767;
339
340 return exponent;
341 }
342
343 static Expected<StringRef::iterator>
skipLeadingZeroesAndAnyDot(StringRef::iterator begin,StringRef::iterator end,StringRef::iterator * dot)344 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
345 StringRef::iterator *dot) {
346 StringRef::iterator p = begin;
347 *dot = end;
348 while (p != end && *p == '0')
349 p++;
350
351 if (p != end && *p == '.') {
352 *dot = p++;
353
354 if (end - begin == 1)
355 return createError("Significand has no digits");
356
357 while (p != end && *p == '0')
358 p++;
359 }
360
361 return p;
362 }
363
364 /* Given a normal decimal floating point number of the form
365
366 dddd.dddd[eE][+-]ddd
367
368 where the decimal point and exponent are optional, fill out the
369 structure D. Exponent is appropriate if the significand is
370 treated as an integer, and normalizedExponent if the significand
371 is taken to have the decimal point after a single leading
372 non-zero digit.
373
374 If the value is zero, V->firstSigDigit points to a non-digit, and
375 the return exponent is zero.
376 */
377 struct decimalInfo {
378 const char *firstSigDigit;
379 const char *lastSigDigit;
380 int exponent;
381 int normalizedExponent;
382 };
383
interpretDecimal(StringRef::iterator begin,StringRef::iterator end,decimalInfo * D)384 static Error interpretDecimal(StringRef::iterator begin,
385 StringRef::iterator end, decimalInfo *D) {
386 StringRef::iterator dot = end;
387
388 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
389 if (!PtrOrErr)
390 return PtrOrErr.takeError();
391 StringRef::iterator p = *PtrOrErr;
392
393 D->firstSigDigit = p;
394 D->exponent = 0;
395 D->normalizedExponent = 0;
396
397 for (; p != end; ++p) {
398 if (*p == '.') {
399 if (dot != end)
400 return createError("String contains multiple dots");
401 dot = p++;
402 if (p == end)
403 break;
404 }
405 if (decDigitValue(*p) >= 10U)
406 break;
407 }
408
409 if (p != end) {
410 if (*p != 'e' && *p != 'E')
411 return createError("Invalid character in significand");
412 if (p == begin)
413 return createError("Significand has no digits");
414 if (dot != end && p - begin == 1)
415 return createError("Significand has no digits");
416
417 /* p points to the first non-digit in the string */
418 auto ExpOrErr = readExponent(p + 1, end);
419 if (!ExpOrErr)
420 return ExpOrErr.takeError();
421 D->exponent = *ExpOrErr;
422
423 /* Implied decimal point? */
424 if (dot == end)
425 dot = p;
426 }
427
428 /* If number is all zeroes accept any exponent. */
429 if (p != D->firstSigDigit) {
430 /* Drop insignificant trailing zeroes. */
431 if (p != begin) {
432 do
433 do
434 p--;
435 while (p != begin && *p == '0');
436 while (p != begin && *p == '.');
437 }
438
439 /* Adjust the exponents for any decimal point. */
440 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
441 D->normalizedExponent = (D->exponent +
442 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
443 - (dot > D->firstSigDigit && dot < p)));
444 }
445
446 D->lastSigDigit = p;
447 return Error::success();
448 }
449
450 /* Return the trailing fraction of a hexadecimal number.
451 DIGITVALUE is the first hex digit of the fraction, P points to
452 the next digit. */
453 static Expected<lostFraction>
trailingHexadecimalFraction(StringRef::iterator p,StringRef::iterator end,unsigned int digitValue)454 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
455 unsigned int digitValue) {
456 unsigned int hexDigit;
457
458 /* If the first trailing digit isn't 0 or 8 we can work out the
459 fraction immediately. */
460 if (digitValue > 8)
461 return lfMoreThanHalf;
462 else if (digitValue < 8 && digitValue > 0)
463 return lfLessThanHalf;
464
465 // Otherwise we need to find the first non-zero digit.
466 while (p != end && (*p == '0' || *p == '.'))
467 p++;
468
469 if (p == end)
470 return createError("Invalid trailing hexadecimal fraction!");
471
472 hexDigit = hexDigitValue(*p);
473
474 /* If we ran off the end it is exactly zero or one-half, otherwise
475 a little more. */
476 if (hexDigit == -1U)
477 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
478 else
479 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
480 }
481
482 /* Return the fraction lost were a bignum truncated losing the least
483 significant BITS bits. */
484 static lostFraction
lostFractionThroughTruncation(const APFloatBase::integerPart * parts,unsigned int partCount,unsigned int bits)485 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
486 unsigned int partCount,
487 unsigned int bits)
488 {
489 unsigned int lsb;
490
491 lsb = APInt::tcLSB(parts, partCount);
492
493 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
494 if (bits <= lsb)
495 return lfExactlyZero;
496 if (bits == lsb + 1)
497 return lfExactlyHalf;
498 if (bits <= partCount * APFloatBase::integerPartWidth &&
499 APInt::tcExtractBit(parts, bits - 1))
500 return lfMoreThanHalf;
501
502 return lfLessThanHalf;
503 }
504
505 /* Shift DST right BITS bits noting lost fraction. */
506 static lostFraction
shiftRight(APFloatBase::integerPart * dst,unsigned int parts,unsigned int bits)507 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
508 {
509 lostFraction lost_fraction;
510
511 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
512
513 APInt::tcShiftRight(dst, parts, bits);
514
515 return lost_fraction;
516 }
517
518 /* Combine the effect of two lost fractions. */
519 static lostFraction
combineLostFractions(lostFraction moreSignificant,lostFraction lessSignificant)520 combineLostFractions(lostFraction moreSignificant,
521 lostFraction lessSignificant)
522 {
523 if (lessSignificant != lfExactlyZero) {
524 if (moreSignificant == lfExactlyZero)
525 moreSignificant = lfLessThanHalf;
526 else if (moreSignificant == lfExactlyHalf)
527 moreSignificant = lfMoreThanHalf;
528 }
529
530 return moreSignificant;
531 }
532
533 /* The error from the true value, in half-ulps, on multiplying two
534 floating point numbers, which differ from the value they
535 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
536 than the returned value.
537
538 See "How to Read Floating Point Numbers Accurately" by William D
539 Clinger. */
540 static unsigned int
HUerrBound(bool inexactMultiply,unsigned int HUerr1,unsigned int HUerr2)541 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
542 {
543 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
544
545 if (HUerr1 + HUerr2 == 0)
546 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
547 else
548 return inexactMultiply + 2 * (HUerr1 + HUerr2);
549 }
550
551 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
552 when the least significant BITS are truncated. BITS cannot be
553 zero. */
554 static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart * parts,unsigned int bits,bool isNearest)555 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
556 bool isNearest) {
557 unsigned int count, partBits;
558 APFloatBase::integerPart part, boundary;
559
560 assert(bits != 0);
561
562 bits--;
563 count = bits / APFloatBase::integerPartWidth;
564 partBits = bits % APFloatBase::integerPartWidth + 1;
565
566 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
567
568 if (isNearest)
569 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
570 else
571 boundary = 0;
572
573 if (count == 0) {
574 if (part - boundary <= boundary - part)
575 return part - boundary;
576 else
577 return boundary - part;
578 }
579
580 if (part == boundary) {
581 while (--count)
582 if (parts[count])
583 return ~(APFloatBase::integerPart) 0; /* A lot. */
584
585 return parts[0];
586 } else if (part == boundary - 1) {
587 while (--count)
588 if (~parts[count])
589 return ~(APFloatBase::integerPart) 0; /* A lot. */
590
591 return -parts[0];
592 }
593
594 return ~(APFloatBase::integerPart) 0; /* A lot. */
595 }
596
597 /* Place pow(5, power) in DST, and return the number of parts used.
598 DST must be at least one part larger than size of the answer. */
599 static unsigned int
powerOf5(APFloatBase::integerPart * dst,unsigned int power)600 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
601 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
602 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
603 pow5s[0] = 78125 * 5;
604
605 unsigned int partsCount[16] = { 1 };
606 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
607 unsigned int result;
608 assert(power <= maxExponent);
609
610 p1 = dst;
611 p2 = scratch;
612
613 *p1 = firstEightPowers[power & 7];
614 power >>= 3;
615
616 result = 1;
617 pow5 = pow5s;
618
619 for (unsigned int n = 0; power; power >>= 1, n++) {
620 unsigned int pc;
621
622 pc = partsCount[n];
623
624 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
625 if (pc == 0) {
626 pc = partsCount[n - 1];
627 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
628 pc *= 2;
629 if (pow5[pc - 1] == 0)
630 pc--;
631 partsCount[n] = pc;
632 }
633
634 if (power & 1) {
635 APFloatBase::integerPart *tmp;
636
637 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
638 result += pc;
639 if (p2[result - 1] == 0)
640 result--;
641
642 /* Now result is in p1 with partsCount parts and p2 is scratch
643 space. */
644 tmp = p1;
645 p1 = p2;
646 p2 = tmp;
647 }
648
649 pow5 += pc;
650 }
651
652 if (p1 != dst)
653 APInt::tcAssign(dst, p1, result);
654
655 return result;
656 }
657
658 /* Zero at the end to avoid modular arithmetic when adding one; used
659 when rounding up during hexadecimal output. */
660 static const char hexDigitsLower[] = "0123456789abcdef0";
661 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
662 static const char infinityL[] = "infinity";
663 static const char infinityU[] = "INFINITY";
664 static const char NaNL[] = "nan";
665 static const char NaNU[] = "NAN";
666
667 /* Write out an integerPart in hexadecimal, starting with the most
668 significant nibble. Write out exactly COUNT hexdigits, return
669 COUNT. */
670 static unsigned int
partAsHex(char * dst,APFloatBase::integerPart part,unsigned int count,const char * hexDigitChars)671 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
672 const char *hexDigitChars)
673 {
674 unsigned int result = count;
675
676 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
677
678 part >>= (APFloatBase::integerPartWidth - 4 * count);
679 while (count--) {
680 dst[count] = hexDigitChars[part & 0xf];
681 part >>= 4;
682 }
683
684 return result;
685 }
686
687 /* Write out an unsigned decimal integer. */
688 static char *
writeUnsignedDecimal(char * dst,unsigned int n)689 writeUnsignedDecimal (char *dst, unsigned int n)
690 {
691 char buff[40], *p;
692
693 p = buff;
694 do
695 *p++ = '0' + n % 10;
696 while (n /= 10);
697
698 do
699 *dst++ = *--p;
700 while (p != buff);
701
702 return dst;
703 }
704
705 /* Write out a signed decimal integer. */
706 static char *
writeSignedDecimal(char * dst,int value)707 writeSignedDecimal (char *dst, int value)
708 {
709 if (value < 0) {
710 *dst++ = '-';
711 dst = writeUnsignedDecimal(dst, -(unsigned) value);
712 } else
713 dst = writeUnsignedDecimal(dst, value);
714
715 return dst;
716 }
717
718 namespace detail {
719 /* Constructors. */
initialize(const fltSemantics * ourSemantics)720 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
721 unsigned int count;
722
723 semantics = ourSemantics;
724 count = partCount();
725 if (count > 1)
726 significand.parts = new integerPart[count];
727 }
728
freeSignificand()729 void IEEEFloat::freeSignificand() {
730 if (needsCleanup())
731 delete [] significand.parts;
732 }
733
assign(const IEEEFloat & rhs)734 void IEEEFloat::assign(const IEEEFloat &rhs) {
735 assert(semantics == rhs.semantics);
736
737 sign = rhs.sign;
738 category = rhs.category;
739 exponent = rhs.exponent;
740 if (isFiniteNonZero() || category == fcNaN)
741 copySignificand(rhs);
742 }
743
copySignificand(const IEEEFloat & rhs)744 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
745 assert(isFiniteNonZero() || category == fcNaN);
746 assert(rhs.partCount() >= partCount());
747
748 APInt::tcAssign(significandParts(), rhs.significandParts(),
749 partCount());
750 }
751
752 /* Make this number a NaN, with an arbitrary but deterministic value
753 for the significand. If double or longer, this is a signalling NaN,
754 which may not be ideal. If float, this is QNaN(0). */
makeNaN(bool SNaN,bool Negative,const APInt * fill)755 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
756 category = fcNaN;
757 sign = Negative;
758
759 integerPart *significand = significandParts();
760 unsigned numParts = partCount();
761
762 // Set the significand bits to the fill.
763 if (!fill || fill->getNumWords() < numParts)
764 APInt::tcSet(significand, 0, numParts);
765 if (fill) {
766 APInt::tcAssign(significand, fill->getRawData(),
767 std::min(fill->getNumWords(), numParts));
768
769 // Zero out the excess bits of the significand.
770 unsigned bitsToPreserve = semantics->precision - 1;
771 unsigned part = bitsToPreserve / 64;
772 bitsToPreserve %= 64;
773 significand[part] &= ((1ULL << bitsToPreserve) - 1);
774 for (part++; part != numParts; ++part)
775 significand[part] = 0;
776 }
777
778 unsigned QNaNBit = semantics->precision - 2;
779
780 if (SNaN) {
781 // We always have to clear the QNaN bit to make it an SNaN.
782 APInt::tcClearBit(significand, QNaNBit);
783
784 // If there are no bits set in the payload, we have to set
785 // *something* to make it a NaN instead of an infinity;
786 // conventionally, this is the next bit down from the QNaN bit.
787 if (APInt::tcIsZero(significand, numParts))
788 APInt::tcSetBit(significand, QNaNBit - 1);
789 } else {
790 // We always have to set the QNaN bit to make it a QNaN.
791 APInt::tcSetBit(significand, QNaNBit);
792 }
793
794 // For x87 extended precision, we want to make a NaN, not a
795 // pseudo-NaN. Maybe we should expose the ability to make
796 // pseudo-NaNs?
797 if (semantics == &semX87DoubleExtended)
798 APInt::tcSetBit(significand, QNaNBit + 1);
799 }
800
operator =(const IEEEFloat & rhs)801 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
802 if (this != &rhs) {
803 if (semantics != rhs.semantics) {
804 freeSignificand();
805 initialize(rhs.semantics);
806 }
807 assign(rhs);
808 }
809
810 return *this;
811 }
812
operator =(IEEEFloat && rhs)813 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
814 freeSignificand();
815
816 semantics = rhs.semantics;
817 significand = rhs.significand;
818 exponent = rhs.exponent;
819 category = rhs.category;
820 sign = rhs.sign;
821
822 rhs.semantics = &semBogus;
823 return *this;
824 }
825
isDenormal() const826 bool IEEEFloat::isDenormal() const {
827 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
828 (APInt::tcExtractBit(significandParts(),
829 semantics->precision - 1) == 0);
830 }
831
isSmallest() const832 bool IEEEFloat::isSmallest() const {
833 // The smallest number by magnitude in our format will be the smallest
834 // denormal, i.e. the floating point number with exponent being minimum
835 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
836 return isFiniteNonZero() && exponent == semantics->minExponent &&
837 significandMSB() == 0;
838 }
839
isSignificandAllOnes() const840 bool IEEEFloat::isSignificandAllOnes() const {
841 // Test if the significand excluding the integral bit is all ones. This allows
842 // us to test for binade boundaries.
843 const integerPart *Parts = significandParts();
844 const unsigned PartCount = partCount();
845 for (unsigned i = 0; i < PartCount - 1; i++)
846 if (~Parts[i])
847 return false;
848
849 // Set the unused high bits to all ones when we compare.
850 const unsigned NumHighBits =
851 PartCount*integerPartWidth - semantics->precision + 1;
852 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
853 "fill than integerPartWidth");
854 const integerPart HighBitFill =
855 ~integerPart(0) << (integerPartWidth - NumHighBits);
856 if (~(Parts[PartCount - 1] | HighBitFill))
857 return false;
858
859 return true;
860 }
861
isSignificandAllZeros() const862 bool IEEEFloat::isSignificandAllZeros() const {
863 // Test if the significand excluding the integral bit is all zeros. This
864 // allows us to test for binade boundaries.
865 const integerPart *Parts = significandParts();
866 const unsigned PartCount = partCount();
867
868 for (unsigned i = 0; i < PartCount - 1; i++)
869 if (Parts[i])
870 return false;
871
872 const unsigned NumHighBits =
873 PartCount*integerPartWidth - semantics->precision + 1;
874 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
875 "clear than integerPartWidth");
876 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
877
878 if (Parts[PartCount - 1] & HighBitMask)
879 return false;
880
881 return true;
882 }
883
isLargest() const884 bool IEEEFloat::isLargest() const {
885 // The largest number by magnitude in our format will be the floating point
886 // number with maximum exponent and with significand that is all ones.
887 return isFiniteNonZero() && exponent == semantics->maxExponent
888 && isSignificandAllOnes();
889 }
890
isInteger() const891 bool IEEEFloat::isInteger() const {
892 // This could be made more efficient; I'm going for obviously correct.
893 if (!isFinite()) return false;
894 IEEEFloat truncated = *this;
895 truncated.roundToIntegral(rmTowardZero);
896 return compare(truncated) == cmpEqual;
897 }
898
bitwiseIsEqual(const IEEEFloat & rhs) const899 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
900 if (this == &rhs)
901 return true;
902 if (semantics != rhs.semantics ||
903 category != rhs.category ||
904 sign != rhs.sign)
905 return false;
906 if (category==fcZero || category==fcInfinity)
907 return true;
908
909 if (isFiniteNonZero() && exponent != rhs.exponent)
910 return false;
911
912 return std::equal(significandParts(), significandParts() + partCount(),
913 rhs.significandParts());
914 }
915
IEEEFloat(const fltSemantics & ourSemantics,integerPart value)916 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
917 initialize(&ourSemantics);
918 sign = 0;
919 category = fcNormal;
920 zeroSignificand();
921 exponent = ourSemantics.precision - 1;
922 significandParts()[0] = value;
923 normalize(rmNearestTiesToEven, lfExactlyZero);
924 }
925
IEEEFloat(const fltSemantics & ourSemantics)926 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
927 initialize(&ourSemantics);
928 category = fcZero;
929 sign = false;
930 }
931
932 // Delegate to the previous constructor, because later copy constructor may
933 // actually inspects category, which can't be garbage.
IEEEFloat(const fltSemantics & ourSemantics,uninitializedTag tag)934 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
935 : IEEEFloat(ourSemantics) {}
936
IEEEFloat(const IEEEFloat & rhs)937 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
938 initialize(rhs.semantics);
939 assign(rhs);
940 }
941
IEEEFloat(IEEEFloat && rhs)942 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
943 *this = std::move(rhs);
944 }
945
~IEEEFloat()946 IEEEFloat::~IEEEFloat() { freeSignificand(); }
947
partCount() const948 unsigned int IEEEFloat::partCount() const {
949 return partCountForBits(semantics->precision + 1);
950 }
951
significandParts() const952 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
953 return const_cast<IEEEFloat *>(this)->significandParts();
954 }
955
significandParts()956 IEEEFloat::integerPart *IEEEFloat::significandParts() {
957 if (partCount() > 1)
958 return significand.parts;
959 else
960 return &significand.part;
961 }
962
zeroSignificand()963 void IEEEFloat::zeroSignificand() {
964 APInt::tcSet(significandParts(), 0, partCount());
965 }
966
967 /* Increment an fcNormal floating point number's significand. */
incrementSignificand()968 void IEEEFloat::incrementSignificand() {
969 integerPart carry;
970
971 carry = APInt::tcIncrement(significandParts(), partCount());
972
973 /* Our callers should never cause us to overflow. */
974 assert(carry == 0);
975 (void)carry;
976 }
977
978 /* Add the significand of the RHS. Returns the carry flag. */
addSignificand(const IEEEFloat & rhs)979 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
980 integerPart *parts;
981
982 parts = significandParts();
983
984 assert(semantics == rhs.semantics);
985 assert(exponent == rhs.exponent);
986
987 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
988 }
989
990 /* Subtract the significand of the RHS with a borrow flag. Returns
991 the borrow flag. */
subtractSignificand(const IEEEFloat & rhs,integerPart borrow)992 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
993 integerPart borrow) {
994 integerPart *parts;
995
996 parts = significandParts();
997
998 assert(semantics == rhs.semantics);
999 assert(exponent == rhs.exponent);
1000
1001 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1002 partCount());
1003 }
1004
1005 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1006 on to the full-precision result of the multiplication. Returns the
1007 lost fraction. */
multiplySignificand(const IEEEFloat & rhs,IEEEFloat addend)1008 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1009 IEEEFloat addend) {
1010 unsigned int omsb; // One, not zero, based MSB.
1011 unsigned int partsCount, newPartsCount, precision;
1012 integerPart *lhsSignificand;
1013 integerPart scratch[4];
1014 integerPart *fullSignificand;
1015 lostFraction lost_fraction;
1016 bool ignored;
1017
1018 assert(semantics == rhs.semantics);
1019
1020 precision = semantics->precision;
1021
1022 // Allocate space for twice as many bits as the original significand, plus one
1023 // extra bit for the addition to overflow into.
1024 newPartsCount = partCountForBits(precision * 2 + 1);
1025
1026 if (newPartsCount > 4)
1027 fullSignificand = new integerPart[newPartsCount];
1028 else
1029 fullSignificand = scratch;
1030
1031 lhsSignificand = significandParts();
1032 partsCount = partCount();
1033
1034 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1035 rhs.significandParts(), partsCount, partsCount);
1036
1037 lost_fraction = lfExactlyZero;
1038 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1039 exponent += rhs.exponent;
1040
1041 // Assume the operands involved in the multiplication are single-precision
1042 // FP, and the two multiplicants are:
1043 // *this = a23 . a22 ... a0 * 2^e1
1044 // rhs = b23 . b22 ... b0 * 2^e2
1045 // the result of multiplication is:
1046 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1047 // Note that there are three significant bits at the left-hand side of the
1048 // radix point: two for the multiplication, and an overflow bit for the
1049 // addition (that will always be zero at this point). Move the radix point
1050 // toward left by two bits, and adjust exponent accordingly.
1051 exponent += 2;
1052
1053 if (addend.isNonZero()) {
1054 // The intermediate result of the multiplication has "2 * precision"
1055 // signicant bit; adjust the addend to be consistent with mul result.
1056 //
1057 Significand savedSignificand = significand;
1058 const fltSemantics *savedSemantics = semantics;
1059 fltSemantics extendedSemantics;
1060 opStatus status;
1061 unsigned int extendedPrecision;
1062
1063 // Normalize our MSB to one below the top bit to allow for overflow.
1064 extendedPrecision = 2 * precision + 1;
1065 if (omsb != extendedPrecision - 1) {
1066 assert(extendedPrecision > omsb);
1067 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1068 (extendedPrecision - 1) - omsb);
1069 exponent -= (extendedPrecision - 1) - omsb;
1070 }
1071
1072 /* Create new semantics. */
1073 extendedSemantics = *semantics;
1074 extendedSemantics.precision = extendedPrecision;
1075
1076 if (newPartsCount == 1)
1077 significand.part = fullSignificand[0];
1078 else
1079 significand.parts = fullSignificand;
1080 semantics = &extendedSemantics;
1081
1082 // Make a copy so we can convert it to the extended semantics.
1083 // Note that we cannot convert the addend directly, as the extendedSemantics
1084 // is a local variable (which we take a reference to).
1085 IEEEFloat extendedAddend(addend);
1086 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1087 assert(status == opOK);
1088 (void)status;
1089
1090 // Shift the significand of the addend right by one bit. This guarantees
1091 // that the high bit of the significand is zero (same as fullSignificand),
1092 // so the addition will overflow (if it does overflow at all) into the top bit.
1093 lost_fraction = extendedAddend.shiftSignificandRight(1);
1094 assert(lost_fraction == lfExactlyZero &&
1095 "Lost precision while shifting addend for fused-multiply-add.");
1096
1097 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1098
1099 /* Restore our state. */
1100 if (newPartsCount == 1)
1101 fullSignificand[0] = significand.part;
1102 significand = savedSignificand;
1103 semantics = savedSemantics;
1104
1105 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1106 }
1107
1108 // Convert the result having "2 * precision" significant-bits back to the one
1109 // having "precision" significant-bits. First, move the radix point from
1110 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1111 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1112 exponent -= precision + 1;
1113
1114 // In case MSB resides at the left-hand side of radix point, shift the
1115 // mantissa right by some amount to make sure the MSB reside right before
1116 // the radix point (i.e. "MSB . rest-significant-bits").
1117 //
1118 // Note that the result is not normalized when "omsb < precision". So, the
1119 // caller needs to call IEEEFloat::normalize() if normalized value is
1120 // expected.
1121 if (omsb > precision) {
1122 unsigned int bits, significantParts;
1123 lostFraction lf;
1124
1125 bits = omsb - precision;
1126 significantParts = partCountForBits(omsb);
1127 lf = shiftRight(fullSignificand, significantParts, bits);
1128 lost_fraction = combineLostFractions(lf, lost_fraction);
1129 exponent += bits;
1130 }
1131
1132 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1133
1134 if (newPartsCount > 4)
1135 delete [] fullSignificand;
1136
1137 return lost_fraction;
1138 }
1139
multiplySignificand(const IEEEFloat & rhs)1140 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1141 return multiplySignificand(rhs, IEEEFloat(*semantics));
1142 }
1143
1144 /* Multiply the significands of LHS and RHS to DST. */
divideSignificand(const IEEEFloat & rhs)1145 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1146 unsigned int bit, i, partsCount;
1147 const integerPart *rhsSignificand;
1148 integerPart *lhsSignificand, *dividend, *divisor;
1149 integerPart scratch[4];
1150 lostFraction lost_fraction;
1151
1152 assert(semantics == rhs.semantics);
1153
1154 lhsSignificand = significandParts();
1155 rhsSignificand = rhs.significandParts();
1156 partsCount = partCount();
1157
1158 if (partsCount > 2)
1159 dividend = new integerPart[partsCount * 2];
1160 else
1161 dividend = scratch;
1162
1163 divisor = dividend + partsCount;
1164
1165 /* Copy the dividend and divisor as they will be modified in-place. */
1166 for (i = 0; i < partsCount; i++) {
1167 dividend[i] = lhsSignificand[i];
1168 divisor[i] = rhsSignificand[i];
1169 lhsSignificand[i] = 0;
1170 }
1171
1172 exponent -= rhs.exponent;
1173
1174 unsigned int precision = semantics->precision;
1175
1176 /* Normalize the divisor. */
1177 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1178 if (bit) {
1179 exponent += bit;
1180 APInt::tcShiftLeft(divisor, partsCount, bit);
1181 }
1182
1183 /* Normalize the dividend. */
1184 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1185 if (bit) {
1186 exponent -= bit;
1187 APInt::tcShiftLeft(dividend, partsCount, bit);
1188 }
1189
1190 /* Ensure the dividend >= divisor initially for the loop below.
1191 Incidentally, this means that the division loop below is
1192 guaranteed to set the integer bit to one. */
1193 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1194 exponent--;
1195 APInt::tcShiftLeft(dividend, partsCount, 1);
1196 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1197 }
1198
1199 /* Long division. */
1200 for (bit = precision; bit; bit -= 1) {
1201 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1202 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1203 APInt::tcSetBit(lhsSignificand, bit - 1);
1204 }
1205
1206 APInt::tcShiftLeft(dividend, partsCount, 1);
1207 }
1208
1209 /* Figure out the lost fraction. */
1210 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1211
1212 if (cmp > 0)
1213 lost_fraction = lfMoreThanHalf;
1214 else if (cmp == 0)
1215 lost_fraction = lfExactlyHalf;
1216 else if (APInt::tcIsZero(dividend, partsCount))
1217 lost_fraction = lfExactlyZero;
1218 else
1219 lost_fraction = lfLessThanHalf;
1220
1221 if (partsCount > 2)
1222 delete [] dividend;
1223
1224 return lost_fraction;
1225 }
1226
significandMSB() const1227 unsigned int IEEEFloat::significandMSB() const {
1228 return APInt::tcMSB(significandParts(), partCount());
1229 }
1230
significandLSB() const1231 unsigned int IEEEFloat::significandLSB() const {
1232 return APInt::tcLSB(significandParts(), partCount());
1233 }
1234
1235 /* Note that a zero result is NOT normalized to fcZero. */
shiftSignificandRight(unsigned int bits)1236 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1237 /* Our exponent should not overflow. */
1238 assert((ExponentType) (exponent + bits) >= exponent);
1239
1240 exponent += bits;
1241
1242 return shiftRight(significandParts(), partCount(), bits);
1243 }
1244
1245 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
shiftSignificandLeft(unsigned int bits)1246 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1247 assert(bits < semantics->precision);
1248
1249 if (bits) {
1250 unsigned int partsCount = partCount();
1251
1252 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1253 exponent -= bits;
1254
1255 assert(!APInt::tcIsZero(significandParts(), partsCount));
1256 }
1257 }
1258
1259 IEEEFloat::cmpResult
compareAbsoluteValue(const IEEEFloat & rhs) const1260 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1261 int compare;
1262
1263 assert(semantics == rhs.semantics);
1264 assert(isFiniteNonZero());
1265 assert(rhs.isFiniteNonZero());
1266
1267 compare = exponent - rhs.exponent;
1268
1269 /* If exponents are equal, do an unsigned bignum comparison of the
1270 significands. */
1271 if (compare == 0)
1272 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1273 partCount());
1274
1275 if (compare > 0)
1276 return cmpGreaterThan;
1277 else if (compare < 0)
1278 return cmpLessThan;
1279 else
1280 return cmpEqual;
1281 }
1282
1283 /* Handle overflow. Sign is preserved. We either become infinity or
1284 the largest finite number. */
handleOverflow(roundingMode rounding_mode)1285 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1286 /* Infinity? */
1287 if (rounding_mode == rmNearestTiesToEven ||
1288 rounding_mode == rmNearestTiesToAway ||
1289 (rounding_mode == rmTowardPositive && !sign) ||
1290 (rounding_mode == rmTowardNegative && sign)) {
1291 category = fcInfinity;
1292 return (opStatus) (opOverflow | opInexact);
1293 }
1294
1295 /* Otherwise we become the largest finite number. */
1296 category = fcNormal;
1297 exponent = semantics->maxExponent;
1298 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1299 semantics->precision);
1300
1301 return opInexact;
1302 }
1303
1304 /* Returns TRUE if, when truncating the current number, with BIT the
1305 new LSB, with the given lost fraction and rounding mode, the result
1306 would need to be rounded away from zero (i.e., by increasing the
1307 signficand). This routine must work for fcZero of both signs, and
1308 fcNormal numbers. */
roundAwayFromZero(roundingMode rounding_mode,lostFraction lost_fraction,unsigned int bit) const1309 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1310 lostFraction lost_fraction,
1311 unsigned int bit) const {
1312 /* NaNs and infinities should not have lost fractions. */
1313 assert(isFiniteNonZero() || category == fcZero);
1314
1315 /* Current callers never pass this so we don't handle it. */
1316 assert(lost_fraction != lfExactlyZero);
1317
1318 switch (rounding_mode) {
1319 case rmNearestTiesToAway:
1320 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1321
1322 case rmNearestTiesToEven:
1323 if (lost_fraction == lfMoreThanHalf)
1324 return true;
1325
1326 /* Our zeroes don't have a significand to test. */
1327 if (lost_fraction == lfExactlyHalf && category != fcZero)
1328 return APInt::tcExtractBit(significandParts(), bit);
1329
1330 return false;
1331
1332 case rmTowardZero:
1333 return false;
1334
1335 case rmTowardPositive:
1336 return !sign;
1337
1338 case rmTowardNegative:
1339 return sign;
1340
1341 default:
1342 break;
1343 }
1344 llvm_unreachable("Invalid rounding mode found");
1345 }
1346
normalize(roundingMode rounding_mode,lostFraction lost_fraction)1347 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1348 lostFraction lost_fraction) {
1349 unsigned int omsb; /* One, not zero, based MSB. */
1350 int exponentChange;
1351
1352 if (!isFiniteNonZero())
1353 return opOK;
1354
1355 /* Before rounding normalize the exponent of fcNormal numbers. */
1356 omsb = significandMSB() + 1;
1357
1358 if (omsb) {
1359 /* OMSB is numbered from 1. We want to place it in the integer
1360 bit numbered PRECISION if possible, with a compensating change in
1361 the exponent. */
1362 exponentChange = omsb - semantics->precision;
1363
1364 /* If the resulting exponent is too high, overflow according to
1365 the rounding mode. */
1366 if (exponent + exponentChange > semantics->maxExponent)
1367 return handleOverflow(rounding_mode);
1368
1369 /* Subnormal numbers have exponent minExponent, and their MSB
1370 is forced based on that. */
1371 if (exponent + exponentChange < semantics->minExponent)
1372 exponentChange = semantics->minExponent - exponent;
1373
1374 /* Shifting left is easy as we don't lose precision. */
1375 if (exponentChange < 0) {
1376 assert(lost_fraction == lfExactlyZero);
1377
1378 shiftSignificandLeft(-exponentChange);
1379
1380 return opOK;
1381 }
1382
1383 if (exponentChange > 0) {
1384 lostFraction lf;
1385
1386 /* Shift right and capture any new lost fraction. */
1387 lf = shiftSignificandRight(exponentChange);
1388
1389 lost_fraction = combineLostFractions(lf, lost_fraction);
1390
1391 /* Keep OMSB up-to-date. */
1392 if (omsb > (unsigned) exponentChange)
1393 omsb -= exponentChange;
1394 else
1395 omsb = 0;
1396 }
1397 }
1398
1399 /* Now round the number according to rounding_mode given the lost
1400 fraction. */
1401
1402 /* As specified in IEEE 754, since we do not trap we do not report
1403 underflow for exact results. */
1404 if (lost_fraction == lfExactlyZero) {
1405 /* Canonicalize zeroes. */
1406 if (omsb == 0)
1407 category = fcZero;
1408
1409 return opOK;
1410 }
1411
1412 /* Increment the significand if we're rounding away from zero. */
1413 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1414 if (omsb == 0)
1415 exponent = semantics->minExponent;
1416
1417 incrementSignificand();
1418 omsb = significandMSB() + 1;
1419
1420 /* Did the significand increment overflow? */
1421 if (omsb == (unsigned) semantics->precision + 1) {
1422 /* Renormalize by incrementing the exponent and shifting our
1423 significand right one. However if we already have the
1424 maximum exponent we overflow to infinity. */
1425 if (exponent == semantics->maxExponent) {
1426 category = fcInfinity;
1427
1428 return (opStatus) (opOverflow | opInexact);
1429 }
1430
1431 shiftSignificandRight(1);
1432
1433 return opInexact;
1434 }
1435 }
1436
1437 /* The normal case - we were and are not denormal, and any
1438 significand increment above didn't overflow. */
1439 if (omsb == semantics->precision)
1440 return opInexact;
1441
1442 /* We have a non-zero denormal. */
1443 assert(omsb < semantics->precision);
1444
1445 /* Canonicalize zeroes. */
1446 if (omsb == 0)
1447 category = fcZero;
1448
1449 /* The fcZero case is a denormal that underflowed to zero. */
1450 return (opStatus) (opUnderflow | opInexact);
1451 }
1452
addOrSubtractSpecials(const IEEEFloat & rhs,bool subtract)1453 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1454 bool subtract) {
1455 switch (PackCategoriesIntoKey(category, rhs.category)) {
1456 default:
1457 llvm_unreachable(nullptr);
1458
1459 case PackCategoriesIntoKey(fcZero, fcNaN):
1460 case PackCategoriesIntoKey(fcNormal, fcNaN):
1461 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1462 assign(rhs);
1463 LLVM_FALLTHROUGH;
1464 case PackCategoriesIntoKey(fcNaN, fcZero):
1465 case PackCategoriesIntoKey(fcNaN, fcNormal):
1466 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1467 case PackCategoriesIntoKey(fcNaN, fcNaN):
1468 if (isSignaling()) {
1469 makeQuiet();
1470 return opInvalidOp;
1471 }
1472 return rhs.isSignaling() ? opInvalidOp : opOK;
1473
1474 case PackCategoriesIntoKey(fcNormal, fcZero):
1475 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1476 case PackCategoriesIntoKey(fcInfinity, fcZero):
1477 return opOK;
1478
1479 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1480 case PackCategoriesIntoKey(fcZero, fcInfinity):
1481 category = fcInfinity;
1482 sign = rhs.sign ^ subtract;
1483 return opOK;
1484
1485 case PackCategoriesIntoKey(fcZero, fcNormal):
1486 assign(rhs);
1487 sign = rhs.sign ^ subtract;
1488 return opOK;
1489
1490 case PackCategoriesIntoKey(fcZero, fcZero):
1491 /* Sign depends on rounding mode; handled by caller. */
1492 return opOK;
1493
1494 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1495 /* Differently signed infinities can only be validly
1496 subtracted. */
1497 if (((sign ^ rhs.sign)!=0) != subtract) {
1498 makeNaN();
1499 return opInvalidOp;
1500 }
1501
1502 return opOK;
1503
1504 case PackCategoriesIntoKey(fcNormal, fcNormal):
1505 return opDivByZero;
1506 }
1507 }
1508
1509 /* Add or subtract two normal numbers. */
addOrSubtractSignificand(const IEEEFloat & rhs,bool subtract)1510 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1511 bool subtract) {
1512 integerPart carry;
1513 lostFraction lost_fraction;
1514 int bits;
1515
1516 /* Determine if the operation on the absolute values is effectively
1517 an addition or subtraction. */
1518 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1519
1520 /* Are we bigger exponent-wise than the RHS? */
1521 bits = exponent - rhs.exponent;
1522
1523 /* Subtraction is more subtle than one might naively expect. */
1524 if (subtract) {
1525 IEEEFloat temp_rhs(rhs);
1526
1527 if (bits == 0)
1528 lost_fraction = lfExactlyZero;
1529 else if (bits > 0) {
1530 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1531 shiftSignificandLeft(1);
1532 } else {
1533 lost_fraction = shiftSignificandRight(-bits - 1);
1534 temp_rhs.shiftSignificandLeft(1);
1535 }
1536
1537 // Should we reverse the subtraction.
1538 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1539 carry = temp_rhs.subtractSignificand
1540 (*this, lost_fraction != lfExactlyZero);
1541 copySignificand(temp_rhs);
1542 sign = !sign;
1543 } else {
1544 carry = subtractSignificand
1545 (temp_rhs, lost_fraction != lfExactlyZero);
1546 }
1547
1548 /* Invert the lost fraction - it was on the RHS and
1549 subtracted. */
1550 if (lost_fraction == lfLessThanHalf)
1551 lost_fraction = lfMoreThanHalf;
1552 else if (lost_fraction == lfMoreThanHalf)
1553 lost_fraction = lfLessThanHalf;
1554
1555 /* The code above is intended to ensure that no borrow is
1556 necessary. */
1557 assert(!carry);
1558 (void)carry;
1559 } else {
1560 if (bits > 0) {
1561 IEEEFloat temp_rhs(rhs);
1562
1563 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1564 carry = addSignificand(temp_rhs);
1565 } else {
1566 lost_fraction = shiftSignificandRight(-bits);
1567 carry = addSignificand(rhs);
1568 }
1569
1570 /* We have a guard bit; generating a carry cannot happen. */
1571 assert(!carry);
1572 (void)carry;
1573 }
1574
1575 return lost_fraction;
1576 }
1577
multiplySpecials(const IEEEFloat & rhs)1578 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1579 switch (PackCategoriesIntoKey(category, rhs.category)) {
1580 default:
1581 llvm_unreachable(nullptr);
1582
1583 case PackCategoriesIntoKey(fcZero, fcNaN):
1584 case PackCategoriesIntoKey(fcNormal, fcNaN):
1585 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1586 assign(rhs);
1587 sign = false;
1588 LLVM_FALLTHROUGH;
1589 case PackCategoriesIntoKey(fcNaN, fcZero):
1590 case PackCategoriesIntoKey(fcNaN, fcNormal):
1591 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1592 case PackCategoriesIntoKey(fcNaN, fcNaN):
1593 sign ^= rhs.sign; // restore the original sign
1594 if (isSignaling()) {
1595 makeQuiet();
1596 return opInvalidOp;
1597 }
1598 return rhs.isSignaling() ? opInvalidOp : opOK;
1599
1600 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1601 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1602 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1603 category = fcInfinity;
1604 return opOK;
1605
1606 case PackCategoriesIntoKey(fcZero, fcNormal):
1607 case PackCategoriesIntoKey(fcNormal, fcZero):
1608 case PackCategoriesIntoKey(fcZero, fcZero):
1609 category = fcZero;
1610 return opOK;
1611
1612 case PackCategoriesIntoKey(fcZero, fcInfinity):
1613 case PackCategoriesIntoKey(fcInfinity, fcZero):
1614 makeNaN();
1615 return opInvalidOp;
1616
1617 case PackCategoriesIntoKey(fcNormal, fcNormal):
1618 return opOK;
1619 }
1620 }
1621
divideSpecials(const IEEEFloat & rhs)1622 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1623 switch (PackCategoriesIntoKey(category, rhs.category)) {
1624 default:
1625 llvm_unreachable(nullptr);
1626
1627 case PackCategoriesIntoKey(fcZero, fcNaN):
1628 case PackCategoriesIntoKey(fcNormal, fcNaN):
1629 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1630 assign(rhs);
1631 sign = false;
1632 LLVM_FALLTHROUGH;
1633 case PackCategoriesIntoKey(fcNaN, fcZero):
1634 case PackCategoriesIntoKey(fcNaN, fcNormal):
1635 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1636 case PackCategoriesIntoKey(fcNaN, fcNaN):
1637 sign ^= rhs.sign; // restore the original sign
1638 if (isSignaling()) {
1639 makeQuiet();
1640 return opInvalidOp;
1641 }
1642 return rhs.isSignaling() ? opInvalidOp : opOK;
1643
1644 case PackCategoriesIntoKey(fcInfinity, fcZero):
1645 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1646 case PackCategoriesIntoKey(fcZero, fcInfinity):
1647 case PackCategoriesIntoKey(fcZero, fcNormal):
1648 return opOK;
1649
1650 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1651 category = fcZero;
1652 return opOK;
1653
1654 case PackCategoriesIntoKey(fcNormal, fcZero):
1655 category = fcInfinity;
1656 return opDivByZero;
1657
1658 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1659 case PackCategoriesIntoKey(fcZero, fcZero):
1660 makeNaN();
1661 return opInvalidOp;
1662
1663 case PackCategoriesIntoKey(fcNormal, fcNormal):
1664 return opOK;
1665 }
1666 }
1667
modSpecials(const IEEEFloat & rhs)1668 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1669 switch (PackCategoriesIntoKey(category, rhs.category)) {
1670 default:
1671 llvm_unreachable(nullptr);
1672
1673 case PackCategoriesIntoKey(fcZero, fcNaN):
1674 case PackCategoriesIntoKey(fcNormal, fcNaN):
1675 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1676 assign(rhs);
1677 LLVM_FALLTHROUGH;
1678 case PackCategoriesIntoKey(fcNaN, fcZero):
1679 case PackCategoriesIntoKey(fcNaN, fcNormal):
1680 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1681 case PackCategoriesIntoKey(fcNaN, fcNaN):
1682 if (isSignaling()) {
1683 makeQuiet();
1684 return opInvalidOp;
1685 }
1686 return rhs.isSignaling() ? opInvalidOp : opOK;
1687
1688 case PackCategoriesIntoKey(fcZero, fcInfinity):
1689 case PackCategoriesIntoKey(fcZero, fcNormal):
1690 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1691 return opOK;
1692
1693 case PackCategoriesIntoKey(fcNormal, fcZero):
1694 case PackCategoriesIntoKey(fcInfinity, fcZero):
1695 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1696 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1697 case PackCategoriesIntoKey(fcZero, fcZero):
1698 makeNaN();
1699 return opInvalidOp;
1700
1701 case PackCategoriesIntoKey(fcNormal, fcNormal):
1702 return opOK;
1703 }
1704 }
1705
remainderSpecials(const IEEEFloat & rhs)1706 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1707 switch (PackCategoriesIntoKey(category, rhs.category)) {
1708 default:
1709 llvm_unreachable(nullptr);
1710
1711 case PackCategoriesIntoKey(fcZero, fcNaN):
1712 case PackCategoriesIntoKey(fcNormal, fcNaN):
1713 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1714 assign(rhs);
1715 LLVM_FALLTHROUGH;
1716 case PackCategoriesIntoKey(fcNaN, fcZero):
1717 case PackCategoriesIntoKey(fcNaN, fcNormal):
1718 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1719 case PackCategoriesIntoKey(fcNaN, fcNaN):
1720 if (isSignaling()) {
1721 makeQuiet();
1722 return opInvalidOp;
1723 }
1724 return rhs.isSignaling() ? opInvalidOp : opOK;
1725
1726 case PackCategoriesIntoKey(fcZero, fcInfinity):
1727 case PackCategoriesIntoKey(fcZero, fcNormal):
1728 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1729 return opOK;
1730
1731 case PackCategoriesIntoKey(fcNormal, fcZero):
1732 case PackCategoriesIntoKey(fcInfinity, fcZero):
1733 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1734 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1735 case PackCategoriesIntoKey(fcZero, fcZero):
1736 makeNaN();
1737 return opInvalidOp;
1738
1739 case PackCategoriesIntoKey(fcNormal, fcNormal):
1740 return opDivByZero; // fake status, indicating this is not a special case
1741 }
1742 }
1743
1744 /* Change sign. */
changeSign()1745 void IEEEFloat::changeSign() {
1746 /* Look mummy, this one's easy. */
1747 sign = !sign;
1748 }
1749
1750 /* Normalized addition or subtraction. */
addOrSubtract(const IEEEFloat & rhs,roundingMode rounding_mode,bool subtract)1751 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1752 roundingMode rounding_mode,
1753 bool subtract) {
1754 opStatus fs;
1755
1756 fs = addOrSubtractSpecials(rhs, subtract);
1757
1758 /* This return code means it was not a simple case. */
1759 if (fs == opDivByZero) {
1760 lostFraction lost_fraction;
1761
1762 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1763 fs = normalize(rounding_mode, lost_fraction);
1764
1765 /* Can only be zero if we lost no fraction. */
1766 assert(category != fcZero || lost_fraction == lfExactlyZero);
1767 }
1768
1769 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1770 positive zero unless rounding to minus infinity, except that
1771 adding two like-signed zeroes gives that zero. */
1772 if (category == fcZero) {
1773 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1774 sign = (rounding_mode == rmTowardNegative);
1775 }
1776
1777 return fs;
1778 }
1779
1780 /* Normalized addition. */
add(const IEEEFloat & rhs,roundingMode rounding_mode)1781 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
1782 roundingMode rounding_mode) {
1783 return addOrSubtract(rhs, rounding_mode, false);
1784 }
1785
1786 /* Normalized subtraction. */
subtract(const IEEEFloat & rhs,roundingMode rounding_mode)1787 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
1788 roundingMode rounding_mode) {
1789 return addOrSubtract(rhs, rounding_mode, true);
1790 }
1791
1792 /* Normalized multiply. */
multiply(const IEEEFloat & rhs,roundingMode rounding_mode)1793 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
1794 roundingMode rounding_mode) {
1795 opStatus fs;
1796
1797 sign ^= rhs.sign;
1798 fs = multiplySpecials(rhs);
1799
1800 if (isFiniteNonZero()) {
1801 lostFraction lost_fraction = multiplySignificand(rhs);
1802 fs = normalize(rounding_mode, lost_fraction);
1803 if (lost_fraction != lfExactlyZero)
1804 fs = (opStatus) (fs | opInexact);
1805 }
1806
1807 return fs;
1808 }
1809
1810 /* Normalized divide. */
divide(const IEEEFloat & rhs,roundingMode rounding_mode)1811 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
1812 roundingMode rounding_mode) {
1813 opStatus fs;
1814
1815 sign ^= rhs.sign;
1816 fs = divideSpecials(rhs);
1817
1818 if (isFiniteNonZero()) {
1819 lostFraction lost_fraction = divideSignificand(rhs);
1820 fs = normalize(rounding_mode, lost_fraction);
1821 if (lost_fraction != lfExactlyZero)
1822 fs = (opStatus) (fs | opInexact);
1823 }
1824
1825 return fs;
1826 }
1827
1828 /* Normalized remainder. */
remainder(const IEEEFloat & rhs)1829 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
1830 opStatus fs;
1831 unsigned int origSign = sign;
1832
1833 // First handle the special cases.
1834 fs = remainderSpecials(rhs);
1835 if (fs != opDivByZero)
1836 return fs;
1837
1838 fs = opOK;
1839
1840 // Make sure the current value is less than twice the denom. If the addition
1841 // did not succeed (an overflow has happened), which means that the finite
1842 // value we currently posses must be less than twice the denom (as we are
1843 // using the same semantics).
1844 IEEEFloat P2 = rhs;
1845 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
1846 fs = mod(P2);
1847 assert(fs == opOK);
1848 }
1849
1850 // Lets work with absolute numbers.
1851 IEEEFloat P = rhs;
1852 P.sign = false;
1853 sign = false;
1854
1855 //
1856 // To calculate the remainder we use the following scheme.
1857 //
1858 // The remainder is defained as follows:
1859 //
1860 // remainder = numer - rquot * denom = x - r * p
1861 //
1862 // Where r is the result of: x/p, rounded toward the nearest integral value
1863 // (with halfway cases rounded toward the even number).
1864 //
1865 // Currently, (after x mod 2p):
1866 // r is the number of 2p's present inside x, which is inherently, an even
1867 // number of p's.
1868 //
1869 // We may split the remaining calculation into 4 options:
1870 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
1871 // - if x == 0.5p then we round to the nearest even number which is 0, and we
1872 // are done as well.
1873 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
1874 // to subtract 1p at least once.
1875 // - if x >= p then we must subtract p at least once, as x must be a
1876 // remainder.
1877 //
1878 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
1879 //
1880 // We can now split the remaining calculation to the following 3 options:
1881 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
1882 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
1883 // must round up to the next even number. so we must subtract p once more.
1884 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
1885 // integral, and subtract p once more.
1886 //
1887
1888 // Extend the semantics to prevent an overflow/underflow or inexact result.
1889 bool losesInfo;
1890 fltSemantics extendedSemantics = *semantics;
1891 extendedSemantics.maxExponent++;
1892 extendedSemantics.minExponent--;
1893 extendedSemantics.precision += 2;
1894
1895 IEEEFloat VEx = *this;
1896 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
1897 assert(fs == opOK && !losesInfo);
1898 IEEEFloat PEx = P;
1899 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
1900 assert(fs == opOK && !losesInfo);
1901
1902 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
1903 // any fraction.
1904 fs = VEx.add(VEx, rmNearestTiesToEven);
1905 assert(fs == opOK);
1906
1907 if (VEx.compare(PEx) == cmpGreaterThan) {
1908 fs = subtract(P, rmNearestTiesToEven);
1909 assert(fs == opOK);
1910
1911 // Make VEx = this.add(this), but because we have different semantics, we do
1912 // not want to `convert` again, so we just subtract PEx twice (which equals
1913 // to the desired value).
1914 fs = VEx.subtract(PEx, rmNearestTiesToEven);
1915 assert(fs == opOK);
1916 fs = VEx.subtract(PEx, rmNearestTiesToEven);
1917 assert(fs == opOK);
1918
1919 cmpResult result = VEx.compare(PEx);
1920 if (result == cmpGreaterThan || result == cmpEqual) {
1921 fs = subtract(P, rmNearestTiesToEven);
1922 assert(fs == opOK);
1923 }
1924 }
1925
1926 if (isZero())
1927 sign = origSign; // IEEE754 requires this
1928 else
1929 sign ^= origSign;
1930 return fs;
1931 }
1932
1933 /* Normalized llvm frem (C fmod). */
mod(const IEEEFloat & rhs)1934 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
1935 opStatus fs;
1936 fs = modSpecials(rhs);
1937 unsigned int origSign = sign;
1938
1939 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
1940 compareAbsoluteValue(rhs) != cmpLessThan) {
1941 IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven);
1942 if (compareAbsoluteValue(V) == cmpLessThan)
1943 V = scalbn(V, -1, rmNearestTiesToEven);
1944 V.sign = sign;
1945
1946 fs = subtract(V, rmNearestTiesToEven);
1947 assert(fs==opOK);
1948 }
1949 if (isZero())
1950 sign = origSign; // fmod requires this
1951 return fs;
1952 }
1953
1954 /* Normalized fused-multiply-add. */
fusedMultiplyAdd(const IEEEFloat & multiplicand,const IEEEFloat & addend,roundingMode rounding_mode)1955 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
1956 const IEEEFloat &addend,
1957 roundingMode rounding_mode) {
1958 opStatus fs;
1959
1960 /* Post-multiplication sign, before addition. */
1961 sign ^= multiplicand.sign;
1962
1963 /* If and only if all arguments are normal do we need to do an
1964 extended-precision calculation. */
1965 if (isFiniteNonZero() &&
1966 multiplicand.isFiniteNonZero() &&
1967 addend.isFinite()) {
1968 lostFraction lost_fraction;
1969
1970 lost_fraction = multiplySignificand(multiplicand, addend);
1971 fs = normalize(rounding_mode, lost_fraction);
1972 if (lost_fraction != lfExactlyZero)
1973 fs = (opStatus) (fs | opInexact);
1974
1975 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1976 positive zero unless rounding to minus infinity, except that
1977 adding two like-signed zeroes gives that zero. */
1978 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1979 sign = (rounding_mode == rmTowardNegative);
1980 } else {
1981 fs = multiplySpecials(multiplicand);
1982
1983 /* FS can only be opOK or opInvalidOp. There is no more work
1984 to do in the latter case. The IEEE-754R standard says it is
1985 implementation-defined in this case whether, if ADDEND is a
1986 quiet NaN, we raise invalid op; this implementation does so.
1987
1988 If we need to do the addition we can do so with normal
1989 precision. */
1990 if (fs == opOK)
1991 fs = addOrSubtract(addend, rounding_mode, false);
1992 }
1993
1994 return fs;
1995 }
1996
1997 /* Rounding-mode correct round to integral value. */
roundToIntegral(roundingMode rounding_mode)1998 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
1999 opStatus fs;
2000
2001 if (isInfinity())
2002 // [IEEE Std 754-2008 6.1]:
2003 // The behavior of infinity in floating-point arithmetic is derived from the
2004 // limiting cases of real arithmetic with operands of arbitrarily
2005 // large magnitude, when such a limit exists.
2006 // ...
2007 // Operations on infinite operands are usually exact and therefore signal no
2008 // exceptions ...
2009 return opOK;
2010
2011 if (isNaN()) {
2012 if (isSignaling()) {
2013 // [IEEE Std 754-2008 6.2]:
2014 // Under default exception handling, any operation signaling an invalid
2015 // operation exception and for which a floating-point result is to be
2016 // delivered shall deliver a quiet NaN.
2017 makeQuiet();
2018 // [IEEE Std 754-2008 6.2]:
2019 // Signaling NaNs shall be reserved operands that, under default exception
2020 // handling, signal the invalid operation exception(see 7.2) for every
2021 // general-computational and signaling-computational operation except for
2022 // the conversions described in 5.12.
2023 return opInvalidOp;
2024 } else {
2025 // [IEEE Std 754-2008 6.2]:
2026 // For an operation with quiet NaN inputs, other than maximum and minimum
2027 // operations, if a floating-point result is to be delivered the result
2028 // shall be a quiet NaN which should be one of the input NaNs.
2029 // ...
2030 // Every general-computational and quiet-computational operation involving
2031 // one or more input NaNs, none of them signaling, shall signal no
2032 // exception, except fusedMultiplyAdd might signal the invalid operation
2033 // exception(see 7.2).
2034 return opOK;
2035 }
2036 }
2037
2038 if (isZero()) {
2039 // [IEEE Std 754-2008 6.3]:
2040 // ... the sign of the result of conversions, the quantize operation, the
2041 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2042 // the sign of the first or only operand.
2043 return opOK;
2044 }
2045
2046 // If the exponent is large enough, we know that this value is already
2047 // integral, and the arithmetic below would potentially cause it to saturate
2048 // to +/-Inf. Bail out early instead.
2049 if (exponent+1 >= (int)semanticsPrecision(*semantics))
2050 return opOK;
2051
2052 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2053 // precision of our format, and then subtract it back off again. The choice
2054 // of rounding modes for the addition/subtraction determines the rounding mode
2055 // for our integral rounding as well.
2056 // NOTE: When the input value is negative, we do subtraction followed by
2057 // addition instead.
2058 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2059 IntegerConstant <<= semanticsPrecision(*semantics)-1;
2060 IEEEFloat MagicConstant(*semantics);
2061 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2062 rmNearestTiesToEven);
2063 assert(fs == opOK);
2064 MagicConstant.sign = sign;
2065
2066 // Preserve the input sign so that we can handle the case of zero result
2067 // correctly.
2068 bool inputSign = isNegative();
2069
2070 fs = add(MagicConstant, rounding_mode);
2071
2072 // Current value and 'MagicConstant' are both integers, so the result of the
2073 // subtraction is always exact according to Sterbenz' lemma.
2074 subtract(MagicConstant, rounding_mode);
2075
2076 // Restore the input sign.
2077 if (inputSign != isNegative())
2078 changeSign();
2079
2080 return fs;
2081 }
2082
2083
2084 /* Comparison requires normalized numbers. */
compare(const IEEEFloat & rhs) const2085 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2086 cmpResult result;
2087
2088 assert(semantics == rhs.semantics);
2089
2090 switch (PackCategoriesIntoKey(category, rhs.category)) {
2091 default:
2092 llvm_unreachable(nullptr);
2093
2094 case PackCategoriesIntoKey(fcNaN, fcZero):
2095 case PackCategoriesIntoKey(fcNaN, fcNormal):
2096 case PackCategoriesIntoKey(fcNaN, fcInfinity):
2097 case PackCategoriesIntoKey(fcNaN, fcNaN):
2098 case PackCategoriesIntoKey(fcZero, fcNaN):
2099 case PackCategoriesIntoKey(fcNormal, fcNaN):
2100 case PackCategoriesIntoKey(fcInfinity, fcNaN):
2101 return cmpUnordered;
2102
2103 case PackCategoriesIntoKey(fcInfinity, fcNormal):
2104 case PackCategoriesIntoKey(fcInfinity, fcZero):
2105 case PackCategoriesIntoKey(fcNormal, fcZero):
2106 if (sign)
2107 return cmpLessThan;
2108 else
2109 return cmpGreaterThan;
2110
2111 case PackCategoriesIntoKey(fcNormal, fcInfinity):
2112 case PackCategoriesIntoKey(fcZero, fcInfinity):
2113 case PackCategoriesIntoKey(fcZero, fcNormal):
2114 if (rhs.sign)
2115 return cmpGreaterThan;
2116 else
2117 return cmpLessThan;
2118
2119 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2120 if (sign == rhs.sign)
2121 return cmpEqual;
2122 else if (sign)
2123 return cmpLessThan;
2124 else
2125 return cmpGreaterThan;
2126
2127 case PackCategoriesIntoKey(fcZero, fcZero):
2128 return cmpEqual;
2129
2130 case PackCategoriesIntoKey(fcNormal, fcNormal):
2131 break;
2132 }
2133
2134 /* Two normal numbers. Do they have the same sign? */
2135 if (sign != rhs.sign) {
2136 if (sign)
2137 result = cmpLessThan;
2138 else
2139 result = cmpGreaterThan;
2140 } else {
2141 /* Compare absolute values; invert result if negative. */
2142 result = compareAbsoluteValue(rhs);
2143
2144 if (sign) {
2145 if (result == cmpLessThan)
2146 result = cmpGreaterThan;
2147 else if (result == cmpGreaterThan)
2148 result = cmpLessThan;
2149 }
2150 }
2151
2152 return result;
2153 }
2154
2155 /// IEEEFloat::convert - convert a value of one floating point type to another.
2156 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
2157 /// records whether the transformation lost information, i.e. whether
2158 /// converting the result back to the original type will produce the
2159 /// original value (this is almost the same as return value==fsOK, but there
2160 /// are edge cases where this is not so).
2161
convert(const fltSemantics & toSemantics,roundingMode rounding_mode,bool * losesInfo)2162 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2163 roundingMode rounding_mode,
2164 bool *losesInfo) {
2165 lostFraction lostFraction;
2166 unsigned int newPartCount, oldPartCount;
2167 opStatus fs;
2168 int shift;
2169 const fltSemantics &fromSemantics = *semantics;
2170
2171 lostFraction = lfExactlyZero;
2172 newPartCount = partCountForBits(toSemantics.precision + 1);
2173 oldPartCount = partCount();
2174 shift = toSemantics.precision - fromSemantics.precision;
2175
2176 bool X86SpecialNan = false;
2177 if (&fromSemantics == &semX87DoubleExtended &&
2178 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2179 (!(*significandParts() & 0x8000000000000000ULL) ||
2180 !(*significandParts() & 0x4000000000000000ULL))) {
2181 // x86 has some unusual NaNs which cannot be represented in any other
2182 // format; note them here.
2183 X86SpecialNan = true;
2184 }
2185
2186 // If this is a truncation of a denormal number, and the target semantics
2187 // has larger exponent range than the source semantics (this can happen
2188 // when truncating from PowerPC double-double to double format), the
2189 // right shift could lose result mantissa bits. Adjust exponent instead
2190 // of performing excessive shift.
2191 if (shift < 0 && isFiniteNonZero()) {
2192 int exponentChange = significandMSB() + 1 - fromSemantics.precision;
2193 if (exponent + exponentChange < toSemantics.minExponent)
2194 exponentChange = toSemantics.minExponent - exponent;
2195 if (exponentChange < shift)
2196 exponentChange = shift;
2197 if (exponentChange < 0) {
2198 shift -= exponentChange;
2199 exponent += exponentChange;
2200 }
2201 }
2202
2203 // If this is a truncation, perform the shift before we narrow the storage.
2204 if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
2205 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2206
2207 // Fix the storage so it can hold to new value.
2208 if (newPartCount > oldPartCount) {
2209 // The new type requires more storage; make it available.
2210 integerPart *newParts;
2211 newParts = new integerPart[newPartCount];
2212 APInt::tcSet(newParts, 0, newPartCount);
2213 if (isFiniteNonZero() || category==fcNaN)
2214 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2215 freeSignificand();
2216 significand.parts = newParts;
2217 } else if (newPartCount == 1 && oldPartCount != 1) {
2218 // Switch to built-in storage for a single part.
2219 integerPart newPart = 0;
2220 if (isFiniteNonZero() || category==fcNaN)
2221 newPart = significandParts()[0];
2222 freeSignificand();
2223 significand.part = newPart;
2224 }
2225
2226 // Now that we have the right storage, switch the semantics.
2227 semantics = &toSemantics;
2228
2229 // If this is an extension, perform the shift now that the storage is
2230 // available.
2231 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2232 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2233
2234 if (isFiniteNonZero()) {
2235 fs = normalize(rounding_mode, lostFraction);
2236 *losesInfo = (fs != opOK);
2237 } else if (category == fcNaN) {
2238 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2239
2240 // For x87 extended precision, we want to make a NaN, not a special NaN if
2241 // the input wasn't special either.
2242 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2243 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2244
2245 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2246 // does not give you back the same bits. This is dubious, and we
2247 // don't currently do it. You're really supposed to get
2248 // an invalid operation signal at runtime, but nobody does that.
2249 fs = opOK;
2250 } else {
2251 *losesInfo = false;
2252 fs = opOK;
2253 }
2254
2255 return fs;
2256 }
2257
2258 /* Convert a floating point number to an integer according to the
2259 rounding mode. If the rounded integer value is out of range this
2260 returns an invalid operation exception and the contents of the
2261 destination parts are unspecified. If the rounded value is in
2262 range but the floating point number is not the exact integer, the C
2263 standard doesn't require an inexact exception to be raised. IEEE
2264 854 does require it so we do that.
2265
2266 Note that for conversions to integer type the C standard requires
2267 round-to-zero to always be used. */
convertToSignExtendedInteger(MutableArrayRef<integerPart> parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2268 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2269 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2270 roundingMode rounding_mode, bool *isExact) const {
2271 lostFraction lost_fraction;
2272 const integerPart *src;
2273 unsigned int dstPartsCount, truncatedBits;
2274
2275 *isExact = false;
2276
2277 /* Handle the three special cases first. */
2278 if (category == fcInfinity || category == fcNaN)
2279 return opInvalidOp;
2280
2281 dstPartsCount = partCountForBits(width);
2282 assert(dstPartsCount <= parts.size() && "Integer too big");
2283
2284 if (category == fcZero) {
2285 APInt::tcSet(parts.data(), 0, dstPartsCount);
2286 // Negative zero can't be represented as an int.
2287 *isExact = !sign;
2288 return opOK;
2289 }
2290
2291 src = significandParts();
2292
2293 /* Step 1: place our absolute value, with any fraction truncated, in
2294 the destination. */
2295 if (exponent < 0) {
2296 /* Our absolute value is less than one; truncate everything. */
2297 APInt::tcSet(parts.data(), 0, dstPartsCount);
2298 /* For exponent -1 the integer bit represents .5, look at that.
2299 For smaller exponents leftmost truncated bit is 0. */
2300 truncatedBits = semantics->precision -1U - exponent;
2301 } else {
2302 /* We want the most significant (exponent + 1) bits; the rest are
2303 truncated. */
2304 unsigned int bits = exponent + 1U;
2305
2306 /* Hopelessly large in magnitude? */
2307 if (bits > width)
2308 return opInvalidOp;
2309
2310 if (bits < semantics->precision) {
2311 /* We truncate (semantics->precision - bits) bits. */
2312 truncatedBits = semantics->precision - bits;
2313 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2314 } else {
2315 /* We want at least as many bits as are available. */
2316 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2317 0);
2318 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2319 bits - semantics->precision);
2320 truncatedBits = 0;
2321 }
2322 }
2323
2324 /* Step 2: work out any lost fraction, and increment the absolute
2325 value if we would round away from zero. */
2326 if (truncatedBits) {
2327 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2328 truncatedBits);
2329 if (lost_fraction != lfExactlyZero &&
2330 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2331 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2332 return opInvalidOp; /* Overflow. */
2333 }
2334 } else {
2335 lost_fraction = lfExactlyZero;
2336 }
2337
2338 /* Step 3: check if we fit in the destination. */
2339 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2340
2341 if (sign) {
2342 if (!isSigned) {
2343 /* Negative numbers cannot be represented as unsigned. */
2344 if (omsb != 0)
2345 return opInvalidOp;
2346 } else {
2347 /* It takes omsb bits to represent the unsigned integer value.
2348 We lose a bit for the sign, but care is needed as the
2349 maximally negative integer is a special case. */
2350 if (omsb == width &&
2351 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2352 return opInvalidOp;
2353
2354 /* This case can happen because of rounding. */
2355 if (omsb > width)
2356 return opInvalidOp;
2357 }
2358
2359 APInt::tcNegate (parts.data(), dstPartsCount);
2360 } else {
2361 if (omsb >= width + !isSigned)
2362 return opInvalidOp;
2363 }
2364
2365 if (lost_fraction == lfExactlyZero) {
2366 *isExact = true;
2367 return opOK;
2368 } else
2369 return opInexact;
2370 }
2371
2372 /* Same as convertToSignExtendedInteger, except we provide
2373 deterministic values in case of an invalid operation exception,
2374 namely zero for NaNs and the minimal or maximal value respectively
2375 for underflow or overflow.
2376 The *isExact output tells whether the result is exact, in the sense
2377 that converting it back to the original floating point type produces
2378 the original value. This is almost equivalent to result==opOK,
2379 except for negative zeroes.
2380 */
2381 IEEEFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> parts,unsigned int width,bool isSigned,roundingMode rounding_mode,bool * isExact) const2382 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2383 unsigned int width, bool isSigned,
2384 roundingMode rounding_mode, bool *isExact) const {
2385 opStatus fs;
2386
2387 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2388 isExact);
2389
2390 if (fs == opInvalidOp) {
2391 unsigned int bits, dstPartsCount;
2392
2393 dstPartsCount = partCountForBits(width);
2394 assert(dstPartsCount <= parts.size() && "Integer too big");
2395
2396 if (category == fcNaN)
2397 bits = 0;
2398 else if (sign)
2399 bits = isSigned;
2400 else
2401 bits = width - isSigned;
2402
2403 APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2404 if (sign && isSigned)
2405 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2406 }
2407
2408 return fs;
2409 }
2410
2411 /* Convert an unsigned integer SRC to a floating point number,
2412 rounding according to ROUNDING_MODE. The sign of the floating
2413 point number is not modified. */
convertFromUnsignedParts(const integerPart * src,unsigned int srcCount,roundingMode rounding_mode)2414 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2415 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2416 unsigned int omsb, precision, dstCount;
2417 integerPart *dst;
2418 lostFraction lost_fraction;
2419
2420 category = fcNormal;
2421 omsb = APInt::tcMSB(src, srcCount) + 1;
2422 dst = significandParts();
2423 dstCount = partCount();
2424 precision = semantics->precision;
2425
2426 /* We want the most significant PRECISION bits of SRC. There may not
2427 be that many; extract what we can. */
2428 if (precision <= omsb) {
2429 exponent = omsb - 1;
2430 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2431 omsb - precision);
2432 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2433 } else {
2434 exponent = precision - 1;
2435 lost_fraction = lfExactlyZero;
2436 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2437 }
2438
2439 return normalize(rounding_mode, lost_fraction);
2440 }
2441
convertFromAPInt(const APInt & Val,bool isSigned,roundingMode rounding_mode)2442 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2443 roundingMode rounding_mode) {
2444 unsigned int partCount = Val.getNumWords();
2445 APInt api = Val;
2446
2447 sign = false;
2448 if (isSigned && api.isNegative()) {
2449 sign = true;
2450 api = -api;
2451 }
2452
2453 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2454 }
2455
2456 /* Convert a two's complement integer SRC to a floating point number,
2457 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2458 integer is signed, in which case it must be sign-extended. */
2459 IEEEFloat::opStatus
convertFromSignExtendedInteger(const integerPart * src,unsigned int srcCount,bool isSigned,roundingMode rounding_mode)2460 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2461 unsigned int srcCount, bool isSigned,
2462 roundingMode rounding_mode) {
2463 opStatus status;
2464
2465 if (isSigned &&
2466 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2467 integerPart *copy;
2468
2469 /* If we're signed and negative negate a copy. */
2470 sign = true;
2471 copy = new integerPart[srcCount];
2472 APInt::tcAssign(copy, src, srcCount);
2473 APInt::tcNegate(copy, srcCount);
2474 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2475 delete [] copy;
2476 } else {
2477 sign = false;
2478 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2479 }
2480
2481 return status;
2482 }
2483
2484 /* FIXME: should this just take a const APInt reference? */
2485 IEEEFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * parts,unsigned int width,bool isSigned,roundingMode rounding_mode)2486 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2487 unsigned int width, bool isSigned,
2488 roundingMode rounding_mode) {
2489 unsigned int partCount = partCountForBits(width);
2490 APInt api = APInt(width, makeArrayRef(parts, partCount));
2491
2492 sign = false;
2493 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2494 sign = true;
2495 api = -api;
2496 }
2497
2498 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2499 }
2500
2501 Expected<IEEEFloat::opStatus>
convertFromHexadecimalString(StringRef s,roundingMode rounding_mode)2502 IEEEFloat::convertFromHexadecimalString(StringRef s,
2503 roundingMode rounding_mode) {
2504 lostFraction lost_fraction = lfExactlyZero;
2505
2506 category = fcNormal;
2507 zeroSignificand();
2508 exponent = 0;
2509
2510 integerPart *significand = significandParts();
2511 unsigned partsCount = partCount();
2512 unsigned bitPos = partsCount * integerPartWidth;
2513 bool computedTrailingFraction = false;
2514
2515 // Skip leading zeroes and any (hexa)decimal point.
2516 StringRef::iterator begin = s.begin();
2517 StringRef::iterator end = s.end();
2518 StringRef::iterator dot;
2519 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2520 if (!PtrOrErr)
2521 return PtrOrErr.takeError();
2522 StringRef::iterator p = *PtrOrErr;
2523 StringRef::iterator firstSignificantDigit = p;
2524
2525 while (p != end) {
2526 integerPart hex_value;
2527
2528 if (*p == '.') {
2529 if (dot != end)
2530 return createError("String contains multiple dots");
2531 dot = p++;
2532 continue;
2533 }
2534
2535 hex_value = hexDigitValue(*p);
2536 if (hex_value == -1U)
2537 break;
2538
2539 p++;
2540
2541 // Store the number while we have space.
2542 if (bitPos) {
2543 bitPos -= 4;
2544 hex_value <<= bitPos % integerPartWidth;
2545 significand[bitPos / integerPartWidth] |= hex_value;
2546 } else if (!computedTrailingFraction) {
2547 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2548 if (!FractOrErr)
2549 return FractOrErr.takeError();
2550 lost_fraction = *FractOrErr;
2551 computedTrailingFraction = true;
2552 }
2553 }
2554
2555 /* Hex floats require an exponent but not a hexadecimal point. */
2556 if (p == end)
2557 return createError("Hex strings require an exponent");
2558 if (*p != 'p' && *p != 'P')
2559 return createError("Invalid character in significand");
2560 if (p == begin)
2561 return createError("Significand has no digits");
2562 if (dot != end && p - begin == 1)
2563 return createError("Significand has no digits");
2564
2565 /* Ignore the exponent if we are zero. */
2566 if (p != firstSignificantDigit) {
2567 int expAdjustment;
2568
2569 /* Implicit hexadecimal point? */
2570 if (dot == end)
2571 dot = p;
2572
2573 /* Calculate the exponent adjustment implicit in the number of
2574 significant digits. */
2575 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2576 if (expAdjustment < 0)
2577 expAdjustment++;
2578 expAdjustment = expAdjustment * 4 - 1;
2579
2580 /* Adjust for writing the significand starting at the most
2581 significant nibble. */
2582 expAdjustment += semantics->precision;
2583 expAdjustment -= partsCount * integerPartWidth;
2584
2585 /* Adjust for the given exponent. */
2586 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2587 if (!ExpOrErr)
2588 return ExpOrErr.takeError();
2589 exponent = *ExpOrErr;
2590 }
2591
2592 return normalize(rounding_mode, lost_fraction);
2593 }
2594
2595 IEEEFloat::opStatus
roundSignificandWithExponent(const integerPart * decSigParts,unsigned sigPartCount,int exp,roundingMode rounding_mode)2596 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2597 unsigned sigPartCount, int exp,
2598 roundingMode rounding_mode) {
2599 unsigned int parts, pow5PartCount;
2600 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2601 integerPart pow5Parts[maxPowerOfFiveParts];
2602 bool isNearest;
2603
2604 isNearest = (rounding_mode == rmNearestTiesToEven ||
2605 rounding_mode == rmNearestTiesToAway);
2606
2607 parts = partCountForBits(semantics->precision + 11);
2608
2609 /* Calculate pow(5, abs(exp)). */
2610 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2611
2612 for (;; parts *= 2) {
2613 opStatus sigStatus, powStatus;
2614 unsigned int excessPrecision, truncatedBits;
2615
2616 calcSemantics.precision = parts * integerPartWidth - 1;
2617 excessPrecision = calcSemantics.precision - semantics->precision;
2618 truncatedBits = excessPrecision;
2619
2620 IEEEFloat decSig(calcSemantics, uninitialized);
2621 decSig.makeZero(sign);
2622 IEEEFloat pow5(calcSemantics);
2623
2624 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2625 rmNearestTiesToEven);
2626 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2627 rmNearestTiesToEven);
2628 /* Add exp, as 10^n = 5^n * 2^n. */
2629 decSig.exponent += exp;
2630
2631 lostFraction calcLostFraction;
2632 integerPart HUerr, HUdistance;
2633 unsigned int powHUerr;
2634
2635 if (exp >= 0) {
2636 /* multiplySignificand leaves the precision-th bit set to 1. */
2637 calcLostFraction = decSig.multiplySignificand(pow5);
2638 powHUerr = powStatus != opOK;
2639 } else {
2640 calcLostFraction = decSig.divideSignificand(pow5);
2641 /* Denormal numbers have less precision. */
2642 if (decSig.exponent < semantics->minExponent) {
2643 excessPrecision += (semantics->minExponent - decSig.exponent);
2644 truncatedBits = excessPrecision;
2645 if (excessPrecision > calcSemantics.precision)
2646 excessPrecision = calcSemantics.precision;
2647 }
2648 /* Extra half-ulp lost in reciprocal of exponent. */
2649 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2650 }
2651
2652 /* Both multiplySignificand and divideSignificand return the
2653 result with the integer bit set. */
2654 assert(APInt::tcExtractBit
2655 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2656
2657 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2658 powHUerr);
2659 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2660 excessPrecision, isNearest);
2661
2662 /* Are we guaranteed to round correctly if we truncate? */
2663 if (HUdistance >= HUerr) {
2664 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2665 calcSemantics.precision - excessPrecision,
2666 excessPrecision);
2667 /* Take the exponent of decSig. If we tcExtract-ed less bits
2668 above we must adjust our exponent to compensate for the
2669 implicit right shift. */
2670 exponent = (decSig.exponent + semantics->precision
2671 - (calcSemantics.precision - excessPrecision));
2672 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2673 decSig.partCount(),
2674 truncatedBits);
2675 return normalize(rounding_mode, calcLostFraction);
2676 }
2677 }
2678 }
2679
2680 Expected<IEEEFloat::opStatus>
convertFromDecimalString(StringRef str,roundingMode rounding_mode)2681 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2682 decimalInfo D;
2683 opStatus fs;
2684
2685 /* Scan the text. */
2686 StringRef::iterator p = str.begin();
2687 if (Error Err = interpretDecimal(p, str.end(), &D))
2688 return std::move(Err);
2689
2690 /* Handle the quick cases. First the case of no significant digits,
2691 i.e. zero, and then exponents that are obviously too large or too
2692 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2693 definitely overflows if
2694
2695 (exp - 1) * L >= maxExponent
2696
2697 and definitely underflows to zero where
2698
2699 (exp + 1) * L <= minExponent - precision
2700
2701 With integer arithmetic the tightest bounds for L are
2702
2703 93/28 < L < 196/59 [ numerator <= 256 ]
2704 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2705 */
2706
2707 // Test if we have a zero number allowing for strings with no null terminators
2708 // and zero decimals with non-zero exponents.
2709 //
2710 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2711 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2712 // be at most one dot. On the other hand, if we have a zero with a non-zero
2713 // exponent, then we know that D.firstSigDigit will be non-numeric.
2714 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2715 category = fcZero;
2716 fs = opOK;
2717
2718 /* Check whether the normalized exponent is high enough to overflow
2719 max during the log-rebasing in the max-exponent check below. */
2720 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2721 fs = handleOverflow(rounding_mode);
2722
2723 /* If it wasn't, then it also wasn't high enough to overflow max
2724 during the log-rebasing in the min-exponent check. Check that it
2725 won't overflow min in either check, then perform the min-exponent
2726 check. */
2727 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2728 (D.normalizedExponent + 1) * 28738 <=
2729 8651 * (semantics->minExponent - (int) semantics->precision)) {
2730 /* Underflow to zero and round. */
2731 category = fcNormal;
2732 zeroSignificand();
2733 fs = normalize(rounding_mode, lfLessThanHalf);
2734
2735 /* We can finally safely perform the max-exponent check. */
2736 } else if ((D.normalizedExponent - 1) * 42039
2737 >= 12655 * semantics->maxExponent) {
2738 /* Overflow and round. */
2739 fs = handleOverflow(rounding_mode);
2740 } else {
2741 integerPart *decSignificand;
2742 unsigned int partCount;
2743
2744 /* A tight upper bound on number of bits required to hold an
2745 N-digit decimal integer is N * 196 / 59. Allocate enough space
2746 to hold the full significand, and an extra part required by
2747 tcMultiplyPart. */
2748 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2749 partCount = partCountForBits(1 + 196 * partCount / 59);
2750 decSignificand = new integerPart[partCount + 1];
2751 partCount = 0;
2752
2753 /* Convert to binary efficiently - we do almost all multiplication
2754 in an integerPart. When this would overflow do we do a single
2755 bignum multiplication, and then revert again to multiplication
2756 in an integerPart. */
2757 do {
2758 integerPart decValue, val, multiplier;
2759
2760 val = 0;
2761 multiplier = 1;
2762
2763 do {
2764 if (*p == '.') {
2765 p++;
2766 if (p == str.end()) {
2767 break;
2768 }
2769 }
2770 decValue = decDigitValue(*p++);
2771 if (decValue >= 10U) {
2772 delete[] decSignificand;
2773 return createError("Invalid character in significand");
2774 }
2775 multiplier *= 10;
2776 val = val * 10 + decValue;
2777 /* The maximum number that can be multiplied by ten with any
2778 digit added without overflowing an integerPart. */
2779 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2780
2781 /* Multiply out the current part. */
2782 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2783 partCount, partCount + 1, false);
2784
2785 /* If we used another part (likely but not guaranteed), increase
2786 the count. */
2787 if (decSignificand[partCount])
2788 partCount++;
2789 } while (p <= D.lastSigDigit);
2790
2791 category = fcNormal;
2792 fs = roundSignificandWithExponent(decSignificand, partCount,
2793 D.exponent, rounding_mode);
2794
2795 delete [] decSignificand;
2796 }
2797
2798 return fs;
2799 }
2800
convertFromStringSpecials(StringRef str)2801 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2802 const size_t MIN_NAME_SIZE = 3;
2803
2804 if (str.size() < MIN_NAME_SIZE)
2805 return false;
2806
2807 if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
2808 makeInf(false);
2809 return true;
2810 }
2811
2812 bool IsNegative = str.front() == '-';
2813 if (IsNegative) {
2814 str = str.drop_front();
2815 if (str.size() < MIN_NAME_SIZE)
2816 return false;
2817
2818 if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) {
2819 makeInf(true);
2820 return true;
2821 }
2822 }
2823
2824 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
2825 bool IsSignaling = str.front() == 's' || str.front() == 'S';
2826 if (IsSignaling) {
2827 str = str.drop_front();
2828 if (str.size() < MIN_NAME_SIZE)
2829 return false;
2830 }
2831
2832 if (str.startswith("nan") || str.startswith("NaN")) {
2833 str = str.drop_front(3);
2834
2835 // A NaN without payload.
2836 if (str.empty()) {
2837 makeNaN(IsSignaling, IsNegative);
2838 return true;
2839 }
2840
2841 // Allow the payload to be inside parentheses.
2842 if (str.front() == '(') {
2843 // Parentheses should be balanced (and not empty).
2844 if (str.size() <= 2 || str.back() != ')')
2845 return false;
2846
2847 str = str.slice(1, str.size() - 1);
2848 }
2849
2850 // Determine the payload number's radix.
2851 unsigned Radix = 10;
2852 if (str[0] == '0') {
2853 if (str.size() > 1 && tolower(str[1]) == 'x') {
2854 str = str.drop_front(2);
2855 Radix = 16;
2856 } else
2857 Radix = 8;
2858 }
2859
2860 // Parse the payload and make the NaN.
2861 APInt Payload;
2862 if (!str.getAsInteger(Radix, Payload)) {
2863 makeNaN(IsSignaling, IsNegative, &Payload);
2864 return true;
2865 }
2866 }
2867
2868 return false;
2869 }
2870
2871 Expected<IEEEFloat::opStatus>
convertFromString(StringRef str,roundingMode rounding_mode)2872 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
2873 if (str.empty())
2874 return createError("Invalid string length");
2875
2876 // Handle special cases.
2877 if (convertFromStringSpecials(str))
2878 return opOK;
2879
2880 /* Handle a leading minus sign. */
2881 StringRef::iterator p = str.begin();
2882 size_t slen = str.size();
2883 sign = *p == '-' ? 1 : 0;
2884 if (*p == '-' || *p == '+') {
2885 p++;
2886 slen--;
2887 if (!slen)
2888 return createError("String has no digits");
2889 }
2890
2891 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2892 if (slen == 2)
2893 return createError("Invalid string");
2894 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2895 rounding_mode);
2896 }
2897
2898 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2899 }
2900
2901 /* Write out a hexadecimal representation of the floating point value
2902 to DST, which must be of sufficient size, in the C99 form
2903 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2904 excluding the terminating NUL.
2905
2906 If UPPERCASE, the output is in upper case, otherwise in lower case.
2907
2908 HEXDIGITS digits appear altogether, rounding the value if
2909 necessary. If HEXDIGITS is 0, the minimal precision to display the
2910 number precisely is used instead. If nothing would appear after
2911 the decimal point it is suppressed.
2912
2913 The decimal exponent is always printed and has at least one digit.
2914 Zero values display an exponent of zero. Infinities and NaNs
2915 appear as "infinity" or "nan" respectively.
2916
2917 The above rules are as specified by C99. There is ambiguity about
2918 what the leading hexadecimal digit should be. This implementation
2919 uses whatever is necessary so that the exponent is displayed as
2920 stored. This implies the exponent will fall within the IEEE format
2921 range, and the leading hexadecimal digit will be 0 (for denormals),
2922 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2923 any other digits zero).
2924 */
convertToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2925 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
2926 bool upperCase,
2927 roundingMode rounding_mode) const {
2928 char *p;
2929
2930 p = dst;
2931 if (sign)
2932 *dst++ = '-';
2933
2934 switch (category) {
2935 case fcInfinity:
2936 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2937 dst += sizeof infinityL - 1;
2938 break;
2939
2940 case fcNaN:
2941 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2942 dst += sizeof NaNU - 1;
2943 break;
2944
2945 case fcZero:
2946 *dst++ = '0';
2947 *dst++ = upperCase ? 'X': 'x';
2948 *dst++ = '0';
2949 if (hexDigits > 1) {
2950 *dst++ = '.';
2951 memset (dst, '0', hexDigits - 1);
2952 dst += hexDigits - 1;
2953 }
2954 *dst++ = upperCase ? 'P': 'p';
2955 *dst++ = '0';
2956 break;
2957
2958 case fcNormal:
2959 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2960 break;
2961 }
2962
2963 *dst = 0;
2964
2965 return static_cast<unsigned int>(dst - p);
2966 }
2967
2968 /* Does the hard work of outputting the correctly rounded hexadecimal
2969 form of a normal floating point number with the specified number of
2970 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2971 digits necessary to print the value precisely is output. */
convertNormalToHexString(char * dst,unsigned int hexDigits,bool upperCase,roundingMode rounding_mode) const2972 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2973 bool upperCase,
2974 roundingMode rounding_mode) const {
2975 unsigned int count, valueBits, shift, partsCount, outputDigits;
2976 const char *hexDigitChars;
2977 const integerPart *significand;
2978 char *p;
2979 bool roundUp;
2980
2981 *dst++ = '0';
2982 *dst++ = upperCase ? 'X': 'x';
2983
2984 roundUp = false;
2985 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2986
2987 significand = significandParts();
2988 partsCount = partCount();
2989
2990 /* +3 because the first digit only uses the single integer bit, so
2991 we have 3 virtual zero most-significant-bits. */
2992 valueBits = semantics->precision + 3;
2993 shift = integerPartWidth - valueBits % integerPartWidth;
2994
2995 /* The natural number of digits required ignoring trailing
2996 insignificant zeroes. */
2997 outputDigits = (valueBits - significandLSB () + 3) / 4;
2998
2999 /* hexDigits of zero means use the required number for the
3000 precision. Otherwise, see if we are truncating. If we are,
3001 find out if we need to round away from zero. */
3002 if (hexDigits) {
3003 if (hexDigits < outputDigits) {
3004 /* We are dropping non-zero bits, so need to check how to round.
3005 "bits" is the number of dropped bits. */
3006 unsigned int bits;
3007 lostFraction fraction;
3008
3009 bits = valueBits - hexDigits * 4;
3010 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3011 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3012 }
3013 outputDigits = hexDigits;
3014 }
3015
3016 /* Write the digits consecutively, and start writing in the location
3017 of the hexadecimal point. We move the most significant digit
3018 left and add the hexadecimal point later. */
3019 p = ++dst;
3020
3021 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3022
3023 while (outputDigits && count) {
3024 integerPart part;
3025
3026 /* Put the most significant integerPartWidth bits in "part". */
3027 if (--count == partsCount)
3028 part = 0; /* An imaginary higher zero part. */
3029 else
3030 part = significand[count] << shift;
3031
3032 if (count && shift)
3033 part |= significand[count - 1] >> (integerPartWidth - shift);
3034
3035 /* Convert as much of "part" to hexdigits as we can. */
3036 unsigned int curDigits = integerPartWidth / 4;
3037
3038 if (curDigits > outputDigits)
3039 curDigits = outputDigits;
3040 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3041 outputDigits -= curDigits;
3042 }
3043
3044 if (roundUp) {
3045 char *q = dst;
3046
3047 /* Note that hexDigitChars has a trailing '0'. */
3048 do {
3049 q--;
3050 *q = hexDigitChars[hexDigitValue (*q) + 1];
3051 } while (*q == '0');
3052 assert(q >= p);
3053 } else {
3054 /* Add trailing zeroes. */
3055 memset (dst, '0', outputDigits);
3056 dst += outputDigits;
3057 }
3058
3059 /* Move the most significant digit to before the point, and if there
3060 is something after the decimal point add it. This must come
3061 after rounding above. */
3062 p[-1] = p[0];
3063 if (dst -1 == p)
3064 dst--;
3065 else
3066 p[0] = '.';
3067
3068 /* Finally output the exponent. */
3069 *dst++ = upperCase ? 'P': 'p';
3070
3071 return writeSignedDecimal (dst, exponent);
3072 }
3073
hash_value(const IEEEFloat & Arg)3074 hash_code hash_value(const IEEEFloat &Arg) {
3075 if (!Arg.isFiniteNonZero())
3076 return hash_combine((uint8_t)Arg.category,
3077 // NaN has no sign, fix it at zero.
3078 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3079 Arg.semantics->precision);
3080
3081 // Normal floats need their exponent and significand hashed.
3082 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3083 Arg.semantics->precision, Arg.exponent,
3084 hash_combine_range(
3085 Arg.significandParts(),
3086 Arg.significandParts() + Arg.partCount()));
3087 }
3088
3089 // Conversion from APFloat to/from host float/double. It may eventually be
3090 // possible to eliminate these and have everybody deal with APFloats, but that
3091 // will take a while. This approach will not easily extend to long double.
3092 // Current implementation requires integerPartWidth==64, which is correct at
3093 // the moment but could be made more general.
3094
3095 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
3096 // the actual IEEE respresentations. We compensate for that here.
3097
convertF80LongDoubleAPFloatToAPInt() const3098 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3099 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3100 assert(partCount()==2);
3101
3102 uint64_t myexponent, mysignificand;
3103
3104 if (isFiniteNonZero()) {
3105 myexponent = exponent+16383; //bias
3106 mysignificand = significandParts()[0];
3107 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3108 myexponent = 0; // denormal
3109 } else if (category==fcZero) {
3110 myexponent = 0;
3111 mysignificand = 0;
3112 } else if (category==fcInfinity) {
3113 myexponent = 0x7fff;
3114 mysignificand = 0x8000000000000000ULL;
3115 } else {
3116 assert(category == fcNaN && "Unknown category");
3117 myexponent = 0x7fff;
3118 mysignificand = significandParts()[0];
3119 }
3120
3121 uint64_t words[2];
3122 words[0] = mysignificand;
3123 words[1] = ((uint64_t)(sign & 1) << 15) |
3124 (myexponent & 0x7fffLL);
3125 return APInt(80, words);
3126 }
3127
convertPPCDoubleDoubleAPFloatToAPInt() const3128 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3129 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3130 assert(partCount()==2);
3131
3132 uint64_t words[2];
3133 opStatus fs;
3134 bool losesInfo;
3135
3136 // Convert number to double. To avoid spurious underflows, we re-
3137 // normalize against the "double" minExponent first, and only *then*
3138 // truncate the mantissa. The result of that second conversion
3139 // may be inexact, but should never underflow.
3140 // Declare fltSemantics before APFloat that uses it (and
3141 // saves pointer to it) to ensure correct destruction order.
3142 fltSemantics extendedSemantics = *semantics;
3143 extendedSemantics.minExponent = semIEEEdouble.minExponent;
3144 IEEEFloat extended(*this);
3145 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3146 assert(fs == opOK && !losesInfo);
3147 (void)fs;
3148
3149 IEEEFloat u(extended);
3150 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3151 assert(fs == opOK || fs == opInexact);
3152 (void)fs;
3153 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3154
3155 // If conversion was exact or resulted in a special case, we're done;
3156 // just set the second double to zero. Otherwise, re-convert back to
3157 // the extended format and compute the difference. This now should
3158 // convert exactly to double.
3159 if (u.isFiniteNonZero() && losesInfo) {
3160 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3161 assert(fs == opOK && !losesInfo);
3162 (void)fs;
3163
3164 IEEEFloat v(extended);
3165 v.subtract(u, rmNearestTiesToEven);
3166 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3167 assert(fs == opOK && !losesInfo);
3168 (void)fs;
3169 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3170 } else {
3171 words[1] = 0;
3172 }
3173
3174 return APInt(128, words);
3175 }
3176
convertQuadrupleAPFloatToAPInt() const3177 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3178 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
3179 assert(partCount()==2);
3180
3181 uint64_t myexponent, mysignificand, mysignificand2;
3182
3183 if (isFiniteNonZero()) {
3184 myexponent = exponent+16383; //bias
3185 mysignificand = significandParts()[0];
3186 mysignificand2 = significandParts()[1];
3187 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
3188 myexponent = 0; // denormal
3189 } else if (category==fcZero) {
3190 myexponent = 0;
3191 mysignificand = mysignificand2 = 0;
3192 } else if (category==fcInfinity) {
3193 myexponent = 0x7fff;
3194 mysignificand = mysignificand2 = 0;
3195 } else {
3196 assert(category == fcNaN && "Unknown category!");
3197 myexponent = 0x7fff;
3198 mysignificand = significandParts()[0];
3199 mysignificand2 = significandParts()[1];
3200 }
3201
3202 uint64_t words[2];
3203 words[0] = mysignificand;
3204 words[1] = ((uint64_t)(sign & 1) << 63) |
3205 ((myexponent & 0x7fff) << 48) |
3206 (mysignificand2 & 0xffffffffffffLL);
3207
3208 return APInt(128, words);
3209 }
3210
convertDoubleAPFloatToAPInt() const3211 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3212 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
3213 assert(partCount()==1);
3214
3215 uint64_t myexponent, mysignificand;
3216
3217 if (isFiniteNonZero()) {
3218 myexponent = exponent+1023; //bias
3219 mysignificand = *significandParts();
3220 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
3221 myexponent = 0; // denormal
3222 } else if (category==fcZero) {
3223 myexponent = 0;
3224 mysignificand = 0;
3225 } else if (category==fcInfinity) {
3226 myexponent = 0x7ff;
3227 mysignificand = 0;
3228 } else {
3229 assert(category == fcNaN && "Unknown category!");
3230 myexponent = 0x7ff;
3231 mysignificand = *significandParts();
3232 }
3233
3234 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
3235 ((myexponent & 0x7ff) << 52) |
3236 (mysignificand & 0xfffffffffffffLL))));
3237 }
3238
convertFloatAPFloatToAPInt() const3239 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3240 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
3241 assert(partCount()==1);
3242
3243 uint32_t myexponent, mysignificand;
3244
3245 if (isFiniteNonZero()) {
3246 myexponent = exponent+127; //bias
3247 mysignificand = (uint32_t)*significandParts();
3248 if (myexponent == 1 && !(mysignificand & 0x800000))
3249 myexponent = 0; // denormal
3250 } else if (category==fcZero) {
3251 myexponent = 0;
3252 mysignificand = 0;
3253 } else if (category==fcInfinity) {
3254 myexponent = 0xff;
3255 mysignificand = 0;
3256 } else {
3257 assert(category == fcNaN && "Unknown category!");
3258 myexponent = 0xff;
3259 mysignificand = (uint32_t)*significandParts();
3260 }
3261
3262 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3263 (mysignificand & 0x7fffff)));
3264 }
3265
convertBFloatAPFloatToAPInt() const3266 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3267 assert(semantics == (const llvm::fltSemantics *)&semBFloat);
3268 assert(partCount() == 1);
3269
3270 uint32_t myexponent, mysignificand;
3271
3272 if (isFiniteNonZero()) {
3273 myexponent = exponent + 127; // bias
3274 mysignificand = (uint32_t)*significandParts();
3275 if (myexponent == 1 && !(mysignificand & 0x80))
3276 myexponent = 0; // denormal
3277 } else if (category == fcZero) {
3278 myexponent = 0;
3279 mysignificand = 0;
3280 } else if (category == fcInfinity) {
3281 myexponent = 0xff;
3282 mysignificand = 0;
3283 } else {
3284 assert(category == fcNaN && "Unknown category!");
3285 myexponent = 0xff;
3286 mysignificand = (uint32_t)*significandParts();
3287 }
3288
3289 return APInt(16, (((sign & 1) << 15) | ((myexponent & 0xff) << 7) |
3290 (mysignificand & 0x7f)));
3291 }
3292
convertHalfAPFloatToAPInt() const3293 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3294 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
3295 assert(partCount()==1);
3296
3297 uint32_t myexponent, mysignificand;
3298
3299 if (isFiniteNonZero()) {
3300 myexponent = exponent+15; //bias
3301 mysignificand = (uint32_t)*significandParts();
3302 if (myexponent == 1 && !(mysignificand & 0x400))
3303 myexponent = 0; // denormal
3304 } else if (category==fcZero) {
3305 myexponent = 0;
3306 mysignificand = 0;
3307 } else if (category==fcInfinity) {
3308 myexponent = 0x1f;
3309 mysignificand = 0;
3310 } else {
3311 assert(category == fcNaN && "Unknown category!");
3312 myexponent = 0x1f;
3313 mysignificand = (uint32_t)*significandParts();
3314 }
3315
3316 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3317 (mysignificand & 0x3ff)));
3318 }
3319
3320 // This function creates an APInt that is just a bit map of the floating
3321 // point constant as it would appear in memory. It is not a conversion,
3322 // and treating the result as a normal integer is unlikely to be useful.
3323
bitcastToAPInt() const3324 APInt IEEEFloat::bitcastToAPInt() const {
3325 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3326 return convertHalfAPFloatToAPInt();
3327
3328 if (semantics == (const llvm::fltSemantics *)&semBFloat)
3329 return convertBFloatAPFloatToAPInt();
3330
3331 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3332 return convertFloatAPFloatToAPInt();
3333
3334 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3335 return convertDoubleAPFloatToAPInt();
3336
3337 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3338 return convertQuadrupleAPFloatToAPInt();
3339
3340 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3341 return convertPPCDoubleDoubleAPFloatToAPInt();
3342
3343 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3344 "unknown format!");
3345 return convertF80LongDoubleAPFloatToAPInt();
3346 }
3347
convertToFloat() const3348 float IEEEFloat::convertToFloat() const {
3349 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3350 "Float semantics are not IEEEsingle");
3351 APInt api = bitcastToAPInt();
3352 return api.bitsToFloat();
3353 }
3354
convertToDouble() const3355 double IEEEFloat::convertToDouble() const {
3356 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3357 "Float semantics are not IEEEdouble");
3358 APInt api = bitcastToAPInt();
3359 return api.bitsToDouble();
3360 }
3361
3362 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3363 /// does not support these bit patterns:
3364 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3365 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3366 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3367 /// exponent = 0, integer bit 1 ("pseudodenormal")
3368 /// At the moment, the first three are treated as NaNs, the last one as Normal.
initFromF80LongDoubleAPInt(const APInt & api)3369 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3370 assert(api.getBitWidth()==80);
3371 uint64_t i1 = api.getRawData()[0];
3372 uint64_t i2 = api.getRawData()[1];
3373 uint64_t myexponent = (i2 & 0x7fff);
3374 uint64_t mysignificand = i1;
3375 uint8_t myintegerbit = mysignificand >> 63;
3376
3377 initialize(&semX87DoubleExtended);
3378 assert(partCount()==2);
3379
3380 sign = static_cast<unsigned int>(i2>>15);
3381 if (myexponent == 0 && mysignificand == 0) {
3382 // exponent, significand meaningless
3383 category = fcZero;
3384 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3385 // exponent, significand meaningless
3386 category = fcInfinity;
3387 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3388 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3389 // exponent meaningless
3390 category = fcNaN;
3391 significandParts()[0] = mysignificand;
3392 significandParts()[1] = 0;
3393 } else {
3394 category = fcNormal;
3395 exponent = myexponent - 16383;
3396 significandParts()[0] = mysignificand;
3397 significandParts()[1] = 0;
3398 if (myexponent==0) // denormal
3399 exponent = -16382;
3400 }
3401 }
3402
initFromPPCDoubleDoubleAPInt(const APInt & api)3403 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3404 assert(api.getBitWidth()==128);
3405 uint64_t i1 = api.getRawData()[0];
3406 uint64_t i2 = api.getRawData()[1];
3407 opStatus fs;
3408 bool losesInfo;
3409
3410 // Get the first double and convert to our format.
3411 initFromDoubleAPInt(APInt(64, i1));
3412 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3413 assert(fs == opOK && !losesInfo);
3414 (void)fs;
3415
3416 // Unless we have a special case, add in second double.
3417 if (isFiniteNonZero()) {
3418 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3419 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3420 assert(fs == opOK && !losesInfo);
3421 (void)fs;
3422
3423 add(v, rmNearestTiesToEven);
3424 }
3425 }
3426
initFromQuadrupleAPInt(const APInt & api)3427 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3428 assert(api.getBitWidth()==128);
3429 uint64_t i1 = api.getRawData()[0];
3430 uint64_t i2 = api.getRawData()[1];
3431 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3432 uint64_t mysignificand = i1;
3433 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3434
3435 initialize(&semIEEEquad);
3436 assert(partCount()==2);
3437
3438 sign = static_cast<unsigned int>(i2>>63);
3439 if (myexponent==0 &&
3440 (mysignificand==0 && mysignificand2==0)) {
3441 // exponent, significand meaningless
3442 category = fcZero;
3443 } else if (myexponent==0x7fff &&
3444 (mysignificand==0 && mysignificand2==0)) {
3445 // exponent, significand meaningless
3446 category = fcInfinity;
3447 } else if (myexponent==0x7fff &&
3448 (mysignificand!=0 || mysignificand2 !=0)) {
3449 // exponent meaningless
3450 category = fcNaN;
3451 significandParts()[0] = mysignificand;
3452 significandParts()[1] = mysignificand2;
3453 } else {
3454 category = fcNormal;
3455 exponent = myexponent - 16383;
3456 significandParts()[0] = mysignificand;
3457 significandParts()[1] = mysignificand2;
3458 if (myexponent==0) // denormal
3459 exponent = -16382;
3460 else
3461 significandParts()[1] |= 0x1000000000000LL; // integer bit
3462 }
3463 }
3464
initFromDoubleAPInt(const APInt & api)3465 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3466 assert(api.getBitWidth()==64);
3467 uint64_t i = *api.getRawData();
3468 uint64_t myexponent = (i >> 52) & 0x7ff;
3469 uint64_t mysignificand = i & 0xfffffffffffffLL;
3470
3471 initialize(&semIEEEdouble);
3472 assert(partCount()==1);
3473
3474 sign = static_cast<unsigned int>(i>>63);
3475 if (myexponent==0 && mysignificand==0) {
3476 // exponent, significand meaningless
3477 category = fcZero;
3478 } else if (myexponent==0x7ff && mysignificand==0) {
3479 // exponent, significand meaningless
3480 category = fcInfinity;
3481 } else if (myexponent==0x7ff && mysignificand!=0) {
3482 // exponent meaningless
3483 category = fcNaN;
3484 *significandParts() = mysignificand;
3485 } else {
3486 category = fcNormal;
3487 exponent = myexponent - 1023;
3488 *significandParts() = mysignificand;
3489 if (myexponent==0) // denormal
3490 exponent = -1022;
3491 else
3492 *significandParts() |= 0x10000000000000LL; // integer bit
3493 }
3494 }
3495
initFromFloatAPInt(const APInt & api)3496 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3497 assert(api.getBitWidth()==32);
3498 uint32_t i = (uint32_t)*api.getRawData();
3499 uint32_t myexponent = (i >> 23) & 0xff;
3500 uint32_t mysignificand = i & 0x7fffff;
3501
3502 initialize(&semIEEEsingle);
3503 assert(partCount()==1);
3504
3505 sign = i >> 31;
3506 if (myexponent==0 && mysignificand==0) {
3507 // exponent, significand meaningless
3508 category = fcZero;
3509 } else if (myexponent==0xff && mysignificand==0) {
3510 // exponent, significand meaningless
3511 category = fcInfinity;
3512 } else if (myexponent==0xff && mysignificand!=0) {
3513 // sign, exponent, significand meaningless
3514 category = fcNaN;
3515 *significandParts() = mysignificand;
3516 } else {
3517 category = fcNormal;
3518 exponent = myexponent - 127; //bias
3519 *significandParts() = mysignificand;
3520 if (myexponent==0) // denormal
3521 exponent = -126;
3522 else
3523 *significandParts() |= 0x800000; // integer bit
3524 }
3525 }
3526
initFromBFloatAPInt(const APInt & api)3527 void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3528 assert(api.getBitWidth() == 16);
3529 uint32_t i = (uint32_t)*api.getRawData();
3530 uint32_t myexponent = (i >> 7) & 0xff;
3531 uint32_t mysignificand = i & 0x7f;
3532
3533 initialize(&semBFloat);
3534 assert(partCount() == 1);
3535
3536 sign = i >> 15;
3537 if (myexponent == 0 && mysignificand == 0) {
3538 // exponent, significand meaningless
3539 category = fcZero;
3540 } else if (myexponent == 0xff && mysignificand == 0) {
3541 // exponent, significand meaningless
3542 category = fcInfinity;
3543 } else if (myexponent == 0xff && mysignificand != 0) {
3544 // sign, exponent, significand meaningless
3545 category = fcNaN;
3546 *significandParts() = mysignificand;
3547 } else {
3548 category = fcNormal;
3549 exponent = myexponent - 127; // bias
3550 *significandParts() = mysignificand;
3551 if (myexponent == 0) // denormal
3552 exponent = -126;
3553 else
3554 *significandParts() |= 0x80; // integer bit
3555 }
3556 }
3557
initFromHalfAPInt(const APInt & api)3558 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3559 assert(api.getBitWidth()==16);
3560 uint32_t i = (uint32_t)*api.getRawData();
3561 uint32_t myexponent = (i >> 10) & 0x1f;
3562 uint32_t mysignificand = i & 0x3ff;
3563
3564 initialize(&semIEEEhalf);
3565 assert(partCount()==1);
3566
3567 sign = i >> 15;
3568 if (myexponent==0 && mysignificand==0) {
3569 // exponent, significand meaningless
3570 category = fcZero;
3571 } else if (myexponent==0x1f && mysignificand==0) {
3572 // exponent, significand meaningless
3573 category = fcInfinity;
3574 } else if (myexponent==0x1f && mysignificand!=0) {
3575 // sign, exponent, significand meaningless
3576 category = fcNaN;
3577 *significandParts() = mysignificand;
3578 } else {
3579 category = fcNormal;
3580 exponent = myexponent - 15; //bias
3581 *significandParts() = mysignificand;
3582 if (myexponent==0) // denormal
3583 exponent = -14;
3584 else
3585 *significandParts() |= 0x400; // integer bit
3586 }
3587 }
3588
3589 /// Treat api as containing the bits of a floating point number. Currently
3590 /// we infer the floating point type from the size of the APInt. The
3591 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3592 /// when the size is anything else).
initFromAPInt(const fltSemantics * Sem,const APInt & api)3593 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3594 if (Sem == &semIEEEhalf)
3595 return initFromHalfAPInt(api);
3596 if (Sem == &semBFloat)
3597 return initFromBFloatAPInt(api);
3598 if (Sem == &semIEEEsingle)
3599 return initFromFloatAPInt(api);
3600 if (Sem == &semIEEEdouble)
3601 return initFromDoubleAPInt(api);
3602 if (Sem == &semX87DoubleExtended)
3603 return initFromF80LongDoubleAPInt(api);
3604 if (Sem == &semIEEEquad)
3605 return initFromQuadrupleAPInt(api);
3606 if (Sem == &semPPCDoubleDoubleLegacy)
3607 return initFromPPCDoubleDoubleAPInt(api);
3608
3609 llvm_unreachable(nullptr);
3610 }
3611
3612 /// Make this number the largest magnitude normal number in the given
3613 /// semantics.
makeLargest(bool Negative)3614 void IEEEFloat::makeLargest(bool Negative) {
3615 // We want (in interchange format):
3616 // sign = {Negative}
3617 // exponent = 1..10
3618 // significand = 1..1
3619 category = fcNormal;
3620 sign = Negative;
3621 exponent = semantics->maxExponent;
3622
3623 // Use memset to set all but the highest integerPart to all ones.
3624 integerPart *significand = significandParts();
3625 unsigned PartCount = partCount();
3626 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3627
3628 // Set the high integerPart especially setting all unused top bits for
3629 // internal consistency.
3630 const unsigned NumUnusedHighBits =
3631 PartCount*integerPartWidth - semantics->precision;
3632 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3633 ? (~integerPart(0) >> NumUnusedHighBits)
3634 : 0;
3635 }
3636
3637 /// Make this number the smallest magnitude denormal number in the given
3638 /// semantics.
makeSmallest(bool Negative)3639 void IEEEFloat::makeSmallest(bool Negative) {
3640 // We want (in interchange format):
3641 // sign = {Negative}
3642 // exponent = 0..0
3643 // significand = 0..01
3644 category = fcNormal;
3645 sign = Negative;
3646 exponent = semantics->minExponent;
3647 APInt::tcSet(significandParts(), 1, partCount());
3648 }
3649
makeSmallestNormalized(bool Negative)3650 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3651 // We want (in interchange format):
3652 // sign = {Negative}
3653 // exponent = 0..0
3654 // significand = 10..0
3655
3656 category = fcNormal;
3657 zeroSignificand();
3658 sign = Negative;
3659 exponent = semantics->minExponent;
3660 significandParts()[partCountForBits(semantics->precision) - 1] |=
3661 (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth));
3662 }
3663
IEEEFloat(const fltSemantics & Sem,const APInt & API)3664 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3665 initFromAPInt(&Sem, API);
3666 }
3667
IEEEFloat(float f)3668 IEEEFloat::IEEEFloat(float f) {
3669 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3670 }
3671
IEEEFloat(double d)3672 IEEEFloat::IEEEFloat(double d) {
3673 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3674 }
3675
3676 namespace {
append(SmallVectorImpl<char> & Buffer,StringRef Str)3677 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3678 Buffer.append(Str.begin(), Str.end());
3679 }
3680
3681 /// Removes data from the given significand until it is no more
3682 /// precise than is required for the desired precision.
AdjustToPrecision(APInt & significand,int & exp,unsigned FormatPrecision)3683 void AdjustToPrecision(APInt &significand,
3684 int &exp, unsigned FormatPrecision) {
3685 unsigned bits = significand.getActiveBits();
3686
3687 // 196/59 is a very slight overestimate of lg_2(10).
3688 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3689
3690 if (bits <= bitsRequired) return;
3691
3692 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3693 if (!tensRemovable) return;
3694
3695 exp += tensRemovable;
3696
3697 APInt divisor(significand.getBitWidth(), 1);
3698 APInt powten(significand.getBitWidth(), 10);
3699 while (true) {
3700 if (tensRemovable & 1)
3701 divisor *= powten;
3702 tensRemovable >>= 1;
3703 if (!tensRemovable) break;
3704 powten *= powten;
3705 }
3706
3707 significand = significand.udiv(divisor);
3708
3709 // Truncate the significand down to its active bit count.
3710 significand = significand.trunc(significand.getActiveBits());
3711 }
3712
3713
AdjustToPrecision(SmallVectorImpl<char> & buffer,int & exp,unsigned FormatPrecision)3714 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3715 int &exp, unsigned FormatPrecision) {
3716 unsigned N = buffer.size();
3717 if (N <= FormatPrecision) return;
3718
3719 // The most significant figures are the last ones in the buffer.
3720 unsigned FirstSignificant = N - FormatPrecision;
3721
3722 // Round.
3723 // FIXME: this probably shouldn't use 'round half up'.
3724
3725 // Rounding down is just a truncation, except we also want to drop
3726 // trailing zeros from the new result.
3727 if (buffer[FirstSignificant - 1] < '5') {
3728 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3729 FirstSignificant++;
3730
3731 exp += FirstSignificant;
3732 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3733 return;
3734 }
3735
3736 // Rounding up requires a decimal add-with-carry. If we continue
3737 // the carry, the newly-introduced zeros will just be truncated.
3738 for (unsigned I = FirstSignificant; I != N; ++I) {
3739 if (buffer[I] == '9') {
3740 FirstSignificant++;
3741 } else {
3742 buffer[I]++;
3743 break;
3744 }
3745 }
3746
3747 // If we carried through, we have exactly one digit of precision.
3748 if (FirstSignificant == N) {
3749 exp += FirstSignificant;
3750 buffer.clear();
3751 buffer.push_back('1');
3752 return;
3753 }
3754
3755 exp += FirstSignificant;
3756 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3757 }
3758 }
3759
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const3760 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
3761 unsigned FormatMaxPadding, bool TruncateZero) const {
3762 switch (category) {
3763 case fcInfinity:
3764 if (isNegative())
3765 return append(Str, "-Inf");
3766 else
3767 return append(Str, "+Inf");
3768
3769 case fcNaN: return append(Str, "NaN");
3770
3771 case fcZero:
3772 if (isNegative())
3773 Str.push_back('-');
3774
3775 if (!FormatMaxPadding) {
3776 if (TruncateZero)
3777 append(Str, "0.0E+0");
3778 else {
3779 append(Str, "0.0");
3780 if (FormatPrecision > 1)
3781 Str.append(FormatPrecision - 1, '0');
3782 append(Str, "e+00");
3783 }
3784 } else
3785 Str.push_back('0');
3786 return;
3787
3788 case fcNormal:
3789 break;
3790 }
3791
3792 if (isNegative())
3793 Str.push_back('-');
3794
3795 // Decompose the number into an APInt and an exponent.
3796 int exp = exponent - ((int) semantics->precision - 1);
3797 APInt significand(semantics->precision,
3798 makeArrayRef(significandParts(),
3799 partCountForBits(semantics->precision)));
3800
3801 // Set FormatPrecision if zero. We want to do this before we
3802 // truncate trailing zeros, as those are part of the precision.
3803 if (!FormatPrecision) {
3804 // We use enough digits so the number can be round-tripped back to an
3805 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3806 // Accurately" by Steele and White.
3807 // FIXME: Using a formula based purely on the precision is conservative;
3808 // we can print fewer digits depending on the actual value being printed.
3809
3810 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3811 FormatPrecision = 2 + semantics->precision * 59 / 196;
3812 }
3813
3814 // Ignore trailing binary zeros.
3815 int trailingZeros = significand.countTrailingZeros();
3816 exp += trailingZeros;
3817 significand.lshrInPlace(trailingZeros);
3818
3819 // Change the exponent from 2^e to 10^e.
3820 if (exp == 0) {
3821 // Nothing to do.
3822 } else if (exp > 0) {
3823 // Just shift left.
3824 significand = significand.zext(semantics->precision + exp);
3825 significand <<= exp;
3826 exp = 0;
3827 } else { /* exp < 0 */
3828 int texp = -exp;
3829
3830 // We transform this using the identity:
3831 // (N)(2^-e) == (N)(5^e)(10^-e)
3832 // This means we have to multiply N (the significand) by 5^e.
3833 // To avoid overflow, we have to operate on numbers large
3834 // enough to store N * 5^e:
3835 // log2(N * 5^e) == log2(N) + e * log2(5)
3836 // <= semantics->precision + e * 137 / 59
3837 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3838
3839 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3840
3841 // Multiply significand by 5^e.
3842 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3843 significand = significand.zext(precision);
3844 APInt five_to_the_i(precision, 5);
3845 while (true) {
3846 if (texp & 1) significand *= five_to_the_i;
3847
3848 texp >>= 1;
3849 if (!texp) break;
3850 five_to_the_i *= five_to_the_i;
3851 }
3852 }
3853
3854 AdjustToPrecision(significand, exp, FormatPrecision);
3855
3856 SmallVector<char, 256> buffer;
3857
3858 // Fill the buffer.
3859 unsigned precision = significand.getBitWidth();
3860 APInt ten(precision, 10);
3861 APInt digit(precision, 0);
3862
3863 bool inTrail = true;
3864 while (significand != 0) {
3865 // digit <- significand % 10
3866 // significand <- significand / 10
3867 APInt::udivrem(significand, ten, significand, digit);
3868
3869 unsigned d = digit.getZExtValue();
3870
3871 // Drop trailing zeros.
3872 if (inTrail && !d) exp++;
3873 else {
3874 buffer.push_back((char) ('0' + d));
3875 inTrail = false;
3876 }
3877 }
3878
3879 assert(!buffer.empty() && "no characters in buffer!");
3880
3881 // Drop down to FormatPrecision.
3882 // TODO: don't do more precise calculations above than are required.
3883 AdjustToPrecision(buffer, exp, FormatPrecision);
3884
3885 unsigned NDigits = buffer.size();
3886
3887 // Check whether we should use scientific notation.
3888 bool FormatScientific;
3889 if (!FormatMaxPadding)
3890 FormatScientific = true;
3891 else {
3892 if (exp >= 0) {
3893 // 765e3 --> 765000
3894 // ^^^
3895 // But we shouldn't make the number look more precise than it is.
3896 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3897 NDigits + (unsigned) exp > FormatPrecision);
3898 } else {
3899 // Power of the most significant digit.
3900 int MSD = exp + (int) (NDigits - 1);
3901 if (MSD >= 0) {
3902 // 765e-2 == 7.65
3903 FormatScientific = false;
3904 } else {
3905 // 765e-5 == 0.00765
3906 // ^ ^^
3907 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3908 }
3909 }
3910 }
3911
3912 // Scientific formatting is pretty straightforward.
3913 if (FormatScientific) {
3914 exp += (NDigits - 1);
3915
3916 Str.push_back(buffer[NDigits-1]);
3917 Str.push_back('.');
3918 if (NDigits == 1 && TruncateZero)
3919 Str.push_back('0');
3920 else
3921 for (unsigned I = 1; I != NDigits; ++I)
3922 Str.push_back(buffer[NDigits-1-I]);
3923 // Fill with zeros up to FormatPrecision.
3924 if (!TruncateZero && FormatPrecision > NDigits - 1)
3925 Str.append(FormatPrecision - NDigits + 1, '0');
3926 // For !TruncateZero we use lower 'e'.
3927 Str.push_back(TruncateZero ? 'E' : 'e');
3928
3929 Str.push_back(exp >= 0 ? '+' : '-');
3930 if (exp < 0) exp = -exp;
3931 SmallVector<char, 6> expbuf;
3932 do {
3933 expbuf.push_back((char) ('0' + (exp % 10)));
3934 exp /= 10;
3935 } while (exp);
3936 // Exponent always at least two digits if we do not truncate zeros.
3937 if (!TruncateZero && expbuf.size() < 2)
3938 expbuf.push_back('0');
3939 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3940 Str.push_back(expbuf[E-1-I]);
3941 return;
3942 }
3943
3944 // Non-scientific, positive exponents.
3945 if (exp >= 0) {
3946 for (unsigned I = 0; I != NDigits; ++I)
3947 Str.push_back(buffer[NDigits-1-I]);
3948 for (unsigned I = 0; I != (unsigned) exp; ++I)
3949 Str.push_back('0');
3950 return;
3951 }
3952
3953 // Non-scientific, negative exponents.
3954
3955 // The number of digits to the left of the decimal point.
3956 int NWholeDigits = exp + (int) NDigits;
3957
3958 unsigned I = 0;
3959 if (NWholeDigits > 0) {
3960 for (; I != (unsigned) NWholeDigits; ++I)
3961 Str.push_back(buffer[NDigits-I-1]);
3962 Str.push_back('.');
3963 } else {
3964 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3965
3966 Str.push_back('0');
3967 Str.push_back('.');
3968 for (unsigned Z = 1; Z != NZeros; ++Z)
3969 Str.push_back('0');
3970 }
3971
3972 for (; I != NDigits; ++I)
3973 Str.push_back(buffer[NDigits-I-1]);
3974 }
3975
getExactInverse(APFloat * inv) const3976 bool IEEEFloat::getExactInverse(APFloat *inv) const {
3977 // Special floats and denormals have no exact inverse.
3978 if (!isFiniteNonZero())
3979 return false;
3980
3981 // Check that the number is a power of two by making sure that only the
3982 // integer bit is set in the significand.
3983 if (significandLSB() != semantics->precision - 1)
3984 return false;
3985
3986 // Get the inverse.
3987 IEEEFloat reciprocal(*semantics, 1ULL);
3988 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3989 return false;
3990
3991 // Avoid multiplication with a denormal, it is not safe on all platforms and
3992 // may be slower than a normal division.
3993 if (reciprocal.isDenormal())
3994 return false;
3995
3996 assert(reciprocal.isFiniteNonZero() &&
3997 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3998
3999 if (inv)
4000 *inv = APFloat(reciprocal, *semantics);
4001
4002 return true;
4003 }
4004
isSignaling() const4005 bool IEEEFloat::isSignaling() const {
4006 if (!isNaN())
4007 return false;
4008
4009 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4010 // first bit of the trailing significand being 0.
4011 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4012 }
4013
4014 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4015 ///
4016 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4017 /// appropriate sign switching before/after the computation.
next(bool nextDown)4018 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
4019 // If we are performing nextDown, swap sign so we have -x.
4020 if (nextDown)
4021 changeSign();
4022
4023 // Compute nextUp(x)
4024 opStatus result = opOK;
4025
4026 // Handle each float category separately.
4027 switch (category) {
4028 case fcInfinity:
4029 // nextUp(+inf) = +inf
4030 if (!isNegative())
4031 break;
4032 // nextUp(-inf) = -getLargest()
4033 makeLargest(true);
4034 break;
4035 case fcNaN:
4036 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4037 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4038 // change the payload.
4039 if (isSignaling()) {
4040 result = opInvalidOp;
4041 // For consistency, propagate the sign of the sNaN to the qNaN.
4042 makeNaN(false, isNegative(), nullptr);
4043 }
4044 break;
4045 case fcZero:
4046 // nextUp(pm 0) = +getSmallest()
4047 makeSmallest(false);
4048 break;
4049 case fcNormal:
4050 // nextUp(-getSmallest()) = -0
4051 if (isSmallest() && isNegative()) {
4052 APInt::tcSet(significandParts(), 0, partCount());
4053 category = fcZero;
4054 exponent = 0;
4055 break;
4056 }
4057
4058 // nextUp(getLargest()) == INFINITY
4059 if (isLargest() && !isNegative()) {
4060 APInt::tcSet(significandParts(), 0, partCount());
4061 category = fcInfinity;
4062 exponent = semantics->maxExponent + 1;
4063 break;
4064 }
4065
4066 // nextUp(normal) == normal + inc.
4067 if (isNegative()) {
4068 // If we are negative, we need to decrement the significand.
4069
4070 // We only cross a binade boundary that requires adjusting the exponent
4071 // if:
4072 // 1. exponent != semantics->minExponent. This implies we are not in the
4073 // smallest binade or are dealing with denormals.
4074 // 2. Our significand excluding the integral bit is all zeros.
4075 bool WillCrossBinadeBoundary =
4076 exponent != semantics->minExponent && isSignificandAllZeros();
4077
4078 // Decrement the significand.
4079 //
4080 // We always do this since:
4081 // 1. If we are dealing with a non-binade decrement, by definition we
4082 // just decrement the significand.
4083 // 2. If we are dealing with a normal -> normal binade decrement, since
4084 // we have an explicit integral bit the fact that all bits but the
4085 // integral bit are zero implies that subtracting one will yield a
4086 // significand with 0 integral bit and 1 in all other spots. Thus we
4087 // must just adjust the exponent and set the integral bit to 1.
4088 // 3. If we are dealing with a normal -> denormal binade decrement,
4089 // since we set the integral bit to 0 when we represent denormals, we
4090 // just decrement the significand.
4091 integerPart *Parts = significandParts();
4092 APInt::tcDecrement(Parts, partCount());
4093
4094 if (WillCrossBinadeBoundary) {
4095 // Our result is a normal number. Do the following:
4096 // 1. Set the integral bit to 1.
4097 // 2. Decrement the exponent.
4098 APInt::tcSetBit(Parts, semantics->precision - 1);
4099 exponent--;
4100 }
4101 } else {
4102 // If we are positive, we need to increment the significand.
4103
4104 // We only cross a binade boundary that requires adjusting the exponent if
4105 // the input is not a denormal and all of said input's significand bits
4106 // are set. If all of said conditions are true: clear the significand, set
4107 // the integral bit to 1, and increment the exponent. If we have a
4108 // denormal always increment since moving denormals and the numbers in the
4109 // smallest normal binade have the same exponent in our representation.
4110 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
4111
4112 if (WillCrossBinadeBoundary) {
4113 integerPart *Parts = significandParts();
4114 APInt::tcSet(Parts, 0, partCount());
4115 APInt::tcSetBit(Parts, semantics->precision - 1);
4116 assert(exponent != semantics->maxExponent &&
4117 "We can not increment an exponent beyond the maxExponent allowed"
4118 " by the given floating point semantics.");
4119 exponent++;
4120 } else {
4121 incrementSignificand();
4122 }
4123 }
4124 break;
4125 }
4126
4127 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4128 if (nextDown)
4129 changeSign();
4130
4131 return result;
4132 }
4133
makeInf(bool Negative)4134 void IEEEFloat::makeInf(bool Negative) {
4135 category = fcInfinity;
4136 sign = Negative;
4137 exponent = semantics->maxExponent + 1;
4138 APInt::tcSet(significandParts(), 0, partCount());
4139 }
4140
makeZero(bool Negative)4141 void IEEEFloat::makeZero(bool Negative) {
4142 category = fcZero;
4143 sign = Negative;
4144 exponent = semantics->minExponent-1;
4145 APInt::tcSet(significandParts(), 0, partCount());
4146 }
4147
makeQuiet()4148 void IEEEFloat::makeQuiet() {
4149 assert(isNaN());
4150 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4151 }
4152
ilogb(const IEEEFloat & Arg)4153 int ilogb(const IEEEFloat &Arg) {
4154 if (Arg.isNaN())
4155 return IEEEFloat::IEK_NaN;
4156 if (Arg.isZero())
4157 return IEEEFloat::IEK_Zero;
4158 if (Arg.isInfinity())
4159 return IEEEFloat::IEK_Inf;
4160 if (!Arg.isDenormal())
4161 return Arg.exponent;
4162
4163 IEEEFloat Normalized(Arg);
4164 int SignificandBits = Arg.getSemantics().precision - 1;
4165
4166 Normalized.exponent += SignificandBits;
4167 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4168 return Normalized.exponent - SignificandBits;
4169 }
4170
scalbn(IEEEFloat X,int Exp,IEEEFloat::roundingMode RoundingMode)4171 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
4172 auto MaxExp = X.getSemantics().maxExponent;
4173 auto MinExp = X.getSemantics().minExponent;
4174
4175 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4176 // overflow; clamp it to a safe range before adding, but ensure that the range
4177 // is large enough that the clamp does not change the result. The range we
4178 // need to support is the difference between the largest possible exponent and
4179 // the normalized exponent of half the smallest denormal.
4180
4181 int SignificandBits = X.getSemantics().precision - 1;
4182 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4183
4184 // Clamp to one past the range ends to let normalize handle overlflow.
4185 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
4186 X.normalize(RoundingMode, lfExactlyZero);
4187 if (X.isNaN())
4188 X.makeQuiet();
4189 return X;
4190 }
4191
frexp(const IEEEFloat & Val,int & Exp,IEEEFloat::roundingMode RM)4192 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
4193 Exp = ilogb(Val);
4194
4195 // Quiet signalling nans.
4196 if (Exp == IEEEFloat::IEK_NaN) {
4197 IEEEFloat Quiet(Val);
4198 Quiet.makeQuiet();
4199 return Quiet;
4200 }
4201
4202 if (Exp == IEEEFloat::IEK_Inf)
4203 return Val;
4204
4205 // 1 is added because frexp is defined to return a normalized fraction in
4206 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4207 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
4208 return scalbn(Val, -Exp, RM);
4209 }
4210
DoubleAPFloat(const fltSemantics & S)4211 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4212 : Semantics(&S),
4213 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
4214 assert(Semantics == &semPPCDoubleDouble);
4215 }
4216
DoubleAPFloat(const fltSemantics & S,uninitializedTag)4217 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
4218 : Semantics(&S),
4219 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
4220 APFloat(semIEEEdouble, uninitialized)}) {
4221 assert(Semantics == &semPPCDoubleDouble);
4222 }
4223
DoubleAPFloat(const fltSemantics & S,integerPart I)4224 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
4225 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4226 APFloat(semIEEEdouble)}) {
4227 assert(Semantics == &semPPCDoubleDouble);
4228 }
4229
DoubleAPFloat(const fltSemantics & S,const APInt & I)4230 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
4231 : Semantics(&S),
4232 Floats(new APFloat[2]{
4233 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4234 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4235 assert(Semantics == &semPPCDoubleDouble);
4236 }
4237
DoubleAPFloat(const fltSemantics & S,APFloat && First,APFloat && Second)4238 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
4239 APFloat &&Second)
4240 : Semantics(&S),
4241 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4242 assert(Semantics == &semPPCDoubleDouble);
4243 assert(&Floats[0].getSemantics() == &semIEEEdouble);
4244 assert(&Floats[1].getSemantics() == &semIEEEdouble);
4245 }
4246
DoubleAPFloat(const DoubleAPFloat & RHS)4247 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
4248 : Semantics(RHS.Semantics),
4249 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4250 APFloat(RHS.Floats[1])}
4251 : nullptr) {
4252 assert(Semantics == &semPPCDoubleDouble);
4253 }
4254
DoubleAPFloat(DoubleAPFloat && RHS)4255 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
4256 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
4257 RHS.Semantics = &semBogus;
4258 assert(Semantics == &semPPCDoubleDouble);
4259 }
4260
operator =(const DoubleAPFloat & RHS)4261 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
4262 if (Semantics == RHS.Semantics && RHS.Floats) {
4263 Floats[0] = RHS.Floats[0];
4264 Floats[1] = RHS.Floats[1];
4265 } else if (this != &RHS) {
4266 this->~DoubleAPFloat();
4267 new (this) DoubleAPFloat(RHS);
4268 }
4269 return *this;
4270 }
4271
4272 // Implement addition, subtraction, multiplication and division based on:
4273 // "Software for Doubled-Precision Floating-Point Computations",
4274 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
addImpl(const APFloat & a,const APFloat & aa,const APFloat & c,const APFloat & cc,roundingMode RM)4275 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4276 const APFloat &c, const APFloat &cc,
4277 roundingMode RM) {
4278 int Status = opOK;
4279 APFloat z = a;
4280 Status |= z.add(c, RM);
4281 if (!z.isFinite()) {
4282 if (!z.isInfinity()) {
4283 Floats[0] = std::move(z);
4284 Floats[1].makeZero(/* Neg = */ false);
4285 return (opStatus)Status;
4286 }
4287 Status = opOK;
4288 auto AComparedToC = a.compareAbsoluteValue(c);
4289 z = cc;
4290 Status |= z.add(aa, RM);
4291 if (AComparedToC == APFloat::cmpGreaterThan) {
4292 // z = cc + aa + c + a;
4293 Status |= z.add(c, RM);
4294 Status |= z.add(a, RM);
4295 } else {
4296 // z = cc + aa + a + c;
4297 Status |= z.add(a, RM);
4298 Status |= z.add(c, RM);
4299 }
4300 if (!z.isFinite()) {
4301 Floats[0] = std::move(z);
4302 Floats[1].makeZero(/* Neg = */ false);
4303 return (opStatus)Status;
4304 }
4305 Floats[0] = z;
4306 APFloat zz = aa;
4307 Status |= zz.add(cc, RM);
4308 if (AComparedToC == APFloat::cmpGreaterThan) {
4309 // Floats[1] = a - z + c + zz;
4310 Floats[1] = a;
4311 Status |= Floats[1].subtract(z, RM);
4312 Status |= Floats[1].add(c, RM);
4313 Status |= Floats[1].add(zz, RM);
4314 } else {
4315 // Floats[1] = c - z + a + zz;
4316 Floats[1] = c;
4317 Status |= Floats[1].subtract(z, RM);
4318 Status |= Floats[1].add(a, RM);
4319 Status |= Floats[1].add(zz, RM);
4320 }
4321 } else {
4322 // q = a - z;
4323 APFloat q = a;
4324 Status |= q.subtract(z, RM);
4325
4326 // zz = q + c + (a - (q + z)) + aa + cc;
4327 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4328 auto zz = q;
4329 Status |= zz.add(c, RM);
4330 Status |= q.add(z, RM);
4331 Status |= q.subtract(a, RM);
4332 q.changeSign();
4333 Status |= zz.add(q, RM);
4334 Status |= zz.add(aa, RM);
4335 Status |= zz.add(cc, RM);
4336 if (zz.isZero() && !zz.isNegative()) {
4337 Floats[0] = std::move(z);
4338 Floats[1].makeZero(/* Neg = */ false);
4339 return opOK;
4340 }
4341 Floats[0] = z;
4342 Status |= Floats[0].add(zz, RM);
4343 if (!Floats[0].isFinite()) {
4344 Floats[1].makeZero(/* Neg = */ false);
4345 return (opStatus)Status;
4346 }
4347 Floats[1] = std::move(z);
4348 Status |= Floats[1].subtract(Floats[0], RM);
4349 Status |= Floats[1].add(zz, RM);
4350 }
4351 return (opStatus)Status;
4352 }
4353
addWithSpecial(const DoubleAPFloat & LHS,const DoubleAPFloat & RHS,DoubleAPFloat & Out,roundingMode RM)4354 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4355 const DoubleAPFloat &RHS,
4356 DoubleAPFloat &Out,
4357 roundingMode RM) {
4358 if (LHS.getCategory() == fcNaN) {
4359 Out = LHS;
4360 return opOK;
4361 }
4362 if (RHS.getCategory() == fcNaN) {
4363 Out = RHS;
4364 return opOK;
4365 }
4366 if (LHS.getCategory() == fcZero) {
4367 Out = RHS;
4368 return opOK;
4369 }
4370 if (RHS.getCategory() == fcZero) {
4371 Out = LHS;
4372 return opOK;
4373 }
4374 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4375 LHS.isNegative() != RHS.isNegative()) {
4376 Out.makeNaN(false, Out.isNegative(), nullptr);
4377 return opInvalidOp;
4378 }
4379 if (LHS.getCategory() == fcInfinity) {
4380 Out = LHS;
4381 return opOK;
4382 }
4383 if (RHS.getCategory() == fcInfinity) {
4384 Out = RHS;
4385 return opOK;
4386 }
4387 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4388
4389 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4390 CC(RHS.Floats[1]);
4391 assert(&A.getSemantics() == &semIEEEdouble);
4392 assert(&AA.getSemantics() == &semIEEEdouble);
4393 assert(&C.getSemantics() == &semIEEEdouble);
4394 assert(&CC.getSemantics() == &semIEEEdouble);
4395 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4396 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4397 return Out.addImpl(A, AA, C, CC, RM);
4398 }
4399
add(const DoubleAPFloat & RHS,roundingMode RM)4400 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4401 roundingMode RM) {
4402 return addWithSpecial(*this, RHS, *this, RM);
4403 }
4404
subtract(const DoubleAPFloat & RHS,roundingMode RM)4405 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4406 roundingMode RM) {
4407 changeSign();
4408 auto Ret = add(RHS, RM);
4409 changeSign();
4410 return Ret;
4411 }
4412
multiply(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4413 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4414 APFloat::roundingMode RM) {
4415 const auto &LHS = *this;
4416 auto &Out = *this;
4417 /* Interesting observation: For special categories, finding the lowest
4418 common ancestor of the following layered graph gives the correct
4419 return category:
4420
4421 NaN
4422 / \
4423 Zero Inf
4424 \ /
4425 Normal
4426
4427 e.g. NaN * NaN = NaN
4428 Zero * Inf = NaN
4429 Normal * Zero = Zero
4430 Normal * Inf = Inf
4431 */
4432 if (LHS.getCategory() == fcNaN) {
4433 Out = LHS;
4434 return opOK;
4435 }
4436 if (RHS.getCategory() == fcNaN) {
4437 Out = RHS;
4438 return opOK;
4439 }
4440 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4441 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4442 Out.makeNaN(false, false, nullptr);
4443 return opOK;
4444 }
4445 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4446 Out = LHS;
4447 return opOK;
4448 }
4449 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4450 Out = RHS;
4451 return opOK;
4452 }
4453 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4454 "Special cases not handled exhaustively");
4455
4456 int Status = opOK;
4457 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4458 // t = a * c
4459 APFloat T = A;
4460 Status |= T.multiply(C, RM);
4461 if (!T.isFiniteNonZero()) {
4462 Floats[0] = T;
4463 Floats[1].makeZero(/* Neg = */ false);
4464 return (opStatus)Status;
4465 }
4466
4467 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4468 APFloat Tau = A;
4469 T.changeSign();
4470 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4471 T.changeSign();
4472 {
4473 // v = a * d
4474 APFloat V = A;
4475 Status |= V.multiply(D, RM);
4476 // w = b * c
4477 APFloat W = B;
4478 Status |= W.multiply(C, RM);
4479 Status |= V.add(W, RM);
4480 // tau += v + w
4481 Status |= Tau.add(V, RM);
4482 }
4483 // u = t + tau
4484 APFloat U = T;
4485 Status |= U.add(Tau, RM);
4486
4487 Floats[0] = U;
4488 if (!U.isFinite()) {
4489 Floats[1].makeZero(/* Neg = */ false);
4490 } else {
4491 // Floats[1] = (t - u) + tau
4492 Status |= T.subtract(U, RM);
4493 Status |= T.add(Tau, RM);
4494 Floats[1] = T;
4495 }
4496 return (opStatus)Status;
4497 }
4498
divide(const DoubleAPFloat & RHS,APFloat::roundingMode RM)4499 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4500 APFloat::roundingMode RM) {
4501 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4502 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4503 auto Ret =
4504 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4505 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4506 return Ret;
4507 }
4508
remainder(const DoubleAPFloat & RHS)4509 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4510 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4511 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4512 auto Ret =
4513 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4514 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4515 return Ret;
4516 }
4517
mod(const DoubleAPFloat & RHS)4518 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4519 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4520 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4521 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4522 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4523 return Ret;
4524 }
4525
4526 APFloat::opStatus
fusedMultiplyAdd(const DoubleAPFloat & Multiplicand,const DoubleAPFloat & Addend,APFloat::roundingMode RM)4527 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4528 const DoubleAPFloat &Addend,
4529 APFloat::roundingMode RM) {
4530 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4531 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4532 auto Ret = Tmp.fusedMultiplyAdd(
4533 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4534 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4535 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4536 return Ret;
4537 }
4538
roundToIntegral(APFloat::roundingMode RM)4539 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4540 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4541 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4542 auto Ret = Tmp.roundToIntegral(RM);
4543 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4544 return Ret;
4545 }
4546
changeSign()4547 void DoubleAPFloat::changeSign() {
4548 Floats[0].changeSign();
4549 Floats[1].changeSign();
4550 }
4551
4552 APFloat::cmpResult
compareAbsoluteValue(const DoubleAPFloat & RHS) const4553 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4554 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4555 if (Result != cmpEqual)
4556 return Result;
4557 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4558 if (Result == cmpLessThan || Result == cmpGreaterThan) {
4559 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4560 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4561 if (Against && !RHSAgainst)
4562 return cmpLessThan;
4563 if (!Against && RHSAgainst)
4564 return cmpGreaterThan;
4565 if (!Against && !RHSAgainst)
4566 return Result;
4567 if (Against && RHSAgainst)
4568 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4569 }
4570 return Result;
4571 }
4572
getCategory() const4573 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4574 return Floats[0].getCategory();
4575 }
4576
isNegative() const4577 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4578
makeInf(bool Neg)4579 void DoubleAPFloat::makeInf(bool Neg) {
4580 Floats[0].makeInf(Neg);
4581 Floats[1].makeZero(/* Neg = */ false);
4582 }
4583
makeZero(bool Neg)4584 void DoubleAPFloat::makeZero(bool Neg) {
4585 Floats[0].makeZero(Neg);
4586 Floats[1].makeZero(/* Neg = */ false);
4587 }
4588
makeLargest(bool Neg)4589 void DoubleAPFloat::makeLargest(bool Neg) {
4590 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4591 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4592 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4593 if (Neg)
4594 changeSign();
4595 }
4596
makeSmallest(bool Neg)4597 void DoubleAPFloat::makeSmallest(bool Neg) {
4598 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4599 Floats[0].makeSmallest(Neg);
4600 Floats[1].makeZero(/* Neg = */ false);
4601 }
4602
makeSmallestNormalized(bool Neg)4603 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4604 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4605 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4606 if (Neg)
4607 Floats[0].changeSign();
4608 Floats[1].makeZero(/* Neg = */ false);
4609 }
4610
makeNaN(bool SNaN,bool Neg,const APInt * fill)4611 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4612 Floats[0].makeNaN(SNaN, Neg, fill);
4613 Floats[1].makeZero(/* Neg = */ false);
4614 }
4615
compare(const DoubleAPFloat & RHS) const4616 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4617 auto Result = Floats[0].compare(RHS.Floats[0]);
4618 // |Float[0]| > |Float[1]|
4619 if (Result == APFloat::cmpEqual)
4620 return Floats[1].compare(RHS.Floats[1]);
4621 return Result;
4622 }
4623
bitwiseIsEqual(const DoubleAPFloat & RHS) const4624 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4625 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4626 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4627 }
4628
hash_value(const DoubleAPFloat & Arg)4629 hash_code hash_value(const DoubleAPFloat &Arg) {
4630 if (Arg.Floats)
4631 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4632 return hash_combine(Arg.Semantics);
4633 }
4634
bitcastToAPInt() const4635 APInt DoubleAPFloat::bitcastToAPInt() const {
4636 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4637 uint64_t Data[] = {
4638 Floats[0].bitcastToAPInt().getRawData()[0],
4639 Floats[1].bitcastToAPInt().getRawData()[0],
4640 };
4641 return APInt(128, 2, Data);
4642 }
4643
convertFromString(StringRef S,roundingMode RM)4644 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
4645 roundingMode RM) {
4646 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4647 APFloat Tmp(semPPCDoubleDoubleLegacy);
4648 auto Ret = Tmp.convertFromString(S, RM);
4649 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4650 return Ret;
4651 }
4652
next(bool nextDown)4653 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
4654 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4655 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4656 auto Ret = Tmp.next(nextDown);
4657 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4658 return Ret;
4659 }
4660
4661 APFloat::opStatus
convertToInteger(MutableArrayRef<integerPart> Input,unsigned int Width,bool IsSigned,roundingMode RM,bool * IsExact) const4662 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
4663 unsigned int Width, bool IsSigned,
4664 roundingMode RM, bool *IsExact) const {
4665 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4666 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4667 .convertToInteger(Input, Width, IsSigned, RM, IsExact);
4668 }
4669
convertFromAPInt(const APInt & Input,bool IsSigned,roundingMode RM)4670 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
4671 bool IsSigned,
4672 roundingMode RM) {
4673 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4674 APFloat Tmp(semPPCDoubleDoubleLegacy);
4675 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
4676 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4677 return Ret;
4678 }
4679
4680 APFloat::opStatus
convertFromSignExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)4681 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
4682 unsigned int InputSize,
4683 bool IsSigned, roundingMode RM) {
4684 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4685 APFloat Tmp(semPPCDoubleDoubleLegacy);
4686 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
4687 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4688 return Ret;
4689 }
4690
4691 APFloat::opStatus
convertFromZeroExtendedInteger(const integerPart * Input,unsigned int InputSize,bool IsSigned,roundingMode RM)4692 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
4693 unsigned int InputSize,
4694 bool IsSigned, roundingMode RM) {
4695 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4696 APFloat Tmp(semPPCDoubleDoubleLegacy);
4697 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
4698 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4699 return Ret;
4700 }
4701
convertToHexString(char * DST,unsigned int HexDigits,bool UpperCase,roundingMode RM) const4702 unsigned int DoubleAPFloat::convertToHexString(char *DST,
4703 unsigned int HexDigits,
4704 bool UpperCase,
4705 roundingMode RM) const {
4706 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4707 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4708 .convertToHexString(DST, HexDigits, UpperCase, RM);
4709 }
4710
isDenormal() const4711 bool DoubleAPFloat::isDenormal() const {
4712 return getCategory() == fcNormal &&
4713 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
4714 // (double)(Hi + Lo) == Hi defines a normal number.
4715 Floats[0] != Floats[0] + Floats[1]);
4716 }
4717
isSmallest() const4718 bool DoubleAPFloat::isSmallest() const {
4719 if (getCategory() != fcNormal)
4720 return false;
4721 DoubleAPFloat Tmp(*this);
4722 Tmp.makeSmallest(this->isNegative());
4723 return Tmp.compare(*this) == cmpEqual;
4724 }
4725
isLargest() const4726 bool DoubleAPFloat::isLargest() const {
4727 if (getCategory() != fcNormal)
4728 return false;
4729 DoubleAPFloat Tmp(*this);
4730 Tmp.makeLargest(this->isNegative());
4731 return Tmp.compare(*this) == cmpEqual;
4732 }
4733
isInteger() const4734 bool DoubleAPFloat::isInteger() const {
4735 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4736 return Floats[0].isInteger() && Floats[1].isInteger();
4737 }
4738
toString(SmallVectorImpl<char> & Str,unsigned FormatPrecision,unsigned FormatMaxPadding,bool TruncateZero) const4739 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
4740 unsigned FormatPrecision,
4741 unsigned FormatMaxPadding,
4742 bool TruncateZero) const {
4743 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4744 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4745 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
4746 }
4747
getExactInverse(APFloat * inv) const4748 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
4749 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4750 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4751 if (!inv)
4752 return Tmp.getExactInverse(nullptr);
4753 APFloat Inv(semPPCDoubleDoubleLegacy);
4754 auto Ret = Tmp.getExactInverse(&Inv);
4755 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
4756 return Ret;
4757 }
4758
scalbn(DoubleAPFloat Arg,int Exp,APFloat::roundingMode RM)4759 DoubleAPFloat scalbn(DoubleAPFloat Arg, int Exp, APFloat::roundingMode RM) {
4760 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4761 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
4762 scalbn(Arg.Floats[1], Exp, RM));
4763 }
4764
frexp(const DoubleAPFloat & Arg,int & Exp,APFloat::roundingMode RM)4765 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
4766 APFloat::roundingMode RM) {
4767 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4768 APFloat First = frexp(Arg.Floats[0], Exp, RM);
4769 APFloat Second = Arg.Floats[1];
4770 if (Arg.getCategory() == APFloat::fcNormal)
4771 Second = scalbn(Second, -Exp, RM);
4772 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
4773 }
4774
4775 } // End detail namespace
4776
Storage(IEEEFloat F,const fltSemantics & Semantics)4777 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
4778 if (usesLayout<IEEEFloat>(Semantics)) {
4779 new (&IEEE) IEEEFloat(std::move(F));
4780 return;
4781 }
4782 if (usesLayout<DoubleAPFloat>(Semantics)) {
4783 const fltSemantics& S = F.getSemantics();
4784 new (&Double)
4785 DoubleAPFloat(Semantics, APFloat(std::move(F), S),
4786 APFloat(semIEEEdouble));
4787 return;
4788 }
4789 llvm_unreachable("Unexpected semantics");
4790 }
4791
convertFromString(StringRef Str,roundingMode RM)4792 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
4793 roundingMode RM) {
4794 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
4795 }
4796
hash_value(const APFloat & Arg)4797 hash_code hash_value(const APFloat &Arg) {
4798 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
4799 return hash_value(Arg.U.IEEE);
4800 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
4801 return hash_value(Arg.U.Double);
4802 llvm_unreachable("Unexpected semantics");
4803 }
4804
APFloat(const fltSemantics & Semantics,StringRef S)4805 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
4806 : APFloat(Semantics) {
4807 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
4808 assert(StatusOrErr && "Invalid floating point representation");
4809 consumeError(StatusOrErr.takeError());
4810 }
4811
convert(const fltSemantics & ToSemantics,roundingMode RM,bool * losesInfo)4812 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
4813 roundingMode RM, bool *losesInfo) {
4814 if (&getSemantics() == &ToSemantics) {
4815 *losesInfo = false;
4816 return opOK;
4817 }
4818 if (usesLayout<IEEEFloat>(getSemantics()) &&
4819 usesLayout<IEEEFloat>(ToSemantics))
4820 return U.IEEE.convert(ToSemantics, RM, losesInfo);
4821 if (usesLayout<IEEEFloat>(getSemantics()) &&
4822 usesLayout<DoubleAPFloat>(ToSemantics)) {
4823 assert(&ToSemantics == &semPPCDoubleDouble);
4824 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
4825 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
4826 return Ret;
4827 }
4828 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
4829 usesLayout<IEEEFloat>(ToSemantics)) {
4830 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
4831 *this = APFloat(std::move(getIEEE()), ToSemantics);
4832 return Ret;
4833 }
4834 llvm_unreachable("Unexpected semantics");
4835 }
4836
getAllOnesValue(const fltSemantics & Semantics,unsigned BitWidth)4837 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics,
4838 unsigned BitWidth) {
4839 return APFloat(Semantics, APInt::getAllOnesValue(BitWidth));
4840 }
4841
print(raw_ostream & OS) const4842 void APFloat::print(raw_ostream &OS) const {
4843 SmallVector<char, 16> Buffer;
4844 toString(Buffer);
4845 OS << Buffer << "\n";
4846 }
4847
4848 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump() const4849 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
4850 #endif
4851
Profile(FoldingSetNodeID & NID) const4852 void APFloat::Profile(FoldingSetNodeID &NID) const {
4853 NID.Add(bitcastToAPInt());
4854 }
4855
4856 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4857 an APSInt, whose initial bit-width and signed-ness are used to determine the
4858 precision of the conversion.
4859 */
convertToInteger(APSInt & result,roundingMode rounding_mode,bool * isExact) const4860 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
4861 roundingMode rounding_mode,
4862 bool *isExact) const {
4863 unsigned bitWidth = result.getBitWidth();
4864 SmallVector<uint64_t, 4> parts(result.getNumWords());
4865 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
4866 rounding_mode, isExact);
4867 // Keeps the original signed-ness.
4868 result = APInt(bitWidth, parts);
4869 return status;
4870 }
4871
4872 } // End llvm namespace
4873
4874 #undef APFLOAT_DISPATCH_ON_SEMANTICS
4875