1 // Copyright 2009 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS-IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15
16 // Author: ericv@google.com (Eric Veach)
17
18 #include "s2/util/math/exactfloat/exactfloat.h"
19
20 #include <cstdio>
21 #include <cstdlib>
22 #include <cstring>
23 #include <algorithm>
24 #include <cmath>
25 #include <limits>
26
27 #include <openssl/bn.h>
28 #include <openssl/crypto.h> // for OPENSSL_free
29
30 #include "s2/base/integral_types.h"
31 #include "s2/base/logging.h"
32 #include "s2/third_party/absl/base/macros.h"
33 #include "s2/third_party/absl/container/fixed_array.h"
34
35 using std::max;
36 using std::min;
37
38 // Define storage for constants.
39 const int ExactFloat::kMinExp;
40 const int ExactFloat::kMaxExp;
41 const int ExactFloat::kMaxPrec;
42 const int32 ExactFloat::kExpNaN;
43 const int32 ExactFloat::kExpInfinity;
44 const int32 ExactFloat::kExpZero;
45 const int ExactFloat::kDoubleMantissaBits;
46
47 // To simplify the overflow/underflow logic, we limit the exponent and
48 // precision range so that (2 * bn_exp_) does not overflow an "int". We take
49 // advantage of this, for example, by only checking for overflow/underflow
50 // *after* multiplying two numbers.
51 static_assert(
52 ExactFloat::kMaxExp <= INT_MAX / 2 &&
53 ExactFloat::kMinExp - ExactFloat::kMaxPrec >= INT_MIN / 2,
54 "exactfloat exponent might overflow");
55
56 // We define a few simple extensions to the OpenSSL's BIGNUM interface.
57 // In some cases these depend on BIGNUM internal fields, so they might
58 // require tweaking if the BIGNUM implementation changes significantly.
59 // These are just thin wrappers for BoringSSL.
60
61 #ifdef OPENSSL_IS_BORINGSSL
62
BN_ext_set_uint64(BIGNUM * bn,uint64 v)63 inline static void BN_ext_set_uint64(BIGNUM* bn, uint64 v) {
64 S2_CHECK(BN_set_u64(bn, v));
65 }
66
67 // Return the absolute value of a BIGNUM as a 64-bit unsigned integer.
68 // Requires that BIGNUM fits into 64 bits.
BN_ext_get_uint64(const BIGNUM * bn)69 inline static uint64 BN_ext_get_uint64(const BIGNUM* bn) {
70 uint64_t u64;
71 if (!BN_get_u64(bn, &u64)) {
72 S2_DCHECK(false) << "BN has " << BN_num_bits(bn) << " bits";
73 return 0;
74 }
75 return u64;
76 }
77
BN_ext_count_low_zero_bits(const BIGNUM * bn)78 static int BN_ext_count_low_zero_bits(const BIGNUM* bn) {
79 return BN_count_low_zero_bits(bn);
80 }
81
82 #else // !defined(OPENSSL_IS_BORINGSSL)
83
84 // Set a BIGNUM to the given unsigned 64-bit value.
BN_ext_set_uint64(BIGNUM * bn,uint64 v)85 inline static void BN_ext_set_uint64(BIGNUM* bn, uint64 v) {
86 #if BN_BITS2 == 64
87 S2_CHECK(BN_set_word(bn, v));
88 #else
89 static_assert(BN_BITS2 == 32, "at least 32 bit openssl build needed");
90 S2_CHECK(BN_set_word(bn, static_cast<uint32>(v >> 32)));
91 S2_CHECK(BN_lshift(bn, bn, 32));
92 S2_CHECK(BN_add_word(bn, static_cast<uint32>(v)));
93 #endif
94 }
95
96 // Return the absolute value of a BIGNUM as a 64-bit unsigned integer.
97 // Requires that BIGNUM fits into 64 bits.
BN_ext_get_uint64(const BIGNUM * bn)98 inline static uint64 BN_ext_get_uint64(const BIGNUM* bn) {
99 S2_DCHECK_LE(BN_num_bytes(bn), sizeof(uint64));
100 #if BN_BITS2 == 64
101 return BN_get_word(bn);
102 #else
103 static_assert(BN_BITS2 == 32, "at least 32 bit openssl build needed");
104 if (bn->top == 0) return 0;
105 if (bn->top == 1) return BN_get_word(bn);
106 S2_DCHECK_EQ(bn->top, 2);
107 return (static_cast<uint64>(bn->d[1]) << 32) + bn->d[0];
108 #endif
109 }
110
111 #if (OPENSSL_VERSION_NUMBER < 0x10100000L) || defined(LIBRESSL_VERSION_NUMBER)
112
113 // Count the number of low-order zero bits in the given BIGNUM (ignoring its
114 // sign). Returns 0 if the argument is zero.
BN_ext_count_low_zero_bits(const BIGNUM * bn)115 static int BN_ext_count_low_zero_bits(const BIGNUM* bn) {
116 int count = 0;
117 for (int i = 0; i < bn->top; ++i) {
118 BN_ULONG w = bn->d[i];
119 if (w == 0) {
120 count += 8 * sizeof(BN_ULONG);
121 } else {
122 for (; (w & 1) == 0; w >>= 1) {
123 ++count;
124 }
125 break;
126 }
127 }
128 return count;
129 }
130
131 #else // OPENSSL_VERSION_NUMBER >= 0x10100000L
132
BN_ext_count_low_zero_bits(const BIGNUM * bn)133 static int BN_ext_count_low_zero_bits(const BIGNUM* bn) {
134 // In OpenSSL >= 1.1, BIGNUM is an opaque type, so d and top
135 // cannot be accessed. The bytes must be copied out at a ~25%
136 // performance penalty.
137 absl::FixedArray<unsigned char> bytes(BN_num_bytes(bn));
138 // "le" indicates little endian.
139 S2_CHECK_EQ(BN_bn2lebinpad(bn, bytes.data(), bytes.size()), bytes.size());
140
141 int count = 0;
142 for (unsigned char c : bytes) {
143 if (c == 0) {
144 count += 8;
145 } else {
146 for (; (c & 1) == 0; c >>= 1) {
147 ++count;
148 }
149 break;
150 }
151 }
152 return count;
153 }
154
155 #endif // OPENSSL_VERSION_NUMBER >= 0x10100000L
156
157 #endif // !defined(OPENSSL_IS_BORINGSSL)
158
ExactFloat(double v)159 ExactFloat::ExactFloat(double v) {
160 sign_ = std::signbit(v) ? -1 : 1;
161 if (std::isnan(v)) {
162 set_nan();
163 } else if (std::isinf(v)) {
164 set_inf(sign_);
165 } else {
166 // The following code is much simpler than messing about with bit masks,
167 // has the advantage of handling denormalized numbers and zero correctly,
168 // and is actually quite efficient (at least compared to the rest of this
169 // code). "f" is a fraction in the range [0.5, 1), so if we shift it left
170 // by the number of mantissa bits in a double (53, including the leading
171 // "1") then the result is always an integer.
172 int exp;
173 double f = frexp(fabs(v), &exp);
174 uint64 m = static_cast<uint64>(ldexp(f, kDoubleMantissaBits));
175 BN_ext_set_uint64(bn_.get(), m);
176 bn_exp_ = exp - kDoubleMantissaBits;
177 Canonicalize();
178 }
179 }
180
ExactFloat(int v)181 ExactFloat::ExactFloat(int v) {
182 sign_ = (v >= 0) ? 1 : -1;
183 // Note that this works even for INT_MIN because the parameter type for
184 // BN_set_word() is unsigned.
185 S2_CHECK(BN_set_word(bn_.get(), abs(v)));
186 bn_exp_ = 0;
187 Canonicalize();
188 }
189
ExactFloat(const ExactFloat & b)190 ExactFloat::ExactFloat(const ExactFloat& b)
191 : sign_(b.sign_),
192 bn_exp_(b.bn_exp_) {
193 BN_copy(bn_.get(), b.bn_.get());
194 }
195
SignedZero(int sign)196 ExactFloat ExactFloat::SignedZero(int sign) {
197 ExactFloat r;
198 r.set_zero(sign);
199 return r;
200 }
201
Infinity(int sign)202 ExactFloat ExactFloat::Infinity(int sign) {
203 ExactFloat r;
204 r.set_inf(sign);
205 return r;
206 }
207
NaN()208 ExactFloat ExactFloat::NaN() {
209 ExactFloat r;
210 r.set_nan();
211 return r;
212 }
213
prec() const214 int ExactFloat::prec() const {
215 return BN_num_bits(bn_.get());
216 }
217
exp() const218 int ExactFloat::exp() const {
219 S2_DCHECK(is_normal());
220 return bn_exp_ + BN_num_bits(bn_.get());
221 }
222
set_zero(int sign)223 void ExactFloat::set_zero(int sign) {
224 sign_ = sign;
225 bn_exp_ = kExpZero;
226 if (!BN_is_zero(bn_.get())) BN_zero(bn_.get());
227 }
228
set_inf(int sign)229 void ExactFloat::set_inf(int sign) {
230 sign_ = sign;
231 bn_exp_ = kExpInfinity;
232 if (!BN_is_zero(bn_.get())) BN_zero(bn_.get());
233 }
234
set_nan()235 void ExactFloat::set_nan() {
236 sign_ = 1;
237 bn_exp_ = kExpNaN;
238 if (!BN_is_zero(bn_.get())) BN_zero(bn_.get());
239 }
240
ToDouble() const241 double ExactFloat::ToDouble() const {
242 // If the mantissa has too many bits, we need to round it.
243 if (prec() <= kDoubleMantissaBits) {
244 return ToDoubleHelper();
245 } else {
246 ExactFloat r = RoundToMaxPrec(kDoubleMantissaBits, kRoundTiesToEven);
247 return r.ToDoubleHelper();
248 }
249 }
250
ToDoubleHelper() const251 double ExactFloat::ToDoubleHelper() const {
252 S2_DCHECK_LE(BN_num_bits(bn_.get()), kDoubleMantissaBits);
253 if (!is_normal()) {
254 if (is_zero()) return copysign(0, sign_);
255 if (is_inf()) {
256 return std::copysign(std::numeric_limits<double>::infinity(), sign_);
257 }
258 return std::copysign(std::numeric_limits<double>::quiet_NaN(), sign_);
259 }
260 uint64 d_mantissa = BN_ext_get_uint64(bn_.get());
261 // We rely on ldexp() to handle overflow and underflow. (It will return a
262 // signed zero or infinity if the result is too small or too large.)
263 return sign_ * ldexp(static_cast<double>(d_mantissa), bn_exp_);
264 }
265
RoundToMaxPrec(int max_prec,RoundingMode mode) const266 ExactFloat ExactFloat::RoundToMaxPrec(int max_prec, RoundingMode mode) const {
267 // The "kRoundTiesToEven" mode requires at least 2 bits of precision
268 // (otherwise both adjacent representable values may be odd).
269 S2_DCHECK_GE(max_prec, 2);
270 S2_DCHECK_LE(max_prec, kMaxPrec);
271
272 // The following test also catches zero, infinity, and NaN.
273 int shift = prec() - max_prec;
274 if (shift <= 0) return *this;
275
276 // Round by removing the appropriate number of bits from the mantissa. Note
277 // that if the value is rounded up to a power of 2, the high-order bit
278 // position may increase, but in that case Canonicalize() will remove at
279 // least one zero bit and so the output will still have prec() <= max_prec.
280 return RoundToPowerOf2(bn_exp_ + shift, mode);
281 }
282
RoundToPowerOf2(int bit_exp,RoundingMode mode) const283 ExactFloat ExactFloat::RoundToPowerOf2(int bit_exp, RoundingMode mode) const {
284 S2_DCHECK_GE(bit_exp, kMinExp - kMaxPrec);
285 S2_DCHECK_LE(bit_exp, kMaxExp);
286
287 // If the exponent is already large enough, or the value is zero, infinity,
288 // or NaN, then there is nothing to do.
289 int shift = bit_exp - bn_exp_;
290 if (shift <= 0) return *this;
291 S2_DCHECK(is_normal());
292
293 // Convert rounding up/down to toward/away from zero, so that we don't need
294 // to consider the sign of the number from this point onward.
295 if (mode == kRoundTowardPositive) {
296 mode = (sign_ > 0) ? kRoundAwayFromZero : kRoundTowardZero;
297 } else if (mode == kRoundTowardNegative) {
298 mode = (sign_ > 0) ? kRoundTowardZero : kRoundAwayFromZero;
299 }
300
301 // Rounding consists of right-shifting the mantissa by "shift", and then
302 // possibly incrementing the result (depending on the rounding mode, the
303 // bits that were discarded, and sometimes the lowest kept bit). The
304 // following code figures out whether we need to increment.
305 ExactFloat r;
306 bool increment = false;
307 if (mode == kRoundTowardZero) {
308 // Never increment.
309 } else if (mode == kRoundTiesAwayFromZero) {
310 // Increment if the highest discarded bit is 1.
311 if (BN_is_bit_set(bn_.get(), shift - 1))
312 increment = true;
313 } else if (mode == kRoundAwayFromZero) {
314 // Increment unless all discarded bits are zero.
315 if (BN_ext_count_low_zero_bits(bn_.get()) < shift)
316 increment = true;
317 } else {
318 S2_DCHECK_EQ(mode, kRoundTiesToEven);
319 // Let "w/xyz" denote a mantissa where "w" is the lowest kept bit and
320 // "xyz" are the discarded bits. Then using regexp notation:
321 // ./0.* -> Don't increment (fraction < 1/2)
322 // 0/10* -> Don't increment (fraction = 1/2, kept part even)
323 // 1/10* -> Increment (fraction = 1/2, kept part odd)
324 // ./1.*1.* -> Increment (fraction > 1/2)
325 if (BN_is_bit_set(bn_.get(), shift - 1) &&
326 ((BN_is_bit_set(bn_.get(), shift) ||
327 BN_ext_count_low_zero_bits(bn_.get()) < shift - 1))) {
328 increment = true;
329 }
330 }
331 r.bn_exp_ = bn_exp_ + shift;
332 S2_CHECK(BN_rshift(r.bn_.get(), bn_.get(), shift));
333 if (increment) {
334 S2_CHECK(BN_add_word(r.bn_.get(), 1));
335 }
336 r.sign_ = sign_;
337 r.Canonicalize();
338 return r;
339 }
340
NumSignificantDigitsForPrec(int prec)341 int ExactFloat::NumSignificantDigitsForPrec(int prec) {
342 // The simplest bound is
343 //
344 // d <= 1 + ceil(prec * log10(2))
345 //
346 // The following bound is tighter by 0.5 digits on average, but requires
347 // the exponent to be known as well:
348 //
349 // d <= ceil(exp * log10(2)) - floor((exp - prec) * log10(2))
350 //
351 // Since either of these bounds can be too large by 0, 1, or 2 digits, we
352 // stick with the simpler first bound.
353 return static_cast<int>(1 + ceil(prec * (M_LN2 / M_LN10)));
354 }
355
356 // Numbers are always formatted with at least this many significant digits.
357 // This prevents small integers from being formatted in exponential notation
358 // (e.g. 1024 formatted as 1e+03), and also avoids the confusion of having
359 // supposedly "high precision" numbers formatted with just 1 or 2 digits
360 // (e.g. 1/512 == 0.001953125 formatted as 0.002).
361 static const int kMinSignificantDigits = 10;
362
ToString() const363 string ExactFloat::ToString() const {
364 int max_digits = max(kMinSignificantDigits,
365 NumSignificantDigitsForPrec(prec()));
366 return ToStringWithMaxDigits(max_digits);
367 }
368
ToStringWithMaxDigits(int max_digits) const369 string ExactFloat::ToStringWithMaxDigits(int max_digits) const {
370 S2_DCHECK_GT(max_digits, 0);
371 if (!is_normal()) {
372 if (is_nan()) return "nan";
373 if (is_zero()) return (sign_ < 0) ? "-0" : "0";
374 return (sign_ < 0) ? "-inf" : "inf";
375 }
376 string digits;
377 int exp10 = GetDecimalDigits(max_digits, &digits);
378 string str;
379 if (sign_ < 0) str.push_back('-');
380
381 // We use the standard '%g' formatting rules. If the exponent is less than
382 // -4 or greater than or equal to the requested precision (i.e., max_digits)
383 // then we use exponential notation.
384 //
385 // But since "exp10" is the base-10 exponent corresponding to a mantissa in
386 // the range [0.1, 1), whereas the '%g' rules assume a mantissa in the range
387 // [1.0, 10), we need to adjust these parameters by 1.
388 if (exp10 <= -4 || exp10 > max_digits) {
389 // Use exponential format.
390 str.push_back(digits[0]);
391 if (digits.size() > 1) {
392 str.push_back('.');
393 str.append(digits.begin() + 1, digits.end());
394 }
395 char exp_buf[20];
396 sprintf(exp_buf, "e%+02d", exp10 - 1);
397 str += exp_buf;
398 } else {
399 // Use fixed format. We split this into two cases depending on whether
400 // the integer portion is non-zero or not.
401 if (exp10 > 0) {
402 if (exp10 >= digits.size()) {
403 str += digits;
404 for (int i = exp10 - digits.size(); i > 0; --i) {
405 str.push_back('0');
406 }
407 } else {
408 str.append(digits.begin(), digits.begin() + exp10);
409 str.push_back('.');
410 str.append(digits.begin() + exp10, digits.end());
411 }
412 } else {
413 str += "0.";
414 for (int i = exp10; i < 0; ++i) {
415 str.push_back('0');
416 }
417 str += digits;
418 }
419 }
420 return str;
421 }
422
423 // Increment an unsigned integer represented as a string of ASCII digits.
IncrementDecimalDigits(string * digits)424 static void IncrementDecimalDigits(string* digits) {
425 string::iterator pos = digits->end();
426 while (--pos >= digits->begin()) {
427 if (*pos < '9') { ++*pos; return; }
428 *pos = '0';
429 }
430 digits->insert(0, "1");
431 }
432
GetDecimalDigits(int max_digits,string * digits) const433 int ExactFloat::GetDecimalDigits(int max_digits, string* digits) const {
434 S2_DCHECK(is_normal());
435 // Convert the value to the form (bn * (10 ** bn_exp10)) where "bn" is a
436 // positive integer (BIGNUM).
437 BIGNUM* bn = BN_new();
438 int bn_exp10;
439 if (bn_exp_ >= 0) {
440 // The easy case: bn = bn_ * (2 ** bn_exp_)), bn_exp10 = 0.
441 S2_CHECK(BN_lshift(bn, bn_.get(), bn_exp_));
442 bn_exp10 = 0;
443 } else {
444 // Set bn = bn_ * (5 ** -bn_exp_) and bn_exp10 = bn_exp_. This is
445 // equivalent to the original value of (bn_ * (2 ** bn_exp_)).
446 BIGNUM* power = BN_new();
447 S2_CHECK(BN_set_word(power, -bn_exp_));
448 S2_CHECK(BN_set_word(bn, 5));
449 BN_CTX* ctx = BN_CTX_new();
450 S2_CHECK(BN_exp(bn, bn, power, ctx));
451 S2_CHECK(BN_mul(bn, bn, bn_.get(), ctx));
452 BN_CTX_free(ctx);
453 BN_free(power);
454 bn_exp10 = bn_exp_;
455 }
456 // Now convert "bn" to a decimal string.
457 char* all_digits = BN_bn2dec(bn);
458 S2_DCHECK(all_digits != nullptr);
459 BN_free(bn);
460 // Check whether we have too many digits and round if necessary.
461 int num_digits = strlen(all_digits);
462 if (num_digits <= max_digits) {
463 *digits = all_digits;
464 } else {
465 digits->assign(all_digits, max_digits);
466 // Standard "printf" formatting rounds ties to an even number. This means
467 // that we round up (away from zero) if highest discarded digit is '5' or
468 // more, unless all other discarded digits are zero in which case we round
469 // up only if the lowest kept digit is odd.
470 if (all_digits[max_digits] >= '5' &&
471 ((all_digits[max_digits-1] & 1) == 1 ||
472 strpbrk(all_digits + max_digits + 1, "123456789") != nullptr)) {
473 // This can increase the number of digits by 1, but in that case at
474 // least one trailing zero will be stripped off below.
475 IncrementDecimalDigits(digits);
476 }
477 // Adjust the base-10 exponent to reflect the digits we have removed.
478 bn_exp10 += num_digits - max_digits;
479 }
480 OPENSSL_free(all_digits);
481
482 // Now strip any trailing zeros.
483 S2_DCHECK_NE((*digits)[0], '0');
484 string::iterator pos = digits->end();
485 while (pos[-1] == '0') --pos;
486 if (pos < digits->end()) {
487 bn_exp10 += digits->end() - pos;
488 digits->erase(pos, digits->end());
489 }
490 S2_DCHECK_LE(digits->size(), max_digits);
491
492 // Finally, we adjust the base-10 exponent so that the mantissa is a
493 // fraction in the range [0.1, 1) rather than an integer.
494 return bn_exp10 + digits->size();
495 }
496
ToUniqueString() const497 string ExactFloat::ToUniqueString() const {
498 char prec_buf[20];
499 sprintf(prec_buf, "<%d>", prec());
500 return ToString() + prec_buf;
501 }
502
operator =(const ExactFloat & b)503 ExactFloat& ExactFloat::operator=(const ExactFloat& b) {
504 if (this != &b) {
505 sign_ = b.sign_;
506 bn_exp_ = b.bn_exp_;
507 BN_copy(bn_.get(), b.bn_.get());
508 }
509 return *this;
510 }
511
operator -() const512 ExactFloat ExactFloat::operator-() const {
513 return CopyWithSign(-sign_);
514 }
515
operator +(const ExactFloat & a,const ExactFloat & b)516 ExactFloat operator+(const ExactFloat& a, const ExactFloat& b) {
517 return ExactFloat::SignedSum(a.sign_, &a, b.sign_, &b);
518 }
519
operator -(const ExactFloat & a,const ExactFloat & b)520 ExactFloat operator-(const ExactFloat& a, const ExactFloat& b) {
521 return ExactFloat::SignedSum(a.sign_, &a, -b.sign_, &b);
522 }
523
SignedSum(int a_sign,const ExactFloat * a,int b_sign,const ExactFloat * b)524 ExactFloat ExactFloat::SignedSum(int a_sign, const ExactFloat* a,
525 int b_sign, const ExactFloat* b) {
526 if (!a->is_normal() || !b->is_normal()) {
527 // Handle zero, infinity, and NaN according to IEEE 754-2008.
528 if (a->is_nan()) return *a;
529 if (b->is_nan()) return *b;
530 if (a->is_inf()) {
531 // Adding two infinities with opposite sign yields NaN.
532 if (b->is_inf() && a_sign != b_sign) return NaN();
533 return Infinity(a_sign);
534 }
535 if (b->is_inf()) return Infinity(b_sign);
536 if (a->is_zero()) {
537 if (!b->is_zero()) return b->CopyWithSign(b_sign);
538 // Adding two zeros with the same sign preserves the sign.
539 if (a_sign == b_sign) return SignedZero(a_sign);
540 // Adding two zeros of opposite sign produces +0.
541 return SignedZero(+1);
542 }
543 S2_DCHECK(b->is_zero());
544 return a->CopyWithSign(a_sign);
545 }
546 // Swap the numbers if necessary so that "a" has the larger bn_exp_.
547 if (a->bn_exp_ < b->bn_exp_) {
548 using std::swap;
549 swap(a_sign, b_sign);
550 swap(a, b);
551 }
552 // Shift "a" if necessary so that both values have the same bn_exp_.
553 ExactFloat r;
554 if (a->bn_exp_ > b->bn_exp_) {
555 S2_CHECK(BN_lshift(r.bn_.get(), a->bn_.get(), a->bn_exp_ - b->bn_exp_));
556 a = &r; // The only field of "a" used below is bn_.
557 }
558 r.bn_exp_ = b->bn_exp_;
559 if (a_sign == b_sign) {
560 S2_CHECK(BN_add(r.bn_.get(), a->bn_.get(), b->bn_.get()));
561 r.sign_ = a_sign;
562 } else {
563 // Note that the BIGNUM documentation is out of date -- all methods now
564 // allow the result to be the same as any input argument, so it is okay if
565 // (a == &r) due to the shift above.
566 S2_CHECK(BN_sub(r.bn_.get(), a->bn_.get(), b->bn_.get()));
567 if (BN_is_zero(r.bn_.get())) {
568 r.sign_ = +1;
569 } else if (BN_is_negative(r.bn_.get())) {
570 // The magnitude of "b" was larger.
571 r.sign_ = b_sign;
572 BN_set_negative(r.bn_.get(), false);
573 } else {
574 // They were equal, or the magnitude of "a" was larger.
575 r.sign_ = a_sign;
576 }
577 }
578 r.Canonicalize();
579 return r;
580 }
581
Canonicalize()582 void ExactFloat::Canonicalize() {
583 if (!is_normal()) return;
584
585 // Underflow/overflow occurs if exp() is not in [kMinExp, kMaxExp].
586 // We also convert a zero mantissa to signed zero.
587 int my_exp = exp();
588 if (my_exp < kMinExp || BN_is_zero(bn_.get())) {
589 set_zero(sign_);
590 } else if (my_exp > kMaxExp) {
591 set_inf(sign_);
592 } else if (!BN_is_odd(bn_.get())) {
593 // Remove any low-order zero bits from the mantissa.
594 S2_DCHECK(!BN_is_zero(bn_.get()));
595 int shift = BN_ext_count_low_zero_bits(bn_.get());
596 if (shift > 0) {
597 S2_CHECK(BN_rshift(bn_.get(), bn_.get(), shift));
598 bn_exp_ += shift;
599 }
600 }
601 // If the mantissa has too many bits, we replace it by NaN to indicate
602 // that an inexact calculation has occurred.
603 if (prec() > kMaxPrec) {
604 set_nan();
605 }
606 }
607
operator *(const ExactFloat & a,const ExactFloat & b)608 ExactFloat operator*(const ExactFloat& a, const ExactFloat& b) {
609 int result_sign = a.sign_ * b.sign_;
610 if (!a.is_normal() || !b.is_normal()) {
611 // Handle zero, infinity, and NaN according to IEEE 754-2008.
612 if (a.is_nan()) return a;
613 if (b.is_nan()) return b;
614 if (a.is_inf()) {
615 // Infinity times zero yields NaN.
616 if (b.is_zero()) return ExactFloat::NaN();
617 return ExactFloat::Infinity(result_sign);
618 }
619 if (b.is_inf()) {
620 if (a.is_zero()) return ExactFloat::NaN();
621 return ExactFloat::Infinity(result_sign);
622 }
623 S2_DCHECK(a.is_zero() || b.is_zero());
624 return ExactFloat::SignedZero(result_sign);
625 }
626 ExactFloat r;
627 r.sign_ = result_sign;
628 r.bn_exp_ = a.bn_exp_ + b.bn_exp_;
629 BN_CTX* ctx = BN_CTX_new();
630 S2_CHECK(BN_mul(r.bn_.get(), a.bn_.get(), b.bn_.get(), ctx));
631 BN_CTX_free(ctx);
632 r.Canonicalize();
633 return r;
634 }
635
operator ==(const ExactFloat & a,const ExactFloat & b)636 bool operator==(const ExactFloat& a, const ExactFloat& b) {
637 // NaN is not equal to anything, not even itself.
638 if (a.is_nan() || b.is_nan()) return false;
639
640 // Since Canonicalize() strips low-order zero bits, all other cases
641 // (including non-normal values) require bn_exp_ to be equal.
642 if (a.bn_exp_ != b.bn_exp_) return false;
643
644 // Positive and negative zero are equal.
645 if (a.is_zero() && b.is_zero()) return true;
646
647 // Otherwise, the signs and mantissas must match. Note that non-normal
648 // values such as infinity have a mantissa of zero.
649 return a.sign_ == b.sign_ && BN_ucmp(a.bn_.get(), b.bn_.get()) == 0;
650 }
651
ScaleAndCompare(const ExactFloat & b) const652 int ExactFloat::ScaleAndCompare(const ExactFloat& b) const {
653 S2_DCHECK(is_normal() && b.is_normal() && bn_exp_ >= b.bn_exp_);
654 ExactFloat tmp = *this;
655 S2_CHECK(BN_lshift(tmp.bn_.get(), tmp.bn_.get(), bn_exp_ - b.bn_exp_));
656 return BN_ucmp(tmp.bn_.get(), b.bn_.get());
657 }
658
UnsignedLess(const ExactFloat & b) const659 bool ExactFloat::UnsignedLess(const ExactFloat& b) const {
660 // Handle the zero/infinity cases (NaN has already been done).
661 if (is_inf() || b.is_zero()) return false;
662 if (is_zero() || b.is_inf()) return true;
663 // If the high-order bit positions differ, we are done.
664 int cmp = exp() - b.exp();
665 if (cmp != 0) return cmp < 0;
666 // Otherwise shift one of the two values so that they both have the same
667 // bn_exp_ and then compare the mantissas.
668 return (bn_exp_ >= b.bn_exp_ ?
669 ScaleAndCompare(b) < 0 : b.ScaleAndCompare(*this) > 0);
670 }
671
operator <(const ExactFloat & a,const ExactFloat & b)672 bool operator<(const ExactFloat& a, const ExactFloat& b) {
673 // NaN is unordered compared to everything, including itself.
674 if (a.is_nan() || b.is_nan()) return false;
675 // Positive and negative zero are equal.
676 if (a.is_zero() && b.is_zero()) return false;
677 // Otherwise, anything negative is less than anything positive.
678 if (a.sign_ != b.sign_) return a.sign_ < b.sign_;
679 // Now we just compare absolute values.
680 return (a.sign_ > 0) ? a.UnsignedLess(b) : b.UnsignedLess(a);
681 }
682
fabs(const ExactFloat & a)683 ExactFloat fabs(const ExactFloat& a) {
684 return abs(a);
685 }
686
abs(const ExactFloat & a)687 ExactFloat abs(const ExactFloat& a) {
688 return a.CopyWithSign(+1);
689 }
690
fmax(const ExactFloat & a,const ExactFloat & b)691 ExactFloat fmax(const ExactFloat& a, const ExactFloat& b) {
692 // If one argument is NaN, return the other argument.
693 if (a.is_nan()) return b;
694 if (b.is_nan()) return a;
695 // Not required by IEEE 754, but we prefer +0 over -0.
696 if (a.sign_ != b.sign_) {
697 return (a.sign_ < b.sign_) ? b : a;
698 }
699 return (a < b) ? b : a;
700 }
701
fmin(const ExactFloat & a,const ExactFloat & b)702 ExactFloat fmin(const ExactFloat& a, const ExactFloat& b) {
703 // If one argument is NaN, return the other argument.
704 if (a.is_nan()) return b;
705 if (b.is_nan()) return a;
706 // Not required by IEEE 754, but we prefer -0 over +0.
707 if (a.sign_ != b.sign_) {
708 return (a.sign_ < b.sign_) ? a : b;
709 }
710 return (a < b) ? a : b;
711 }
712
fdim(const ExactFloat & a,const ExactFloat & b)713 ExactFloat fdim(const ExactFloat& a, const ExactFloat& b) {
714 // This formulation has the correct behavior for NaNs.
715 return (a <= b) ? 0 : (a - b);
716 }
717
ceil(const ExactFloat & a)718 ExactFloat ceil(const ExactFloat& a) {
719 return a.RoundToPowerOf2(0, ExactFloat::kRoundTowardPositive);
720 }
721
floor(const ExactFloat & a)722 ExactFloat floor(const ExactFloat& a) {
723 return a.RoundToPowerOf2(0, ExactFloat::kRoundTowardNegative);
724 }
725
trunc(const ExactFloat & a)726 ExactFloat trunc(const ExactFloat& a) {
727 return a.RoundToPowerOf2(0, ExactFloat::kRoundTowardZero);
728 }
729
round(const ExactFloat & a)730 ExactFloat round(const ExactFloat& a) {
731 return a.RoundToPowerOf2(0, ExactFloat::kRoundTiesAwayFromZero);
732 }
733
rint(const ExactFloat & a)734 ExactFloat rint(const ExactFloat& a) {
735 return a.RoundToPowerOf2(0, ExactFloat::kRoundTiesToEven);
736 }
737
738 template <class T>
ToInteger(RoundingMode mode) const739 T ExactFloat::ToInteger(RoundingMode mode) const {
740 using std::numeric_limits;
741 static_assert(sizeof(T) <= sizeof(uint64), "max 64 bits supported");
742 static_assert(numeric_limits<T>::is_signed, "only signed types supported");
743 const int64 kMinValue = numeric_limits<T>::min();
744 const int64 kMaxValue = numeric_limits<T>::max();
745
746 ExactFloat r = RoundToPowerOf2(0, mode);
747 if (r.is_nan()) return kMaxValue;
748 if (r.is_zero()) return 0;
749 if (!r.is_inf()) {
750 // If the unsigned value has more than 63 bits it is always clamped.
751 if (r.exp() < 64) {
752 int64 value = BN_ext_get_uint64(r.bn_.get()) << r.bn_exp_;
753 if (r.sign_ < 0) value = -value;
754 return max(kMinValue, min(kMaxValue, value));
755 }
756 }
757 return (r.sign_ < 0) ? kMinValue : kMaxValue;
758 }
759
lrint(const ExactFloat & a)760 long lrint(const ExactFloat& a) {
761 return a.ToInteger<long>(ExactFloat::kRoundTiesToEven);
762 }
763
llrint(const ExactFloat & a)764 long long llrint(const ExactFloat& a) {
765 return a.ToInteger<long long>(ExactFloat::kRoundTiesToEven);
766 }
767
lround(const ExactFloat & a)768 long lround(const ExactFloat& a) {
769 return a.ToInteger<long>(ExactFloat::kRoundTiesAwayFromZero);
770 }
771
llround(const ExactFloat & a)772 long long llround(const ExactFloat& a) {
773 return a.ToInteger<long long>(ExactFloat::kRoundTiesAwayFromZero);
774 }
775
copysign(const ExactFloat & a,const ExactFloat & b)776 ExactFloat copysign(const ExactFloat& a, const ExactFloat& b) {
777 return a.CopyWithSign(b.sign_);
778 }
779
frexp(const ExactFloat & a,int * exp)780 ExactFloat frexp(const ExactFloat& a, int* exp) {
781 if (!a.is_normal()) {
782 // If a == 0, exp should be zero. If a.is_inf() or a.is_nan(), exp is not
783 // defined but the glibc implementation returns zero.
784 *exp = 0;
785 return a;
786 }
787 *exp = a.exp();
788 return ldexp(a, -a.exp());
789 }
790
ldexp(const ExactFloat & a,int exp)791 ExactFloat ldexp(const ExactFloat& a, int exp) {
792 if (!a.is_normal()) return a;
793
794 // To prevent integer overflow, we first clamp "exp" so that
795 // (kMinExp - 1) <= (a_exp + exp) <= (kMaxExp + 1).
796 int a_exp = a.exp();
797 exp = min(ExactFloat::kMaxExp + 1 - a_exp,
798 max(ExactFloat::kMinExp - 1 + a_exp, exp));
799
800 // Now modify the exponent and check for overflow/underflow.
801 ExactFloat r = a;
802 r.bn_exp_ += exp;
803 r.Canonicalize();
804 return r;
805 }
806
scalbln(const ExactFloat & a,long exp)807 ExactFloat scalbln(const ExactFloat& a, long exp) {
808 // Clamp the exponent to the range of "int" in order to avoid truncation.
809 exp = max(static_cast<long>(INT_MIN), min(static_cast<long>(INT_MAX), exp));
810 return ldexp(a, exp);
811 }
812
ilogb(const ExactFloat & a)813 int ilogb(const ExactFloat& a) {
814 if (a.is_zero()) return FP_ILOGB0;
815 if (a.is_inf()) return INT_MAX;
816 if (a.is_nan()) return FP_ILOGBNAN;
817 // a.exp() assumes the significand is in the range [0.5, 1).
818 return a.exp() - 1;
819 }
820
logb(const ExactFloat & a)821 ExactFloat logb(const ExactFloat& a) {
822 if (a.is_zero()) return ExactFloat::Infinity(-1);
823 if (a.is_inf()) return ExactFloat::Infinity(+1); // Even if a < 0.
824 if (a.is_nan()) return a;
825 // exp() assumes the significand is in the range [0.5,1).
826 return ExactFloat(a.exp() - 1);
827 }
828
Unimplemented()829 ExactFloat ExactFloat::Unimplemented() {
830 S2_LOG(FATAL) << "Unimplemented ExactFloat method called";
831 return NaN();
832 }
833