1 // Copyright 2009 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS-IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 
16 // Author: ericv@google.com (Eric Veach)
17 
18 #include "s2/util/math/exactfloat/exactfloat.h"
19 
20 #include <cstdio>
21 #include <cstdlib>
22 #include <cstring>
23 #include <algorithm>
24 #include <cmath>
25 #include <limits>
26 
27 #include <openssl/bn.h>
28 #include <openssl/crypto.h>  // for OPENSSL_free
29 
30 #include "s2/base/integral_types.h"
31 #include "s2/base/logging.h"
32 #include "s2/third_party/absl/base/macros.h"
33 #include "s2/third_party/absl/container/fixed_array.h"
34 
35 using std::max;
36 using std::min;
37 
38 // Define storage for constants.
39 const int ExactFloat::kMinExp;
40 const int ExactFloat::kMaxExp;
41 const int ExactFloat::kMaxPrec;
42 const int32 ExactFloat::kExpNaN;
43 const int32 ExactFloat::kExpInfinity;
44 const int32 ExactFloat::kExpZero;
45 const int ExactFloat::kDoubleMantissaBits;
46 
47 // To simplify the overflow/underflow logic, we limit the exponent and
48 // precision range so that (2 * bn_exp_) does not overflow an "int".  We take
49 // advantage of this, for example, by only checking for overflow/underflow
50 // *after* multiplying two numbers.
51 static_assert(
52     ExactFloat::kMaxExp <= INT_MAX / 2 &&
53     ExactFloat::kMinExp - ExactFloat::kMaxPrec >= INT_MIN / 2,
54     "exactfloat exponent might overflow");
55 
56 // We define a few simple extensions to the OpenSSL's BIGNUM interface.
57 // In some cases these depend on BIGNUM internal fields, so they might
58 // require tweaking if the BIGNUM implementation changes significantly.
59 // These are just thin wrappers for BoringSSL.
60 
61 #ifdef OPENSSL_IS_BORINGSSL
62 
BN_ext_set_uint64(BIGNUM * bn,uint64 v)63 inline static void BN_ext_set_uint64(BIGNUM* bn, uint64 v) {
64   S2_CHECK(BN_set_u64(bn, v));
65 }
66 
67 // Return the absolute value of a BIGNUM as a 64-bit unsigned integer.
68 // Requires that BIGNUM fits into 64 bits.
BN_ext_get_uint64(const BIGNUM * bn)69 inline static uint64 BN_ext_get_uint64(const BIGNUM* bn) {
70   uint64_t u64;
71   if (!BN_get_u64(bn, &u64)) {
72     S2_DCHECK(false) << "BN has " << BN_num_bits(bn) << " bits";
73     return 0;
74   }
75   return u64;
76 }
77 
BN_ext_count_low_zero_bits(const BIGNUM * bn)78 static int BN_ext_count_low_zero_bits(const BIGNUM* bn) {
79   return BN_count_low_zero_bits(bn);
80 }
81 
82 #else  // !defined(OPENSSL_IS_BORINGSSL)
83 
84 // Set a BIGNUM to the given unsigned 64-bit value.
BN_ext_set_uint64(BIGNUM * bn,uint64 v)85 inline static void BN_ext_set_uint64(BIGNUM* bn, uint64 v) {
86 #if BN_BITS2 == 64
87   S2_CHECK(BN_set_word(bn, v));
88 #else
89   static_assert(BN_BITS2 == 32, "at least 32 bit openssl build needed");
90   S2_CHECK(BN_set_word(bn, static_cast<uint32>(v >> 32)));
91   S2_CHECK(BN_lshift(bn, bn, 32));
92   S2_CHECK(BN_add_word(bn, static_cast<uint32>(v)));
93 #endif
94 }
95 
96 // Return the absolute value of a BIGNUM as a 64-bit unsigned integer.
97 // Requires that BIGNUM fits into 64 bits.
BN_ext_get_uint64(const BIGNUM * bn)98 inline static uint64 BN_ext_get_uint64(const BIGNUM* bn) {
99   S2_DCHECK_LE(BN_num_bytes(bn), sizeof(uint64));
100 #if BN_BITS2 == 64
101   return BN_get_word(bn);
102 #else
103   static_assert(BN_BITS2 == 32, "at least 32 bit openssl build needed");
104   if (bn->top == 0) return 0;
105   if (bn->top == 1) return BN_get_word(bn);
106   S2_DCHECK_EQ(bn->top, 2);
107   return (static_cast<uint64>(bn->d[1]) << 32) + bn->d[0];
108 #endif
109 }
110 
111 #if (OPENSSL_VERSION_NUMBER < 0x10100000L) || defined(LIBRESSL_VERSION_NUMBER)
112 
113 // Count the number of low-order zero bits in the given BIGNUM (ignoring its
114 // sign).  Returns 0 if the argument is zero.
BN_ext_count_low_zero_bits(const BIGNUM * bn)115 static int BN_ext_count_low_zero_bits(const BIGNUM* bn) {
116   int count = 0;
117   for (int i = 0; i < bn->top; ++i) {
118     BN_ULONG w = bn->d[i];
119     if (w == 0) {
120       count += 8 * sizeof(BN_ULONG);
121     } else {
122       for (; (w & 1) == 0; w >>= 1) {
123         ++count;
124       }
125       break;
126     }
127   }
128   return count;
129 }
130 
131 #else  // OPENSSL_VERSION_NUMBER >= 0x10100000L
132 
BN_ext_count_low_zero_bits(const BIGNUM * bn)133 static int BN_ext_count_low_zero_bits(const BIGNUM* bn) {
134   // In OpenSSL >= 1.1, BIGNUM is an opaque type, so d and top
135   // cannot be accessed.  The bytes must be copied out at a ~25%
136   // performance penalty.
137   absl::FixedArray<unsigned char> bytes(BN_num_bytes(bn));
138   // "le" indicates little endian.
139   S2_CHECK_EQ(BN_bn2lebinpad(bn, bytes.data(), bytes.size()), bytes.size());
140 
141   int count = 0;
142   for (unsigned char c : bytes) {
143     if (c == 0) {
144       count += 8;
145     } else {
146       for (; (c & 1) == 0; c >>= 1) {
147         ++count;
148       }
149       break;
150     }
151   }
152   return count;
153 }
154 
155 #endif  // OPENSSL_VERSION_NUMBER >= 0x10100000L
156 
157 #endif  // !defined(OPENSSL_IS_BORINGSSL)
158 
ExactFloat(double v)159 ExactFloat::ExactFloat(double v) {
160   sign_ = std::signbit(v) ? -1 : 1;
161   if (std::isnan(v)) {
162     set_nan();
163   } else if (std::isinf(v)) {
164     set_inf(sign_);
165   } else {
166     // The following code is much simpler than messing about with bit masks,
167     // has the advantage of handling denormalized numbers and zero correctly,
168     // and is actually quite efficient (at least compared to the rest of this
169     // code).  "f" is a fraction in the range [0.5, 1), so if we shift it left
170     // by the number of mantissa bits in a double (53, including the leading
171     // "1") then the result is always an integer.
172     int exp;
173     double f = frexp(fabs(v), &exp);
174     uint64 m = static_cast<uint64>(ldexp(f, kDoubleMantissaBits));
175     BN_ext_set_uint64(bn_.get(), m);
176     bn_exp_ = exp - kDoubleMantissaBits;
177     Canonicalize();
178   }
179 }
180 
ExactFloat(int v)181 ExactFloat::ExactFloat(int v) {
182   sign_ = (v >= 0) ? 1 : -1;
183   // Note that this works even for INT_MIN because the parameter type for
184   // BN_set_word() is unsigned.
185   S2_CHECK(BN_set_word(bn_.get(), abs(v)));
186   bn_exp_ = 0;
187   Canonicalize();
188 }
189 
ExactFloat(const ExactFloat & b)190 ExactFloat::ExactFloat(const ExactFloat& b)
191     : sign_(b.sign_),
192       bn_exp_(b.bn_exp_) {
193   BN_copy(bn_.get(), b.bn_.get());
194 }
195 
SignedZero(int sign)196 ExactFloat ExactFloat::SignedZero(int sign) {
197   ExactFloat r;
198   r.set_zero(sign);
199   return r;
200 }
201 
Infinity(int sign)202 ExactFloat ExactFloat::Infinity(int sign) {
203   ExactFloat r;
204   r.set_inf(sign);
205   return r;
206 }
207 
NaN()208 ExactFloat ExactFloat::NaN() {
209   ExactFloat r;
210   r.set_nan();
211   return r;
212 }
213 
prec() const214 int ExactFloat::prec() const {
215   return BN_num_bits(bn_.get());
216 }
217 
exp() const218 int ExactFloat::exp() const {
219   S2_DCHECK(is_normal());
220   return bn_exp_ + BN_num_bits(bn_.get());
221 }
222 
set_zero(int sign)223 void ExactFloat::set_zero(int sign) {
224   sign_ = sign;
225   bn_exp_ = kExpZero;
226   if (!BN_is_zero(bn_.get())) BN_zero(bn_.get());
227 }
228 
set_inf(int sign)229 void ExactFloat::set_inf(int sign) {
230   sign_ = sign;
231   bn_exp_ = kExpInfinity;
232   if (!BN_is_zero(bn_.get())) BN_zero(bn_.get());
233 }
234 
set_nan()235 void ExactFloat::set_nan() {
236   sign_ = 1;
237   bn_exp_ = kExpNaN;
238   if (!BN_is_zero(bn_.get())) BN_zero(bn_.get());
239 }
240 
ToDouble() const241 double ExactFloat::ToDouble() const {
242   // If the mantissa has too many bits, we need to round it.
243   if (prec() <= kDoubleMantissaBits) {
244     return ToDoubleHelper();
245   } else {
246     ExactFloat r = RoundToMaxPrec(kDoubleMantissaBits, kRoundTiesToEven);
247     return r.ToDoubleHelper();
248   }
249 }
250 
ToDoubleHelper() const251 double ExactFloat::ToDoubleHelper() const {
252   S2_DCHECK_LE(BN_num_bits(bn_.get()), kDoubleMantissaBits);
253   if (!is_normal()) {
254     if (is_zero()) return copysign(0, sign_);
255     if (is_inf()) {
256       return std::copysign(std::numeric_limits<double>::infinity(), sign_);
257     }
258     return std::copysign(std::numeric_limits<double>::quiet_NaN(), sign_);
259   }
260   uint64 d_mantissa = BN_ext_get_uint64(bn_.get());
261   // We rely on ldexp() to handle overflow and underflow.  (It will return a
262   // signed zero or infinity if the result is too small or too large.)
263   return sign_ * ldexp(static_cast<double>(d_mantissa), bn_exp_);
264 }
265 
RoundToMaxPrec(int max_prec,RoundingMode mode) const266 ExactFloat ExactFloat::RoundToMaxPrec(int max_prec, RoundingMode mode) const {
267   // The "kRoundTiesToEven" mode requires at least 2 bits of precision
268   // (otherwise both adjacent representable values may be odd).
269   S2_DCHECK_GE(max_prec, 2);
270   S2_DCHECK_LE(max_prec, kMaxPrec);
271 
272   // The following test also catches zero, infinity, and NaN.
273   int shift = prec() - max_prec;
274   if (shift <= 0) return *this;
275 
276   // Round by removing the appropriate number of bits from the mantissa.  Note
277   // that if the value is rounded up to a power of 2, the high-order bit
278   // position may increase, but in that case Canonicalize() will remove at
279   // least one zero bit and so the output will still have prec() <= max_prec.
280   return RoundToPowerOf2(bn_exp_ + shift, mode);
281 }
282 
RoundToPowerOf2(int bit_exp,RoundingMode mode) const283 ExactFloat ExactFloat::RoundToPowerOf2(int bit_exp, RoundingMode mode) const {
284   S2_DCHECK_GE(bit_exp, kMinExp - kMaxPrec);
285   S2_DCHECK_LE(bit_exp, kMaxExp);
286 
287   // If the exponent is already large enough, or the value is zero, infinity,
288   // or NaN, then there is nothing to do.
289   int shift = bit_exp - bn_exp_;
290   if (shift <= 0) return *this;
291   S2_DCHECK(is_normal());
292 
293   // Convert rounding up/down to toward/away from zero, so that we don't need
294   // to consider the sign of the number from this point onward.
295   if (mode == kRoundTowardPositive) {
296     mode = (sign_ > 0) ? kRoundAwayFromZero : kRoundTowardZero;
297   } else if (mode == kRoundTowardNegative) {
298     mode = (sign_ > 0) ? kRoundTowardZero : kRoundAwayFromZero;
299   }
300 
301   // Rounding consists of right-shifting the mantissa by "shift", and then
302   // possibly incrementing the result (depending on the rounding mode, the
303   // bits that were discarded, and sometimes the lowest kept bit).  The
304   // following code figures out whether we need to increment.
305   ExactFloat r;
306   bool increment = false;
307   if (mode == kRoundTowardZero) {
308     // Never increment.
309   } else if (mode == kRoundTiesAwayFromZero) {
310     // Increment if the highest discarded bit is 1.
311     if (BN_is_bit_set(bn_.get(), shift - 1))
312       increment = true;
313   } else if (mode == kRoundAwayFromZero) {
314     // Increment unless all discarded bits are zero.
315     if (BN_ext_count_low_zero_bits(bn_.get()) < shift)
316       increment = true;
317   } else {
318     S2_DCHECK_EQ(mode, kRoundTiesToEven);
319     // Let "w/xyz" denote a mantissa where "w" is the lowest kept bit and
320     // "xyz" are the discarded bits.  Then using regexp notation:
321     //    ./0.*       ->    Don't increment (fraction < 1/2)
322     //    0/10*       ->    Don't increment (fraction = 1/2, kept part even)
323     //    1/10*       ->    Increment (fraction = 1/2, kept part odd)
324     //    ./1.*1.*    ->    Increment (fraction > 1/2)
325     if (BN_is_bit_set(bn_.get(), shift - 1) &&
326         ((BN_is_bit_set(bn_.get(), shift) ||
327           BN_ext_count_low_zero_bits(bn_.get()) < shift - 1))) {
328       increment = true;
329     }
330   }
331   r.bn_exp_ = bn_exp_ + shift;
332   S2_CHECK(BN_rshift(r.bn_.get(), bn_.get(), shift));
333   if (increment) {
334     S2_CHECK(BN_add_word(r.bn_.get(), 1));
335   }
336   r.sign_ = sign_;
337   r.Canonicalize();
338   return r;
339 }
340 
NumSignificantDigitsForPrec(int prec)341 int ExactFloat::NumSignificantDigitsForPrec(int prec) {
342   // The simplest bound is
343   //
344   //    d <= 1 + ceil(prec * log10(2))
345   //
346   // The following bound is tighter by 0.5 digits on average, but requires
347   // the exponent to be known as well:
348   //
349   //    d <= ceil(exp * log10(2)) - floor((exp - prec) * log10(2))
350   //
351   // Since either of these bounds can be too large by 0, 1, or 2 digits, we
352   // stick with the simpler first bound.
353   return static_cast<int>(1 + ceil(prec * (M_LN2 / M_LN10)));
354 }
355 
356 // Numbers are always formatted with at least this many significant digits.
357 // This prevents small integers from being formatted in exponential notation
358 // (e.g. 1024 formatted as 1e+03), and also avoids the confusion of having
359 // supposedly "high precision" numbers formatted with just 1 or 2 digits
360 // (e.g. 1/512 == 0.001953125 formatted as 0.002).
361 static const int kMinSignificantDigits = 10;
362 
ToString() const363 string ExactFloat::ToString() const {
364   int max_digits = max(kMinSignificantDigits,
365                        NumSignificantDigitsForPrec(prec()));
366   return ToStringWithMaxDigits(max_digits);
367 }
368 
ToStringWithMaxDigits(int max_digits) const369 string ExactFloat::ToStringWithMaxDigits(int max_digits) const {
370   S2_DCHECK_GT(max_digits, 0);
371   if (!is_normal()) {
372     if (is_nan()) return "nan";
373     if (is_zero()) return (sign_ < 0) ? "-0" : "0";
374     return (sign_ < 0) ? "-inf" : "inf";
375   }
376   string digits;
377   int exp10 = GetDecimalDigits(max_digits, &digits);
378   string str;
379   if (sign_ < 0) str.push_back('-');
380 
381   // We use the standard '%g' formatting rules.  If the exponent is less than
382   // -4 or greater than or equal to the requested precision (i.e., max_digits)
383   // then we use exponential notation.
384   //
385   // But since "exp10" is the base-10 exponent corresponding to a mantissa in
386   // the range [0.1, 1), whereas the '%g' rules assume a mantissa in the range
387   // [1.0, 10), we need to adjust these parameters by 1.
388   if (exp10 <= -4 || exp10 > max_digits) {
389     // Use exponential format.
390     str.push_back(digits[0]);
391     if (digits.size() > 1) {
392       str.push_back('.');
393       str.append(digits.begin() + 1, digits.end());
394     }
395     char exp_buf[20];
396     sprintf(exp_buf, "e%+02d", exp10 - 1);
397     str += exp_buf;
398   } else {
399     // Use fixed format.  We split this into two cases depending on whether
400     // the integer portion is non-zero or not.
401     if (exp10 > 0) {
402       if (exp10 >= digits.size()) {
403         str += digits;
404         for (int i = exp10 - digits.size(); i > 0; --i) {
405           str.push_back('0');
406         }
407       } else {
408         str.append(digits.begin(), digits.begin() + exp10);
409         str.push_back('.');
410         str.append(digits.begin() + exp10, digits.end());
411       }
412     } else {
413       str += "0.";
414       for (int i = exp10; i < 0; ++i) {
415         str.push_back('0');
416       }
417       str += digits;
418     }
419   }
420   return str;
421 }
422 
423 // Increment an unsigned integer represented as a string of ASCII digits.
IncrementDecimalDigits(string * digits)424 static void IncrementDecimalDigits(string* digits) {
425   string::iterator pos = digits->end();
426   while (--pos >= digits->begin()) {
427     if (*pos < '9') { ++*pos; return; }
428     *pos = '0';
429   }
430   digits->insert(0, "1");
431 }
432 
GetDecimalDigits(int max_digits,string * digits) const433 int ExactFloat::GetDecimalDigits(int max_digits, string* digits) const {
434   S2_DCHECK(is_normal());
435   // Convert the value to the form (bn * (10 ** bn_exp10)) where "bn" is a
436   // positive integer (BIGNUM).
437   BIGNUM* bn = BN_new();
438   int bn_exp10;
439   if (bn_exp_ >= 0) {
440     // The easy case: bn = bn_ * (2 ** bn_exp_)), bn_exp10 = 0.
441     S2_CHECK(BN_lshift(bn, bn_.get(), bn_exp_));
442     bn_exp10 = 0;
443   } else {
444     // Set bn = bn_ * (5 ** -bn_exp_) and bn_exp10 = bn_exp_.  This is
445     // equivalent to the original value of (bn_ * (2 ** bn_exp_)).
446     BIGNUM* power = BN_new();
447     S2_CHECK(BN_set_word(power, -bn_exp_));
448     S2_CHECK(BN_set_word(bn, 5));
449     BN_CTX* ctx = BN_CTX_new();
450     S2_CHECK(BN_exp(bn, bn, power, ctx));
451     S2_CHECK(BN_mul(bn, bn, bn_.get(), ctx));
452     BN_CTX_free(ctx);
453     BN_free(power);
454     bn_exp10 = bn_exp_;
455   }
456   // Now convert "bn" to a decimal string.
457   char* all_digits = BN_bn2dec(bn);
458   S2_DCHECK(all_digits != nullptr);
459   BN_free(bn);
460   // Check whether we have too many digits and round if necessary.
461   int num_digits = strlen(all_digits);
462   if (num_digits <= max_digits) {
463     *digits = all_digits;
464   } else {
465     digits->assign(all_digits, max_digits);
466     // Standard "printf" formatting rounds ties to an even number.  This means
467     // that we round up (away from zero) if highest discarded digit is '5' or
468     // more, unless all other discarded digits are zero in which case we round
469     // up only if the lowest kept digit is odd.
470     if (all_digits[max_digits] >= '5' &&
471         ((all_digits[max_digits-1] & 1) == 1 ||
472          strpbrk(all_digits + max_digits + 1, "123456789") != nullptr)) {
473       // This can increase the number of digits by 1, but in that case at
474       // least one trailing zero will be stripped off below.
475       IncrementDecimalDigits(digits);
476     }
477     // Adjust the base-10 exponent to reflect the digits we have removed.
478     bn_exp10 += num_digits - max_digits;
479   }
480   OPENSSL_free(all_digits);
481 
482   // Now strip any trailing zeros.
483   S2_DCHECK_NE((*digits)[0], '0');
484   string::iterator pos = digits->end();
485   while (pos[-1] == '0') --pos;
486   if (pos < digits->end()) {
487     bn_exp10 += digits->end() - pos;
488     digits->erase(pos, digits->end());
489   }
490   S2_DCHECK_LE(digits->size(), max_digits);
491 
492   // Finally, we adjust the base-10 exponent so that the mantissa is a
493   // fraction in the range [0.1, 1) rather than an integer.
494   return bn_exp10 + digits->size();
495 }
496 
ToUniqueString() const497 string ExactFloat::ToUniqueString() const {
498   char prec_buf[20];
499   sprintf(prec_buf, "<%d>", prec());
500   return ToString() + prec_buf;
501 }
502 
operator =(const ExactFloat & b)503 ExactFloat& ExactFloat::operator=(const ExactFloat& b) {
504   if (this != &b) {
505     sign_ = b.sign_;
506     bn_exp_ = b.bn_exp_;
507     BN_copy(bn_.get(), b.bn_.get());
508   }
509   return *this;
510 }
511 
operator -() const512 ExactFloat ExactFloat::operator-() const {
513   return CopyWithSign(-sign_);
514 }
515 
operator +(const ExactFloat & a,const ExactFloat & b)516 ExactFloat operator+(const ExactFloat& a, const ExactFloat& b) {
517   return ExactFloat::SignedSum(a.sign_, &a, b.sign_, &b);
518 }
519 
operator -(const ExactFloat & a,const ExactFloat & b)520 ExactFloat operator-(const ExactFloat& a, const ExactFloat& b) {
521   return ExactFloat::SignedSum(a.sign_, &a, -b.sign_, &b);
522 }
523 
SignedSum(int a_sign,const ExactFloat * a,int b_sign,const ExactFloat * b)524 ExactFloat ExactFloat::SignedSum(int a_sign, const ExactFloat* a,
525                                  int b_sign, const ExactFloat* b) {
526   if (!a->is_normal() || !b->is_normal()) {
527     // Handle zero, infinity, and NaN according to IEEE 754-2008.
528     if (a->is_nan()) return *a;
529     if (b->is_nan()) return *b;
530     if (a->is_inf()) {
531       // Adding two infinities with opposite sign yields NaN.
532       if (b->is_inf() && a_sign != b_sign) return NaN();
533       return Infinity(a_sign);
534     }
535     if (b->is_inf()) return Infinity(b_sign);
536     if (a->is_zero()) {
537       if (!b->is_zero()) return b->CopyWithSign(b_sign);
538       // Adding two zeros with the same sign preserves the sign.
539       if (a_sign == b_sign) return SignedZero(a_sign);
540       // Adding two zeros of opposite sign produces +0.
541       return SignedZero(+1);
542     }
543     S2_DCHECK(b->is_zero());
544     return a->CopyWithSign(a_sign);
545   }
546   // Swap the numbers if necessary so that "a" has the larger bn_exp_.
547   if (a->bn_exp_ < b->bn_exp_) {
548     using std::swap;
549     swap(a_sign, b_sign);
550     swap(a, b);
551   }
552   // Shift "a" if necessary so that both values have the same bn_exp_.
553   ExactFloat r;
554   if (a->bn_exp_ > b->bn_exp_) {
555     S2_CHECK(BN_lshift(r.bn_.get(), a->bn_.get(), a->bn_exp_ - b->bn_exp_));
556     a = &r;  // The only field of "a" used below is bn_.
557   }
558   r.bn_exp_ = b->bn_exp_;
559   if (a_sign == b_sign) {
560     S2_CHECK(BN_add(r.bn_.get(), a->bn_.get(), b->bn_.get()));
561     r.sign_ = a_sign;
562   } else {
563     // Note that the BIGNUM documentation is out of date -- all methods now
564     // allow the result to be the same as any input argument, so it is okay if
565     // (a == &r) due to the shift above.
566     S2_CHECK(BN_sub(r.bn_.get(), a->bn_.get(), b->bn_.get()));
567     if (BN_is_zero(r.bn_.get())) {
568       r.sign_ = +1;
569     } else if (BN_is_negative(r.bn_.get())) {
570       // The magnitude of "b" was larger.
571       r.sign_ = b_sign;
572       BN_set_negative(r.bn_.get(), false);
573     } else {
574       // They were equal, or the magnitude of "a" was larger.
575       r.sign_ = a_sign;
576     }
577   }
578   r.Canonicalize();
579   return r;
580 }
581 
Canonicalize()582 void ExactFloat::Canonicalize() {
583   if (!is_normal()) return;
584 
585   // Underflow/overflow occurs if exp() is not in [kMinExp, kMaxExp].
586   // We also convert a zero mantissa to signed zero.
587   int my_exp = exp();
588   if (my_exp < kMinExp || BN_is_zero(bn_.get())) {
589     set_zero(sign_);
590   } else if (my_exp > kMaxExp) {
591     set_inf(sign_);
592   } else if (!BN_is_odd(bn_.get())) {
593     // Remove any low-order zero bits from the mantissa.
594     S2_DCHECK(!BN_is_zero(bn_.get()));
595     int shift = BN_ext_count_low_zero_bits(bn_.get());
596     if (shift > 0) {
597       S2_CHECK(BN_rshift(bn_.get(), bn_.get(), shift));
598       bn_exp_ += shift;
599     }
600   }
601   // If the mantissa has too many bits, we replace it by NaN to indicate
602   // that an inexact calculation has occurred.
603   if (prec() > kMaxPrec) {
604     set_nan();
605   }
606 }
607 
operator *(const ExactFloat & a,const ExactFloat & b)608 ExactFloat operator*(const ExactFloat& a, const ExactFloat& b) {
609   int result_sign = a.sign_ * b.sign_;
610   if (!a.is_normal() || !b.is_normal()) {
611     // Handle zero, infinity, and NaN according to IEEE 754-2008.
612     if (a.is_nan()) return a;
613     if (b.is_nan()) return b;
614     if (a.is_inf()) {
615       // Infinity times zero yields NaN.
616       if (b.is_zero()) return ExactFloat::NaN();
617       return ExactFloat::Infinity(result_sign);
618     }
619     if (b.is_inf()) {
620       if (a.is_zero()) return ExactFloat::NaN();
621       return ExactFloat::Infinity(result_sign);
622     }
623     S2_DCHECK(a.is_zero() || b.is_zero());
624     return ExactFloat::SignedZero(result_sign);
625   }
626   ExactFloat r;
627   r.sign_ = result_sign;
628   r.bn_exp_ = a.bn_exp_ + b.bn_exp_;
629   BN_CTX* ctx = BN_CTX_new();
630   S2_CHECK(BN_mul(r.bn_.get(), a.bn_.get(), b.bn_.get(), ctx));
631   BN_CTX_free(ctx);
632   r.Canonicalize();
633   return r;
634 }
635 
operator ==(const ExactFloat & a,const ExactFloat & b)636 bool operator==(const ExactFloat& a, const ExactFloat& b) {
637   // NaN is not equal to anything, not even itself.
638   if (a.is_nan() || b.is_nan()) return false;
639 
640   // Since Canonicalize() strips low-order zero bits, all other cases
641   // (including non-normal values) require bn_exp_ to be equal.
642   if (a.bn_exp_ != b.bn_exp_) return false;
643 
644   // Positive and negative zero are equal.
645   if (a.is_zero() && b.is_zero()) return true;
646 
647   // Otherwise, the signs and mantissas must match.  Note that non-normal
648   // values such as infinity have a mantissa of zero.
649   return a.sign_ == b.sign_ && BN_ucmp(a.bn_.get(), b.bn_.get()) == 0;
650 }
651 
ScaleAndCompare(const ExactFloat & b) const652 int ExactFloat::ScaleAndCompare(const ExactFloat& b) const {
653   S2_DCHECK(is_normal() && b.is_normal() && bn_exp_ >= b.bn_exp_);
654   ExactFloat tmp = *this;
655   S2_CHECK(BN_lshift(tmp.bn_.get(), tmp.bn_.get(), bn_exp_ - b.bn_exp_));
656   return BN_ucmp(tmp.bn_.get(), b.bn_.get());
657 }
658 
UnsignedLess(const ExactFloat & b) const659 bool ExactFloat::UnsignedLess(const ExactFloat& b) const {
660   // Handle the zero/infinity cases (NaN has already been done).
661   if (is_inf() || b.is_zero()) return false;
662   if (is_zero() || b.is_inf()) return true;
663   // If the high-order bit positions differ, we are done.
664   int cmp = exp() - b.exp();
665   if (cmp != 0) return cmp < 0;
666   // Otherwise shift one of the two values so that they both have the same
667   // bn_exp_ and then compare the mantissas.
668   return (bn_exp_ >= b.bn_exp_ ?
669           ScaleAndCompare(b) < 0 : b.ScaleAndCompare(*this) > 0);
670 }
671 
operator <(const ExactFloat & a,const ExactFloat & b)672 bool operator<(const ExactFloat& a, const ExactFloat& b) {
673   // NaN is unordered compared to everything, including itself.
674   if (a.is_nan() || b.is_nan()) return false;
675   // Positive and negative zero are equal.
676   if (a.is_zero() && b.is_zero()) return false;
677   // Otherwise, anything negative is less than anything positive.
678   if (a.sign_ != b.sign_) return a.sign_ < b.sign_;
679   // Now we just compare absolute values.
680   return (a.sign_ > 0) ? a.UnsignedLess(b) : b.UnsignedLess(a);
681 }
682 
fabs(const ExactFloat & a)683 ExactFloat fabs(const ExactFloat& a) {
684   return abs(a);
685 }
686 
abs(const ExactFloat & a)687 ExactFloat abs(const ExactFloat& a) {
688   return a.CopyWithSign(+1);
689 }
690 
fmax(const ExactFloat & a,const ExactFloat & b)691 ExactFloat fmax(const ExactFloat& a, const ExactFloat& b) {
692   // If one argument is NaN, return the other argument.
693   if (a.is_nan()) return b;
694   if (b.is_nan()) return a;
695   // Not required by IEEE 754, but we prefer +0 over -0.
696   if (a.sign_ != b.sign_) {
697     return (a.sign_ < b.sign_) ? b : a;
698   }
699   return (a < b) ? b : a;
700 }
701 
fmin(const ExactFloat & a,const ExactFloat & b)702 ExactFloat fmin(const ExactFloat& a, const ExactFloat& b) {
703   // If one argument is NaN, return the other argument.
704   if (a.is_nan()) return b;
705   if (b.is_nan()) return a;
706   // Not required by IEEE 754, but we prefer -0 over +0.
707   if (a.sign_ != b.sign_) {
708     return (a.sign_ < b.sign_) ? a : b;
709   }
710   return (a < b) ? a : b;
711 }
712 
fdim(const ExactFloat & a,const ExactFloat & b)713 ExactFloat fdim(const ExactFloat& a, const ExactFloat& b) {
714   // This formulation has the correct behavior for NaNs.
715   return (a <= b) ? 0 : (a - b);
716 }
717 
ceil(const ExactFloat & a)718 ExactFloat ceil(const ExactFloat& a) {
719   return a.RoundToPowerOf2(0, ExactFloat::kRoundTowardPositive);
720 }
721 
floor(const ExactFloat & a)722 ExactFloat floor(const ExactFloat& a) {
723   return a.RoundToPowerOf2(0, ExactFloat::kRoundTowardNegative);
724 }
725 
trunc(const ExactFloat & a)726 ExactFloat trunc(const ExactFloat& a) {
727   return a.RoundToPowerOf2(0, ExactFloat::kRoundTowardZero);
728 }
729 
round(const ExactFloat & a)730 ExactFloat round(const ExactFloat& a) {
731   return a.RoundToPowerOf2(0, ExactFloat::kRoundTiesAwayFromZero);
732 }
733 
rint(const ExactFloat & a)734 ExactFloat rint(const ExactFloat& a) {
735   return a.RoundToPowerOf2(0, ExactFloat::kRoundTiesToEven);
736 }
737 
738 template <class T>
ToInteger(RoundingMode mode) const739 T ExactFloat::ToInteger(RoundingMode mode) const {
740   using std::numeric_limits;
741   static_assert(sizeof(T) <= sizeof(uint64), "max 64 bits supported");
742   static_assert(numeric_limits<T>::is_signed, "only signed types supported");
743   const int64 kMinValue = numeric_limits<T>::min();
744   const int64 kMaxValue = numeric_limits<T>::max();
745 
746   ExactFloat r = RoundToPowerOf2(0, mode);
747   if (r.is_nan()) return kMaxValue;
748   if (r.is_zero()) return 0;
749   if (!r.is_inf()) {
750     // If the unsigned value has more than 63 bits it is always clamped.
751     if (r.exp() < 64) {
752       int64 value = BN_ext_get_uint64(r.bn_.get()) << r.bn_exp_;
753       if (r.sign_ < 0) value = -value;
754       return max(kMinValue, min(kMaxValue, value));
755     }
756   }
757   return (r.sign_ < 0) ? kMinValue : kMaxValue;
758 }
759 
lrint(const ExactFloat & a)760 long lrint(const ExactFloat& a) {
761   return a.ToInteger<long>(ExactFloat::kRoundTiesToEven);
762 }
763 
llrint(const ExactFloat & a)764 long long llrint(const ExactFloat& a) {
765   return a.ToInteger<long long>(ExactFloat::kRoundTiesToEven);
766 }
767 
lround(const ExactFloat & a)768 long lround(const ExactFloat& a) {
769   return a.ToInteger<long>(ExactFloat::kRoundTiesAwayFromZero);
770 }
771 
llround(const ExactFloat & a)772 long long llround(const ExactFloat& a) {
773   return a.ToInteger<long long>(ExactFloat::kRoundTiesAwayFromZero);
774 }
775 
copysign(const ExactFloat & a,const ExactFloat & b)776 ExactFloat copysign(const ExactFloat& a, const ExactFloat& b) {
777   return a.CopyWithSign(b.sign_);
778 }
779 
frexp(const ExactFloat & a,int * exp)780 ExactFloat frexp(const ExactFloat& a, int* exp) {
781   if (!a.is_normal()) {
782     // If a == 0, exp should be zero.  If a.is_inf() or a.is_nan(), exp is not
783     // defined but the glibc implementation returns zero.
784     *exp = 0;
785     return a;
786   }
787   *exp = a.exp();
788   return ldexp(a, -a.exp());
789 }
790 
ldexp(const ExactFloat & a,int exp)791 ExactFloat ldexp(const ExactFloat& a, int exp) {
792   if (!a.is_normal()) return a;
793 
794   // To prevent integer overflow, we first clamp "exp" so that
795   // (kMinExp - 1) <= (a_exp + exp) <= (kMaxExp + 1).
796   int a_exp = a.exp();
797   exp = min(ExactFloat::kMaxExp + 1 - a_exp,
798             max(ExactFloat::kMinExp - 1 + a_exp, exp));
799 
800   // Now modify the exponent and check for overflow/underflow.
801   ExactFloat r = a;
802   r.bn_exp_ += exp;
803   r.Canonicalize();
804   return r;
805 }
806 
scalbln(const ExactFloat & a,long exp)807 ExactFloat scalbln(const ExactFloat& a, long exp) {
808   // Clamp the exponent to the range of "int" in order to avoid truncation.
809   exp = max(static_cast<long>(INT_MIN), min(static_cast<long>(INT_MAX), exp));
810   return ldexp(a, exp);
811 }
812 
ilogb(const ExactFloat & a)813 int ilogb(const ExactFloat& a) {
814   if (a.is_zero()) return FP_ILOGB0;
815   if (a.is_inf()) return INT_MAX;
816   if (a.is_nan()) return FP_ILOGBNAN;
817   // a.exp() assumes the significand is in the range [0.5, 1).
818   return a.exp() - 1;
819 }
820 
logb(const ExactFloat & a)821 ExactFloat logb(const ExactFloat& a) {
822   if (a.is_zero()) return ExactFloat::Infinity(-1);
823   if (a.is_inf()) return ExactFloat::Infinity(+1);  // Even if a < 0.
824   if (a.is_nan()) return a;
825   // exp() assumes the significand is in the range [0.5,1).
826   return ExactFloat(a.exp() - 1);
827 }
828 
Unimplemented()829 ExactFloat ExactFloat::Unimplemented() {
830   S2_LOG(FATAL) << "Unimplemented ExactFloat method called";
831   return NaN();
832 }
833