//! An implementation of Clinger's Bellerophon algorithm.
//!
//! This is a moderate path algorithm that uses an extended-precision
//! float, represented in 80 bits, by calculating the bits of slop
//! and determining if those bits could prevent unambiguous rounding.
//!
//! This algorithm requires less static storage than the Lemire algorithm,
//! and has decent performance, and is therefore used when non-decimal,
//! non-power-of-two strings need to be parsed. Clinger's algorithm
//! is described in depth in "How to Read Floating Point Numbers Accurately.",
//! available online [here](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.45.4152&rep=rep1&type=pdf).
//!
//! This implementation is loosely based on the Golang implementation,
//! found [here](https://github.com/golang/go/blob/b10849fbb97a2244c086991b4623ae9f32c212d0/src/strconv/extfloat.go).
//! This code is therefore subject to a 3-clause BSD license.

#![cfg(feature = "compact")]
#![doc(hidden)]

use crate::extended_float::ExtendedFloat;
use crate::mask::{lower_n_halfway, lower_n_mask};
use crate::num::Float;
use crate::number::Number;
use crate::rounding::{round, round_nearest_tie_even};
use crate::table::BASE10_POWERS;

// ALGORITHM
// ---------

/// Core implementation of the Bellerophon algorithm.
///
/// Create an extended-precision float, scale it to the proper radix power,
/// calculate the bits of slop, and return the representation. The value
/// will always be guaranteed to be within 1 bit, rounded-down, of the real
/// value. If a negative exponent is returned, this represents we were
/// unable to unambiguously round the significant digits.
///
/// This has been modified to return a biased, rather than unbiased exponent.
pub fn bellerophon<F: Float>(num: &Number) -> ExtendedFloat {
    // Sentinel results: exact zero, and infinity (zero mantissa with the
    // target float's all-ones exponent).
    let fp_zero = ExtendedFloat {
        mant: 0,
        exp: 0,
    };
    let fp_inf = ExtendedFloat {
        mant: 0,
        exp: F::INFINITE_POWER,
    };

    // Early short-circuit, in case of literal 0 or infinity.
    // This allows us to avoid narrow casts causing numeric overflow,
    // and is a quick check for any radix.
    if num.mantissa == 0 || num.exponent <= -0x1000 {
        return fp_zero;
    } else if num.exponent >= 0x1000 {
        return fp_inf;
    }

    // Calculate our indexes for our extended-precision multiplication.
    // This narrowing cast is safe, since exponent must be in a valid range.
    // `large_index` selects a precomputed power that is a multiple of `step`;
    // `small_index` selects the remaining power within one step.
    let exponent = num.exponent as i32 + BASE10_POWERS.bias;
    let small_index = exponent % BASE10_POWERS.step;
    let large_index = exponent / BASE10_POWERS.step;

    if exponent < 0 {
        // Guaranteed underflow (assign 0).
        return fp_zero;
    }
    if large_index as usize >= BASE10_POWERS.large.len() {
        // Overflow (assign infinity)
        return fp_inf;
    }

    // Within the valid exponent range, multiply by the large and small
    // exponents and return the resulting value.

    // Track errors to as a factor of unit in last-precision.
    let mut errors: u32 = 0;
    if num.many_digits {
        // The parsed mantissa was truncated, so up to half a ULP of
        // error may already be present before any scaling.
        errors += error_halfscale();
    }

    // Multiply by the small power.
    // Check if we can directly multiply by an integer, if not,
    // use extended-precision multiplication.
    let mut fp = ExtendedFloat {
        mant: num.mantissa,
        exp: 0,
    };
    match fp.mant.overflowing_mul(BASE10_POWERS.get_small_int(small_index as usize)) {
        // Overflow, multiplication unsuccessful, go slow path.
        (_, true) => {
            normalize(&mut fp);
            fp = mul(&fp, &BASE10_POWERS.get_small(small_index as usize));
            // The extended-precision multiply rounds its low bits, which
            // can introduce up to half a ULP of additional error.
            errors += error_halfscale();
        },
        // No overflow, multiplication successful.
        (mant, false) => {
            fp.mant = mant;
            normalize(&mut fp);
        },
    }

    // Multiply by the large power.
    fp = mul(&fp, &BASE10_POWERS.get_large(large_index as usize));
    if errors > 0 {
        // Pre-existing error is amplified by the rounding multiply.
        errors += 1;
    }
    errors += error_halfscale();

    // Normalize the floating point (and the errors).
    let shift = normalize(&mut fp);
    errors <<= shift;
    fp.exp += F::EXPONENT_BIAS;

    // Check for literal overflow, even with halfway cases.
    // NOTE(review): this branch returns zero — it guards values whose
    // exponent lies far below the minimum (underflow), despite the
    // "overflow" wording; confirm against upstream.
    if -fp.exp + 1 > 65 {
        return fp_zero;
    }

    // Too many errors accumulated, return an error.
    if !error_is_accurate::<F>(errors, &fp) {
        // Bias the exponent so we know it's invalid.
        fp.exp += F::INVALID_FP;
        return fp;
    }

    // Check if we have a literal 0 or overflow here.
    // If we have an exponent of -63, we can still have a valid shift,
    // giving a case where we have too many errors and need to round-up.
    if -fp.exp + 1 == 65 {
        // Have more than 64 bits below the minimum exponent, must be 0.
        return fp_zero;
    }

    // Round to the nearest representable float, ties-to-even.
    round::<F, _>(&mut fp, |f, s| {
        round_nearest_tie_even(f, s, |is_odd, is_halfway, is_above| {
            is_above || (is_odd && is_halfway)
        });
    });
    fp
}

// ERRORS
// ------

// Determine whether the errors accumulated while calculating the
// extended-precision float could prevent unambiguous rounding.
//
// Specifically, we want to know if we are close to a halfway representation,
// or halfway between `b` and `b+1`, or `b+h`. The halfway representation
// has the form:
//     SEEEEEEEHMMMMMMMMMMMMMMMMMMMMMMM100...
// where:
//     S = Sign Bit
//     E = Exponent Bits
//     H = Hidden Bit
//     M = Mantissa Bits
//
// The halfway representation has a bit set 1-after the mantissa digits,
// and no bits set immediately afterward, making it impossible to
// round between `b` and `b+1` with this representation.

/// Full scale of the error counter: one error unit corresponds to
/// 1/8th of a ULP in the extended-precision mantissa.
#[inline(always)]
const fn error_scale() -> u32 {
    8
}

/// Half of [`error_scale`], i.e. half a ULP's worth of error units.
#[inline(always)]
const fn error_halfscale() -> u32 {
    error_scale() >> 1
}

/// Determine if the number of errors is tolerable for float precision.
fn error_is_accurate<F: Float>(errors: u32, fp: &ExtendedFloat) -> bool {
    // Check we can't have a literal 0 denormal float.
    debug_assert!(fp.exp >= -64);

    // Determine if extended-precision float is a good approximation.
    // If the error has affected too many units, the float will be
    // inaccurate, or if the representation is too close to halfway
    // that any operations could affect this halfway representation.
    // See the documentation for dtoa for more information.

    // This is always a valid u32, since `fp.exp >= -64`
    // will always be positive and the significand size is {23, 52}.
    let mantissa_shift = 64 - F::MANTISSA_SIZE - 1;

    // The unbiased exponent check is `unbiased_exp <= F::MANTISSA_SIZE
    // - F::EXPONENT_BIAS - 64 + 1`, or `biased_exp <= F::MANTISSA_SIZE - 63`,
    // or `biased_exp <= mantissa_shift`.
    let extrabits = match fp.exp <= -mantissa_shift {
        // Denormal, since shifting to the hidden bit still has a negative exponent.
        // The unbiased check calculation for bits is `1 - F::EXPONENT_BIAS - unbiased_exp`,
        // or `1 - biased_exp`.
        true => 1 - fp.exp,
        false => 64 - F::MANTISSA_SIZE - 1,
    };

    // Our logic is as follows: we want to determine if the actual
    // mantissa and the errors during calculation differ significantly
    // from the rounding point. The rounding point for round-nearest
    // is the halfway point, IE, this when the truncated bits start
    // with b1000..., while the rounding point for the round-toward
    // is when the truncated bits are equal to 0.
    // To do so, we can check whether the rounding point +/- the error
    // are >/< the actual lower n bits.
    //
    // For whether we need to use signed or unsigned types for this
    // analysis, see this example, using u8 rather than u64 to simplify
    // things.
    //
    // # Comparisons
    //      cmp1 = (halfway - errors) < extra
    //      cmp2 = extra < (halfway + errors)
    //
    // # Large Extrabits, Low Errors
    //
    //      extrabits = 8
    //      halfway          =  0b10000000
    //      extra            =  0b10000010
    //      errors           =  0b00000100
    //      halfway - errors =  0b01111100
    //      halfway + errors =  0b10000100
    //
    //      Unsigned:
    //          halfway - errors = 124
    //          halfway + errors = 132
    //          extra            = 130
    //          cmp1             = true
    //          cmp2             = true
    //      Signed:
    //          halfway - errors = 124
    //          halfway + errors = -124
    //          extra            = -126
    //          cmp1             = false
    //          cmp2             = true
    //
    // # Conclusion
    //
    // Since errors will always be small, and since we want to detect
    // if the representation is accurate, we need to use an **unsigned**
    // type for comparisons.
    let maskbits = extrabits as u64;
    let errors = errors as u64;

    // Round-to-nearest, need to use the halfway point.
    if extrabits > 64 {
        // Underflow, we have a shift larger than the mantissa.
        // Representation is valid **only** if the value is close enough
        // overflow to the next bit within errors. If it overflows,
        // the representation is **not** valid.
        !fp.mant.overflowing_add(errors).1
    } else {
        let mask = lower_n_mask(maskbits);
        let extra = fp.mant & mask;

        // Round-to-nearest, need to check if we're close to halfway.
        // IE, b10100 | 100000, where `|` signifies the truncation point.
        let halfway = lower_n_halfway(maskbits);
        let cmp1 = halfway.wrapping_sub(errors) < extra;
        let cmp2 = extra < halfway.wrapping_add(errors);

        // If both comparisons are true, we have significant rounding error,
        // and the value cannot be exactly represented. Otherwise, the
        // representation is valid.
        !(cmp1 && cmp2)
    }
}

// MATH
// ----

274 /// Normalize float-point number.
275 ///
276 /// Shift the mantissa so the number of leading zeros is 0, or the value
277 /// itself is 0.
278 ///
279 /// Get the number of bytes shifted.
normalize(fp: &mut ExtendedFloat) -> i32280 pub fn normalize(fp: &mut ExtendedFloat) -> i32 {
281     // Note:
282     // Using the ctlz intrinsic via leading_zeros is way faster (~10x)
283     // than shifting 1-bit at a time, via while loop, and also way
284     // faster (~2x) than an unrolled loop that checks at 32, 16, 4,
285     // 2, and 1 bit.
286     //
287     // Using a modulus of pow2 (which will get optimized to a bitwise
288     // and with 0x3F or faster) is slightly slower than an if/then,
289     // however, removing the if/then will likely optimize more branched
290     // code as it removes conditional logic.
291 
292     // Calculate the number of leading zeros, and then zero-out
293     // any overflowing bits, to avoid shl overflow when self.mant == 0.
294     if fp.mant != 0 {
295         let shift = fp.mant.leading_zeros() as i32;
296         fp.mant <<= shift;
297         fp.exp -= shift;
298         shift
299     } else {
300         0
301     }
302 }
303 
304 /// Multiply two normalized extended-precision floats, as if by `a*b`.
305 ///
306 /// The precision is maximal when the numbers are normalized, however,
307 /// decent precision will occur as long as both values have high bits
308 /// set. The result is not normalized.
309 ///
310 /// Algorithm:
311 ///     1. Non-signed multiplication of mantissas (requires 2x as many bits as input).
312 ///     2. Normalization of the result (not done here).
313 ///     3. Addition of exponents.
mul(x: &ExtendedFloat, y: &ExtendedFloat) -> ExtendedFloat314 pub fn mul(x: &ExtendedFloat, y: &ExtendedFloat) -> ExtendedFloat {
315     // Logic check, values must be decently normalized prior to multiplication.
316     debug_assert!(x.mant >> 32 != 0);
317     debug_assert!(y.mant >> 32 != 0);
318 
319     // Extract high-and-low masks.
320     // Mask is u32::MAX for older Rustc versions.
321     const LOMASK: u64 = 0xffff_ffff;
322     let x1 = x.mant >> 32;
323     let x0 = x.mant & LOMASK;
324     let y1 = y.mant >> 32;
325     let y0 = y.mant & LOMASK;
326 
327     // Get our products
328     let x1_y0 = x1 * y0;
329     let x0_y1 = x0 * y1;
330     let x0_y0 = x0 * y0;
331     let x1_y1 = x1 * y1;
332 
333     let mut tmp = (x1_y0 & LOMASK) + (x0_y1 & LOMASK) + (x0_y0 >> 32);
334     // round up
335     tmp += 1 << (32 - 1);
336 
337     ExtendedFloat {
338         mant: x1_y1 + (x1_y0 >> 32) + (x0_y1 >> 32) + (tmp >> 32),
339         exp: x.exp + y.exp + 64,
340     }
341 }
342 
// POWERS
// ------

/// Precalculated powers of base N for the Bellerophon algorithm.
pub struct BellerophonPowers {
    /// Pre-calculated small powers.
    pub small: &'static [u64],
    /// Pre-calculated large powers.
    pub large: &'static [u64],
    /// Pre-calculated small powers as 64-bit integers.
    pub small_int: &'static [u64],
    /// Step between large powers and number of small powers.
    pub step: i32,
    /// Exponent bias for the large powers.
    pub bias: i32,
    /// `ceil(log2(radix))` scaled as a multiplier.
    pub log2: i64,
    /// Bitshift for the log2 multiplier.
    pub log2_shift: i32,
}

364 /// Allow indexing of values without bounds checking
365 impl BellerophonPowers {
366     #[inline]
get_small(&self, index: usize) -> ExtendedFloat367     pub fn get_small(&self, index: usize) -> ExtendedFloat {
368         let mant = self.small[index];
369         let exp = (1 - 64) + ((self.log2 * index as i64) >> self.log2_shift);
370         ExtendedFloat {
371             mant,
372             exp: exp as i32,
373         }
374     }
375 
376     #[inline]
get_large(&self, index: usize) -> ExtendedFloat377     pub fn get_large(&self, index: usize) -> ExtendedFloat {
378         let mant = self.large[index];
379         let biased_e = index as i64 * self.step as i64 - self.bias as i64;
380         let exp = (1 - 64) + ((self.log2 * biased_e) >> self.log2_shift);
381         ExtendedFloat {
382             mant,
383             exp: exp as i32,
384         }
385     }
386 
387     #[inline]
get_small_int(&self, index: usize) -> u64388     pub fn get_small_int(&self, index: usize) -> u64 {
389         self.small_int[index]
390     }
391 }
392