1 //! Extended precision floating-point types.
2 //!
3 //! Also contains helpers to convert to and from native rust floats.
4 //! This representation stores the mantissa as a 64-bit unsigned integer,
5 //! and the exponent as a 32-bit unsigned integer, allowed ~80 bits of
6 //! precision (only 16 bits of the 32-bit integer are used, u32 is used
7 //! for performance). Since there is no storage for the sign bit,
8 //! this only works for positive floats.
9 // Lot of useful algorithms in here, and helper utilities.
10 // We want to make sure this code is not accidentally deleted.
11 #![allow(dead_code)]
12 
13 use crate::util::*;
14 use super::convert::*;
15 use super::mantissa::Mantissa;
16 use super::rounding::*;
17 use super::shift::*;
18 
19 // FLOAT TYPE
20 
21 /// Extended precision floating-point type.
22 ///
23 /// Private implementation, exposed only for testing purposes.
24 #[doc(hidden)]
25 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
26 pub struct ExtendedFloat<M: Mantissa> {
27     /// Mantissa for the extended-precision float.
28     pub mant: M,
29     /// Binary exponent for the extended-precision float.
30     pub exp: i32,
31 }
32 
33 impl<M: Mantissa> ExtendedFloat<M> {
34     // PROPERTIES
35 
36     perftools_inline!{
37     /// Get the mantissa component.
38     pub fn mantissa(&self) -> M {
39         self.mant
40     }}
41 
42     perftools_inline!{
43     /// Get the exponent component.
44     pub fn exponent(&self) -> i32 {
45         self.exp
46     }}
47 
48     // OPERATIONS
49 
50     perftools_inline!{
51     /// Multiply two normalized extended-precision floats, as if by `a*b`.
52     ///
53     /// The precision is maximal when the numbers are normalized, however,
54     /// decent precision will occur as long as both values have high bits
55     /// set. The result is not normalized.
56     ///
57     /// Algorithm:
58     ///     1. Non-signed multiplication of mantissas (requires 2x as many bits as input).
59     ///     2. Normalization of the result (not done here).
60     ///     3. Addition of exponents.
61     pub fn mul(&self, b: &ExtendedFloat<M>)
62         -> ExtendedFloat<M>
63     {
64         // Logic check, values must be decently normalized prior to multiplication.
65         debug_assert!((self.mant & M::HIMASK != M::ZERO) && (b.mant & M::HIMASK != M::ZERO));
66 
67         // Extract high-and-low masks.
68         let ah = self.mant >> M::HALF;
69         let al = self.mant & M::LOMASK;
70         let bh = b.mant >> M::HALF;
71         let bl = b.mant & M::LOMASK;
72 
73         // Get our products
74         let ah_bl = ah * bl;
75         let al_bh = al * bh;
76         let al_bl = al * bl;
77         let ah_bh = ah * bh;
78 
79         let mut tmp = (ah_bl & M::LOMASK) + (al_bh & M::LOMASK) + (al_bl >> M::HALF);
80         // round up
81         tmp += M::ONE << (M::HALF-1);
82 
83         ExtendedFloat {
84             mant: ah_bh + (ah_bl >> M::HALF) + (al_bh >> M::HALF) + (tmp >> M::HALF),
85             exp: self.exp + b.exp + M::FULL
86         }
87     }}
88 
89     perftools_inline!{
90     /// Multiply in-place, as if by `a*b`.
91     ///
92     /// The result is not normalized.
93     pub fn imul(&mut self, b: &ExtendedFloat<M>)
94     {
95         *self = self.mul(b);
96     }}
97 
98     // NORMALIZE
99 
100     perftools_inline!{
101     /// Get if extended-float is normalized, MSB is set.
102     pub fn is_normalized(&self)
103         -> bool
104     {
105         self.mant & M::NORMALIZED_MASK == M::NORMALIZED_MASK
106     }}
107 
108     perftools_inline!{
109     /// Normalize float-point number.
110     ///
111     /// Shift the mantissa so the number of leading zeros is 0, or the value
112     /// itself is 0.
113     ///
114     /// Get the number of bytes shifted.
115     pub fn normalize(&mut self)
116         -> u32
117     {
118         // Note:
119         // Using the cltz intrinsic via leading_zeros is way faster (~10x)
120         // than shifting 1-bit at a time, via while loop, and also way
121         // faster (~2x) than an unrolled loop that checks at 32, 16, 4,
122         // 2, and 1 bit.
123         //
124         // Using a modulus of pow2 (which will get optimized to a bitwise
125         // and with 0x3F or faster) is slightly slower than an if/then,
126         // however, removing the if/then will likely optimize more branched
127         // code as it removes conditional logic.
128 
129         // Calculate the number of leading zeros, and then zero-out
130         // any overflowing bits, to avoid shl overflow when self.mant == 0.
131         let shift = if self.mant.is_zero() { 0 } else { self.mant.leading_zeros() };
132         shl(self, shift);
133         shift
134     }}
135 
136     perftools_inline!{
137     /// Normalize floating-point number to n-bits away from the MSB.
138     ///
139     /// This may lead to lossy rounding, and will not use custom rounding
140     /// rules to accommodate for this.
141     pub fn normalize_to(&mut self, n: u32)
142         -> i32
143     {
144         debug_assert!(n <= M::BITS.as_u32(), "ExtendedFloat::normalize_to() attempting to shift beyond type size.");
145 
146         // Get the shift, with any of the higher bits removed.
147         // This way, we can guarantee that we will not overflow
148         // with the shl/shr.
149         let leading = if self.mant.is_zero() { n } else { self.mant.leading_zeros() };
150         let shift = leading.as_i32() - n.as_i32();
151         if shift > 0 {
152             // Need to shift left
153             shl(self, shift);
154         } else if shift < 0 {
155             // Need to shift right.
156             shr(self, -shift);
157         }
158 
159         shift
160     }}
161 
162     perftools_inline!{
163     /// Get normalized boundaries for float.
164     pub fn normalized_boundaries(&self)
165         -> (ExtendedFloat<M>, ExtendedFloat<M>)
166     {
167         let mut upper = ExtendedFloat {
168             mant: (self.mant << 1) + M::ONE,
169             exp: self.exp - 1,
170         };
171         upper.normalize();
172 
173         // Use a boolean hack to get 2 if they're equal, else 1, without
174         // any branching.
175         let is_hidden = self.mant == as_cast::<M, _>(f64::HIDDEN_BIT_MASK);
176         let l_shift: i32 = is_hidden as i32 + 1;
177 
178         let mut lower = ExtendedFloat {
179             mant: (self.mant << l_shift) - M::ONE,
180             exp: self.exp - l_shift,
181         };
182         lower.mant <<= lower.exp - upper.exp;
183         lower.exp = upper.exp;
184 
185         (lower, upper)
186     }}
187 
188     // ROUND
189 
190     perftools_inline!{
191     /// Lossy round float-point number to native mantissa boundaries.
192     pub(crate) fn round_to_native<F, Cb>(&mut self, cb: Cb)
193         where F: FloatRounding<M>,
194               Cb: FnOnce(&mut ExtendedFloat<M>, i32)
195     {
196         round_to_native::<F, M, _>(self, cb)
197     }}
198 
199     perftools_inline!{
200     /// Lossy round float-point number to f32 mantissa boundaries.
201     pub(crate) fn round_to_f32<Cb>(&mut self, cb: Cb)
202         where f32: FloatRounding<M>,
203               Cb: FnOnce(&mut ExtendedFloat<M>, i32)
204     {
205         self.round_to_native::<f32, Cb>(cb)
206     }}
207 
208     perftools_inline!{
209     /// Lossy round float-point number to f64 mantissa boundaries.
210     pub(crate) fn round_to_f64<Cb>(&mut self, cb: Cb)
211         where f64: FloatRounding<M>,
212               Cb: FnOnce(&mut ExtendedFloat<M>, i32)
213     {
214         self.round_to_native::<f64, Cb>(cb)
215     }}
216 
217     // FROM
218 
219     perftools_inline!{
220     /// Create extended float from 8-bit unsigned integer.
221     pub fn from_int<T: Integer>(i: T)
222         -> ExtendedFloat<M>
223     {
224         from_int(i)
225     }}
226 
227     perftools_inline!{
228     /// Create extended float from 8-bit unsigned integer.
229     pub fn from_u8(i: u8)
230         -> ExtendedFloat<M>
231     {
232         Self::from_int(i)
233     }}
234 
235     perftools_inline!{
236     /// Create extended float from 16-bit unsigned integer.
237     pub fn from_u16(i: u16)
238         -> ExtendedFloat<M>
239     {
240         Self::from_int(i)
241     }}
242 
243     perftools_inline!{
244     /// Create extended float from 32-bit unsigned integer.
245     pub fn from_u32(i: u32)
246         -> ExtendedFloat<M>
247     {
248         Self::from_int(i)
249     }}
250 
251     perftools_inline!{
252     /// Create extended float from 64-bit unsigned integer.
253     pub fn from_u64(i: u64)
254         -> ExtendedFloat<M>
255     {
256         Self::from_int(i)
257     }}
258 
259     perftools_inline!{
260     /// Create extended float from native float.
261     pub fn from_float<F: Float>(f: F)
262         -> ExtendedFloat<M>
263     {
264         from_float(f)
265     }}
266 
267     perftools_inline!{
268     /// Create extended float from 32-bit float.
269     pub fn from_f32(f: f32)
270         -> ExtendedFloat<M>
271     {
272         Self::from_float(f)
273     }}
274 
275     perftools_inline!{
276     /// Create extended float from 64-bit float.
277     pub fn from_f64(f: f64)
278         -> ExtendedFloat<M>
279     {
280         Self::from_float(f)
281     }}
282 
283     // INTO
284 
285     perftools_inline!{
286     /// Convert into lower-precision native float.
287     pub fn into_float<F: FloatRounding<M>>(self)
288         -> F
289     {
290         #[cfg(not(feature = "rounding"))] {
291             self.into_rounded_float::<F>(RoundingKind::NearestTieEven, Sign::Positive)
292         }
293 
294         #[cfg(feature = "rounding")] {
295             self.into_rounded_float::<F>(get_float_rounding(), Sign::Positive)
296         }
297     }}
298 
299     perftools_inline!{
300     /// Convert into lower-precision 32-bit float.
301     pub fn into_f32(self)
302         -> f32
303         where f32: FloatRounding<M>
304     {
305         self.into_float()
306     }}
307 
308     perftools_inline!{
309     /// Convert into lower-precision 64-bit float.
310     pub fn into_f64(self)
311         -> f64
312         where f64: FloatRounding<M>
313     {
314         self.into_float()
315     }}
316 
317     // INTO ROUNDED
318 
319     perftools_inline!{
320     /// Into rounded float where the rounding kind has been converted.
321     pub(crate) fn into_rounded_float_impl<F>(mut self, kind: RoundingKind)
322         -> F
323         where F: FloatRounding<M>
324     {
325         // Normalize the actual float rounding here.
326         let cb = match kind {
327             RoundingKind::NearestTieEven     => round_nearest_tie_even,
328             RoundingKind::NearestTieAwayZero => round_nearest_tie_away_zero,
329             RoundingKind::Upward             => round_upward,
330             RoundingKind::Downward           => round_downward,
331             _                                => unreachable!()
332         };
333 
334         self.round_to_native::<F, _>(cb);
335         into_float(self)
336     }}
337 
338     perftools_inline!{
339     /// Convert into lower-precision native float with custom rounding rules.
340     pub fn into_rounded_float<F>(self, kind: RoundingKind, sign: Sign)
341         -> F
342         where F: FloatRounding<M>
343     {
344         self.into_rounded_float_impl(internal_rounding(kind, sign))
345     }}
346 
347     perftools_inline!{
348     /// Convert into lower-precision 32-bit float with custom rounding rules.
349     pub fn into_rounded_f32(self, kind: RoundingKind, sign: Sign)
350         -> f32
351         where f32: FloatRounding<M>
352     {
353         self.into_rounded_float(kind, sign)
354     }}
355 
356     perftools_inline!{
357     /// Convert into lower-precision 64-bit float with custom rounding rules.
358     pub fn into_rounded_f64(self, kind: RoundingKind, sign: Sign)
359         -> f64
360         where f64: FloatRounding<M>
361     {
362         self.into_rounded_float(kind, sign)
363     }}
364 
365     // AS
366 
367     perftools_inline!{
368     /// Convert to lower-precision native float.
369     pub fn as_float<F: FloatRounding<M>>(&self)
370         -> F
371     {
372         self.clone().into_float::<F>()
373     }}
374 
375     perftools_inline!{
376     /// Convert to lower-precision 32-bit float.
377     pub fn as_f32(&self)
378         -> f32
379         where f32: FloatRounding<M>
380     {
381         self.as_float()
382     }}
383 
384     perftools_inline!{
385     /// Convert to lower-precision 64-bit float.
386     pub fn as_f64(&self)
387         -> f64
388         where f64: FloatRounding<M>
389     {
390         self.as_float()
391     }}
392 
393     // AS ROUNDED
394 
395     perftools_inline!{
396     /// Convert to lower-precision native float with custom rounding rules.
397     pub fn as_rounded_float<F>(&self, kind: RoundingKind, sign: Sign)
398         -> F
399         where F: FloatRounding<M>
400     {
401         self.clone().into_rounded_float::<F>(kind, sign)
402     }}
403 
404     perftools_inline!{
405     /// Convert to lower-precision 32-bit float with custom rounding rules.
406     pub fn as_rounded_f32(&self, kind: RoundingKind, sign: Sign)
407         -> f32
408         where f32: FloatRounding<M>
409     {
410         self.as_rounded_float(kind, sign)
411     }}
412 
413     perftools_inline!{
414     /// Convert to lower-precision 64-bit float with custom rounding rules.
415     pub fn as_rounded_f64(&self, kind: RoundingKind, sign: Sign)
416         -> f64
417         where f64: FloatRounding<M>
418     {
419         self.as_rounded_float(kind, sign)
420     }}
421 }
422 
423 impl ExtendedFloat<u128> {
424     perftools_inline!{
425     /// Create extended float from 64-bit unsigned integer.
426     pub fn from_u128(i: u128) -> ExtendedFloat<u128> {
427         Self::from_int(i)
428     }}
429 }
430 
431 // ALIASES
432 
433 /// Alias with ~80 bits of precision, 64 for the mantissa and 16 for exponent.
434 pub type ExtendedFloat80 = ExtendedFloat<u64>;
435 
436 /// Alias with ~160 bits of precision, 128 for the mantissa and 32 for exponent.
437 pub type ExtendedFloat160 = ExtendedFloat<u128>;
438 
439 // TESTS
440 // -----
441 
442 #[cfg(test)]
443 mod tests {
444     use super::*;
445 
446     use approx::assert_relative_eq;
447 
448     // NORMALIZE
449 
check_normalize(mant: u64, exp: i32, shift: u32, r_mant: u64, r_exp: i32)450     fn check_normalize(mant: u64, exp: i32, shift: u32, r_mant: u64, r_exp: i32) {
451         let mut x = ExtendedFloat {mant: mant, exp: exp};
452         assert!(!x.is_normalized());
453         assert_eq!(x.normalize(), shift);
454         assert_eq!(x, ExtendedFloat {mant: r_mant, exp: r_exp});
455         assert!(x.is_normalized() || x.mant.is_zero());
456 
457         let mut x = ExtendedFloat {mant: mant as u128, exp: exp};
458         let shift = if shift == 0 { 0 } else { shift+64 };
459         let r_exp = if r_exp == 0 { 0 } else { r_exp-64 };
460         assert!(!x.is_normalized());
461         assert_eq!(x.normalize(), shift);
462         assert_eq!(x, ExtendedFloat {mant: (r_mant as u128) << 64, exp: r_exp});
463         assert!(x.is_normalized() || x.mant.is_zero());
464     }
465 
466     #[test]
normalize_test()467     fn normalize_test() {
468         // F32
469         // 0
470         check_normalize(0, 0, 0, 0, 0);
471 
472         // min value
473         check_normalize(1, -149, 63, 9223372036854775808, -212);
474 
475         // 1.0e-40
476         check_normalize(71362, -149, 47, 10043308644012916736, -196);
477 
478         // 1.0e-20
479         check_normalize(12379400, -90, 40, 13611294244890214400, -130);
480 
481         // 1.0
482         check_normalize(8388608, -23, 40, 9223372036854775808, -63);
483 
484         // 1e20
485         check_normalize(11368684, 43, 40, 12500000250510966784, 3);
486 
487         // max value
488         check_normalize(16777213, 104, 40, 18446740775174668288, 64);
489 
490         // F64
491 
492         // min value
493         check_normalize(1, -1074, 63, 9223372036854775808, -1137);
494 
495         // 1.0e-250
496         check_normalize(6448907850777164, -883, 11, 13207363278391631872, -894);
497 
498         // 1.0e-150
499         check_normalize(7371020360979573, -551, 11, 15095849699286165504, -562);
500 
501         // 1.0e-45
502         check_normalize(6427752177035961, -202, 11, 13164036458569648128, -213);
503 
504         // 1.0e-40
505         check_normalize(4903985730770844, -185, 11, 10043362776618688512, -196);
506 
507         // 1.0e-20
508         check_normalize(6646139978924579, -119, 11, 13611294676837537792, -130);
509 
510         // 1.0
511         check_normalize(4503599627370496, -52, 11, 9223372036854775808, -63);
512 
513         // 1e20
514         check_normalize(6103515625000000, 14, 11, 12500000000000000000, 3);
515 
516         // 1e40
517         check_normalize(8271806125530277, 80, 11, 16940658945086007296, 69);
518 
519         // 1e150
520         check_normalize(5503284107318959, 446, 11, 11270725851789228032, 435);
521 
522         // 1e250
523         check_normalize(6290184345309700, 778, 11, 12882297539194265600, 767);
524 
525         // max value
526         check_normalize(9007199254740991, 971, 11, 18446744073709549568, 960);
527     }
528 
check_normalize_to(mant: u64, exp: i32, n: u32, shift: i32, r_mant: u64, r_exp: i32)529     fn check_normalize_to(mant: u64, exp: i32, n: u32, shift: i32, r_mant: u64, r_exp: i32) {
530         let mut x = ExtendedFloat {mant: mant, exp: exp};
531         assert_eq!(x.normalize_to(n), shift);
532         assert_eq!(x, ExtendedFloat {mant: r_mant, exp: r_exp});
533 
534         let mut x = ExtendedFloat {mant: mant as u128, exp: exp};
535         let shift = if shift == 0 { 0 } else { shift+64 };
536         let r_exp = if r_exp == 0 { 0 } else { r_exp-64 };
537         assert_eq!(x.normalize_to(n), shift);
538         assert_eq!(x, ExtendedFloat {mant: (r_mant as u128) << 64, exp: r_exp});
539     }
540 
541     #[test]
normalize_to_test()542     fn normalize_to_test() {
543         // F32
544         // 0
545         check_normalize_to(0, 0, 0, 0, 0, 0);
546         check_normalize_to(0, 0, 2, 0, 0, 0);
547 
548         // min value
549         check_normalize_to(1, -149, 0, 63, 9223372036854775808, -212);
550         check_normalize_to(1, -149, 2, 61, 2305843009213693952, -210);
551 
552         // 1.0e-40
553         check_normalize_to(71362, -149, 0, 47, 10043308644012916736, -196);
554         check_normalize_to(71362, -149, 2, 45, 2510827161003229184, -194);
555 
556         // 1.0e-20
557         check_normalize_to(12379400, -90, 0, 40, 13611294244890214400, -130);
558         check_normalize_to(12379400, -90, 2, 38, 3402823561222553600, -128);
559 
560         // 1.0
561         check_normalize_to(8388608, -23, 0, 40, 9223372036854775808, -63);
562         check_normalize_to(8388608, -23, 2, 38, 2305843009213693952, -61);
563 
564         // 1e20
565         check_normalize_to(11368684, 43, 0, 40, 12500000250510966784, 3);
566         check_normalize_to(11368684, 43, 2, 38, 3125000062627741696, 5);
567 
568         // max value
569         check_normalize_to(16777213, 104, 0, 40, 18446740775174668288, 64);
570         check_normalize_to(16777213, 104, 2, 38, 4611685193793667072, 66);
571 
572         // F64
573 
574         // min value
575         check_normalize_to(1, -1074, 0, 63, 9223372036854775808, -1137);
576         check_normalize_to(1, -1074, 2, 61, 2305843009213693952, -1135);
577 
578         // 1.0e-250
579         check_normalize_to(6448907850777164, -883, 0, 11, 13207363278391631872, -894);
580         check_normalize_to(6448907850777164, -883, 2, 9, 3301840819597907968, -892);
581 
582         // 1.0e-150
583         check_normalize_to(7371020360979573, -551, 0, 11, 15095849699286165504, -562);
584         check_normalize_to(7371020360979573, -551, 2, 9, 3773962424821541376, -560);
585 
586         // 1.0e-45
587         check_normalize_to(6427752177035961, -202, 0, 11, 13164036458569648128, -213);
588         check_normalize_to(6427752177035961, -202, 2, 9, 3291009114642412032, -211);
589 
590         // 1.0e-40
591         check_normalize_to(4903985730770844, -185, 0, 11, 10043362776618688512, -196);
592         check_normalize_to(4903985730770844, -185, 2, 9, 2510840694154672128, -194);
593 
594         // 1.0e-20
595         check_normalize_to(6646139978924579, -119, 0, 11, 13611294676837537792, -130);
596         check_normalize_to(6646139978924579, -119, 2, 9, 3402823669209384448, -128);
597 
598         // 1.0
599         check_normalize_to(4503599627370496, -52, 0, 11, 9223372036854775808, -63);
600         check_normalize_to(4503599627370496, -52, 2, 9, 2305843009213693952, -61);
601 
602         // 1e20
603         check_normalize_to(6103515625000000, 14, 0 ,11, 12500000000000000000, 3);
604         check_normalize_to(6103515625000000, 14, 2, 9, 3125000000000000000, 5);
605 
606         // 1e40
607         check_normalize_to(8271806125530277, 80, 0, 11, 16940658945086007296, 69);
608         check_normalize_to(8271806125530277, 80, 2, 9, 4235164736271501824, 71);
609 
610         // 1e150
611         check_normalize_to(5503284107318959, 446, 0, 11, 11270725851789228032, 435);
612         check_normalize_to(5503284107318959, 446, 2, 9, 2817681462947307008, 437);
613 
614         // 1e250
615         check_normalize_to(6290184345309700, 778, 0, 11, 12882297539194265600, 767);
616         check_normalize_to(6290184345309700, 778, 2, 9, 3220574384798566400, 769);
617 
618         // max value
619         check_normalize_to(9007199254740991, 971, 0, 11, 18446744073709549568, 960);
620         check_normalize_to(9007199254740991, 971, 2, 9, 4611686018427387392, 962);
621     }
622 
623     #[test]
normalized_boundaries_test()624     fn normalized_boundaries_test() {
625         let fp = ExtendedFloat80 {mant: 4503599627370496, exp: -50};
626         let u = ExtendedFloat80 {mant: 9223372036854775296, exp: -61};
627         let l = ExtendedFloat80 {mant: 9223372036854776832, exp: -61};
628         let (upper, lower) = fp.normalized_boundaries();
629         assert_eq!(upper, u);
630         assert_eq!(lower, l);
631     }
632 
633     // ROUND
634 
check_round_to_f32(mant: u64, exp: i32, r_mant: u64, r_exp: i32)635     fn check_round_to_f32(mant: u64, exp: i32, r_mant: u64, r_exp: i32)
636     {
637         let mut x = ExtendedFloat {mant: mant, exp: exp};
638         x.round_to_f32(round_nearest_tie_even);
639         assert_eq!(x, ExtendedFloat {mant: r_mant, exp: r_exp});
640 
641         let mut x = ExtendedFloat {mant: (mant as u128) << 64, exp: exp-64};
642         x.round_to_f32(round_nearest_tie_even);
643         assert_eq!(x, ExtendedFloat {mant: r_mant as u128, exp: r_exp});
644     }
645 
646     #[test]
round_to_f32_test()647     fn round_to_f32_test() {
648         // This is lossy, so some of these values are **slightly** rounded.
649 
650         // underflow
651         check_round_to_f32(9223372036854775808, -213, 0, -149);
652 
653         // min value
654         check_round_to_f32(9223372036854775808, -212, 1, -149);
655 
656         // 1.0e-40
657         check_round_to_f32(10043308644012916736, -196, 71362, -149);
658 
659         // 1.0e-20
660         check_round_to_f32(13611294244890214400, -130, 12379400, -90);
661 
662         // 1.0
663         check_round_to_f32(9223372036854775808, -63, 8388608, -23);
664 
665         // 1e20
666         check_round_to_f32(12500000250510966784, 3, 11368684, 43);
667 
668         // max value
669         check_round_to_f32(18446740775174668288, 64, 16777213, 104);
670 
671         // overflow
672         check_round_to_f32(18446740775174668288, 65, 16777213, 105);
673     }
674 
check_round_to_f64(mant: u64, exp: i32, r_mant: u64, r_exp: i32)675     fn check_round_to_f64(mant: u64, exp: i32, r_mant: u64, r_exp: i32)
676     {
677         let mut x = ExtendedFloat {mant: mant, exp: exp};
678         x.round_to_f64(round_nearest_tie_even);
679         assert_eq!(x, ExtendedFloat {mant: r_mant, exp: r_exp});
680 
681         let mut x = ExtendedFloat {mant: (mant as u128) << 64, exp: exp-64};
682         x.round_to_f64(round_nearest_tie_even);
683         assert_eq!(x, ExtendedFloat {mant: r_mant as u128, exp: r_exp});
684     }
685 
686     #[test]
round_to_f64_test()687     fn round_to_f64_test() {
688         // This is lossy, so some of these values are **slightly** rounded.
689 
690         // underflow
691         check_round_to_f64(9223372036854775808, -1138, 0, -1074);
692 
693         // min value
694         check_round_to_f64(9223372036854775808, -1137, 1, -1074);
695 
696         // 1.0e-250
697         check_round_to_f64(15095849699286165504, -562, 7371020360979573, -551);
698 
699         // 1.0e-150
700         check_round_to_f64(15095849699286165504, -562, 7371020360979573, -551);
701 
702         // 1.0e-45
703         check_round_to_f64(13164036458569648128, -213, 6427752177035961, -202);
704 
705         // 1.0e-40
706         check_round_to_f64(10043362776618688512, -196, 4903985730770844, -185);
707 
708         // 1.0e-20
709         check_round_to_f64(13611294676837537792, -130, 6646139978924579, -119);
710 
711         // 1.0
712         check_round_to_f64(9223372036854775808, -63, 4503599627370496, -52);
713 
714         // 1e20
715         check_round_to_f64(12500000000000000000, 3, 6103515625000000, 14);
716 
717         // 1e40
718         check_round_to_f64(16940658945086007296, 69, 8271806125530277, 80);
719 
720         // 1e150
721         check_round_to_f64(11270725851789228032, 435, 5503284107318959, 446);
722 
723         // 1e250
724         check_round_to_f64(12882297539194265600, 767, 6290184345309700, 778);
725 
726         // max value
727         check_round_to_f64(18446744073709549568, 960, 9007199254740991, 971);
728 
729         // Bug fixes
730         // 1.2345e-308
731         check_round_to_f64(10234494226754558294, -1086, 2498655817078750, -1074)
732     }
733 
734     // FROM
735 
736     #[test]
from_int_test()737     fn from_int_test() {
738         // 0
739         assert_eq!(ExtendedFloat80::from_u8(0), (0, 0).into());
740         assert_eq!(ExtendedFloat80::from_u16(0), (0, 0).into());
741         assert_eq!(ExtendedFloat80::from_u32(0), (0, 0).into());
742         assert_eq!(ExtendedFloat80::from_u64(0), (0, 0).into());
743         assert_eq!(ExtendedFloat160::from_u128(0), (0, 0).into());
744 
745         // 1
746         assert_eq!(ExtendedFloat80::from_u8(1), (1, 0).into());
747         assert_eq!(ExtendedFloat80::from_u16(1), (1, 0).into());
748         assert_eq!(ExtendedFloat80::from_u32(1), (1, 0).into());
749         assert_eq!(ExtendedFloat80::from_u64(1), (1, 0).into());
750         assert_eq!(ExtendedFloat160::from_u128(1), (1, 0).into());
751 
752         // (2^8-1) 255
753         assert_eq!(ExtendedFloat80::from_u8(255), (255, 0).into());
754         assert_eq!(ExtendedFloat80::from_u16(255), (255, 0).into());
755         assert_eq!(ExtendedFloat80::from_u32(255), (255, 0).into());
756         assert_eq!(ExtendedFloat80::from_u64(255), (255, 0).into());
757         assert_eq!(ExtendedFloat160::from_u128(255), (255, 0).into());
758 
759         // (2^16-1) 65535
760         assert_eq!(ExtendedFloat80::from_u16(65535), (65535, 0).into());
761         assert_eq!(ExtendedFloat80::from_u32(65535), (65535, 0).into());
762         assert_eq!(ExtendedFloat80::from_u64(65535), (65535, 0).into());
763         assert_eq!(ExtendedFloat160::from_u128(65535), (65535, 0).into());
764 
765         // (2^32-1) 4294967295
766         assert_eq!(ExtendedFloat80::from_u32(4294967295), (4294967295, 0).into());
767         assert_eq!(ExtendedFloat80::from_u64(4294967295), (4294967295, 0).into());
768         assert_eq!(ExtendedFloat160::from_u128(4294967295), (4294967295, 0).into());
769 
770         // (2^64-1) 18446744073709551615
771         assert_eq!(ExtendedFloat80::from_u64(18446744073709551615), (18446744073709551615, 0).into());
772         assert_eq!(ExtendedFloat160::from_u128(18446744073709551615), (18446744073709551615, 0).into());
773 
774         // (2^128-1) 340282366920938463463374607431768211455
775         assert_eq!(ExtendedFloat160::from_u128(340282366920938463463374607431768211455), (340282366920938463463374607431768211455, 0).into());
776     }
777 
778     #[test]
from_f32_test()779     fn from_f32_test() {
780         assert_eq!(ExtendedFloat80::from_f32(0.), (0, -149).into());
781         assert_eq!(ExtendedFloat80::from_f32(-0.), (0, -149).into());
782 
783         assert_eq!(ExtendedFloat80::from_f32(1e-45), (1, -149).into());
784         assert_eq!(ExtendedFloat80::from_f32(1e-40), (71362, -149).into());
785         assert_eq!(ExtendedFloat80::from_f32(2e-40), (142725, -149).into());
786         assert_eq!(ExtendedFloat80::from_f32(1e-20), (12379400, -90).into());
787         assert_eq!(ExtendedFloat80::from_f32(2e-20), (12379400, -89).into());
788         assert_eq!(ExtendedFloat80::from_f32(1.0), (8388608, -23).into());
789         assert_eq!(ExtendedFloat80::from_f32(2.0), (8388608, -22).into());
790         assert_eq!(ExtendedFloat80::from_f32(1e20), (11368684, 43).into());
791         assert_eq!(ExtendedFloat80::from_f32(2e20), (11368684, 44).into());
792         assert_eq!(ExtendedFloat80::from_f32(3.402823e38), (16777213, 104).into());
793     }
794 
795     #[test]
from_f64_test()796     fn from_f64_test() {
797         assert_eq!(ExtendedFloat80::from_f64(0.), (0, -1074).into());
798         assert_eq!(ExtendedFloat80::from_f64(-0.), (0, -1074).into());
799         assert_eq!(ExtendedFloat80::from_f64(5e-324), (1, -1074).into());
800         assert_eq!(ExtendedFloat80::from_f64(1e-250), (6448907850777164, -883).into());
801         assert_eq!(ExtendedFloat80::from_f64(1e-150), (7371020360979573, -551).into());
802         assert_eq!(ExtendedFloat80::from_f64(1e-45), (6427752177035961, -202).into());
803         assert_eq!(ExtendedFloat80::from_f64(1e-40), (4903985730770844, -185).into());
804         assert_eq!(ExtendedFloat80::from_f64(2e-40), (4903985730770844, -184).into());
805         assert_eq!(ExtendedFloat80::from_f64(1e-20), (6646139978924579, -119).into());
806         assert_eq!(ExtendedFloat80::from_f64(2e-20), (6646139978924579, -118).into());
807         assert_eq!(ExtendedFloat80::from_f64(1.0), (4503599627370496, -52).into());
808         assert_eq!(ExtendedFloat80::from_f64(2.0), (4503599627370496, -51).into());
809         assert_eq!(ExtendedFloat80::from_f64(1e20), (6103515625000000, 14).into());
810         assert_eq!(ExtendedFloat80::from_f64(2e20), (6103515625000000, 15).into());
811         assert_eq!(ExtendedFloat80::from_f64(1e40), (8271806125530277, 80).into());
812         assert_eq!(ExtendedFloat80::from_f64(2e40), (8271806125530277, 81).into());
813         assert_eq!(ExtendedFloat80::from_f64(1e150), (5503284107318959, 446).into());
814         assert_eq!(ExtendedFloat80::from_f64(1e250), (6290184345309700, 778).into());
815         assert_eq!(ExtendedFloat80::from_f64(1.7976931348623157e308), (9007199254740991, 971).into());
816     }
817 
assert_normalized_eq<M: Mantissa>(mut x: ExtendedFloat<M>, mut y: ExtendedFloat<M>)818     fn assert_normalized_eq<M: Mantissa>(mut x: ExtendedFloat<M>, mut y: ExtendedFloat<M>) {
819         x.normalize();
820         y.normalize();
821         assert_eq!(x, y);
822     }
823 
824     #[test]
from_float()825     fn from_float() {
826         let values: [f32; 26] = [
827             1e-40,
828             2e-40,
829             1e-35,
830             2e-35,
831             1e-30,
832             2e-30,
833             1e-25,
834             2e-25,
835             1e-20,
836             2e-20,
837             1e-15,
838             2e-15,
839             1e-10,
840             2e-10,
841             1e-5,
842             2e-5,
843             1.0,
844             2.0,
845             1e5,
846             2e5,
847             1e10,
848             2e10,
849             1e15,
850             2e15,
851             1e20,
852             2e20,
853         ];
854         for value in values.iter() {
855             assert_normalized_eq(ExtendedFloat80::from_f32(*value), ExtendedFloat80::from_f64(*value as f64));
856             assert_normalized_eq(ExtendedFloat160::from_f32(*value), ExtendedFloat160::from_f64(*value as f64));
857         }
858     }
859 
860     // TO
861 
862     // Sample of interesting numbers to check during standard test builds.
863     const INTEGERS: [u64; 32] = [
864         0,                      // 0x0
865         1,                      // 0x1
866         7,                      // 0x7
867         15,                     // 0xF
868         112,                    // 0x70
869         119,                    // 0x77
870         127,                    // 0x7F
871         240,                    // 0xF0
872         247,                    // 0xF7
873         255,                    // 0xFF
874         2032,                   // 0x7F0
875         2039,                   // 0x7F7
876         2047,                   // 0x7FF
877         4080,                   // 0xFF0
878         4087,                   // 0xFF7
879         4095,                   // 0xFFF
880         65520,                  // 0xFFF0
881         65527,                  // 0xFFF7
882         65535,                  // 0xFFFF
883         1048560,                // 0xFFFF0
884         1048567,                // 0xFFFF7
885         1048575,                // 0xFFFFF
886         16777200,               // 0xFFFFF0
887         16777207,               // 0xFFFFF7
888         16777215,               // 0xFFFFFF
889         268435440,              // 0xFFFFFF0
890         268435447,              // 0xFFFFFF7
891         268435455,              // 0xFFFFFFF
892         4294967280,             // 0xFFFFFFF0
893         4294967287,             // 0xFFFFFFF7
894         4294967295,             // 0xFFFFFFFF
895         18446744073709551615,   // 0xFFFFFFFFFFFFFFFF
896     ];
897 
898     #[test]
to_f32_test()899     fn to_f32_test() {
900         // underflow
901         let x = ExtendedFloat80 {mant: 9223372036854775808, exp: -213};
902         assert_eq!(x.into_f32(), 0.0);
903 
904         // min value
905         let x = ExtendedFloat80 {mant: 9223372036854775808, exp: -212};
906         assert_eq!(x.into_f32(), 1e-45);
907 
908         // 1.0e-40
909         let x = ExtendedFloat80 {mant: 10043308644012916736, exp: -196};
910         assert_eq!(x.into_f32(), 1e-40);
911 
912         // 1.0e-20
913         let x = ExtendedFloat80 {mant: 13611294244890214400, exp: -130};
914         assert_eq!(x.into_f32(), 1e-20);
915 
916         // 1.0
917         let x = ExtendedFloat80 {mant: 9223372036854775808, exp: -63};
918         assert_eq!(x.into_f32(), 1.0);
919 
920         // 1e20
921         let x = ExtendedFloat80 {mant: 12500000250510966784, exp: 3};
922         assert_eq!(x.into_f32(), 1e20);
923 
924         // max value
925         let x = ExtendedFloat80 {mant: 18446740775174668288, exp: 64};
926         assert_eq!(x.into_f32(), 3.402823e38);
927 
928         // almost max, high exp
929         let x = ExtendedFloat80 {mant: 1048575, exp: 108};
930         assert_eq!(x.into_f32(), 3.4028204e38);
931 
932         // max value + 1
933         let x = ExtendedFloat80 {mant: 16777216, exp: 104};
934         assert_eq!(x.into_f32(), f32::INFINITY);
935 
936         // max value + 1
937         let x = ExtendedFloat80 {mant: 1048576, exp: 108};
938         assert_eq!(x.into_f32(), f32::INFINITY);
939 
940         // 1e40
941         let x = ExtendedFloat80 {mant: 16940658945086007296, exp: 69};
942         assert_eq!(x.into_f32(), f32::INFINITY);
943 
944         // Integers.
945         for int in INTEGERS.iter() {
946             let fp = ExtendedFloat80 {mant: *int, exp: 0};
947             assert_eq!(fp.into_f32(), *int as f32, "{:?} as f32", *int);
948         }
949     }
950 
951     #[test]
to_f64_test()952     fn to_f64_test() {
953         // underflow
954         let x = ExtendedFloat80 {mant: 9223372036854775808, exp: -1138};
955         assert_relative_eq!(x.into_f64(), 0.0);
956 
957         // min value
958         let x = ExtendedFloat80 {mant: 9223372036854775808, exp: -1137};
959         assert_relative_eq!(x.into_f64(), 5e-324);
960 
961         // 1.0e-250
962         let x = ExtendedFloat80 {mant: 13207363278391631872, exp: -894};
963         assert_relative_eq!(x.into_f64(), 1e-250);
964 
965         // 1.0e-150
966         let x = ExtendedFloat80 {mant: 15095849699286165504, exp: -562};
967         assert_relative_eq!(x.into_f64(), 1e-150);
968 
969         // 1.0e-45
970         let x = ExtendedFloat80 {mant: 13164036458569648128, exp: -213};
971         assert_relative_eq!(x.into_f64(), 1e-45);
972 
973         // 1.0e-40
974         let x = ExtendedFloat80 {mant: 10043362776618688512, exp: -196};
975         assert_relative_eq!(x.into_f64(), 1e-40);
976 
977         // 1.0e-20
978         let x = ExtendedFloat80 {mant: 13611294676837537792, exp: -130};
979         assert_relative_eq!(x.into_f64(), 1e-20);
980 
981         // 1.0
982         let x = ExtendedFloat80 {mant: 9223372036854775808, exp: -63};
983         assert_relative_eq!(x.into_f64(), 1.0);
984 
985         // 1e20
986         let x = ExtendedFloat80 {mant: 12500000000000000000, exp: 3};
987         assert_relative_eq!(x.into_f64(), 1e20);
988 
989         // 1e40
990         let x = ExtendedFloat80 {mant: 16940658945086007296, exp: 69};
991         assert_relative_eq!(x.into_f64(), 1e40);
992 
993         // 1e150
994         let x = ExtendedFloat80 {mant: 11270725851789228032, exp: 435};
995         assert_relative_eq!(x.into_f64(), 1e150);
996 
997         // 1e250
998         let x = ExtendedFloat80 {mant: 12882297539194265600, exp: 767};
999         assert_relative_eq!(x.into_f64(), 1e250);
1000 
1001         // max value
1002         let x = ExtendedFloat80 {mant: 9007199254740991, exp: 971};
1003         assert_relative_eq!(x.into_f64(), 1.7976931348623157e308);
1004 
1005         // max value
1006         let x = ExtendedFloat80 {mant: 18446744073709549568, exp: 960};
1007         assert_relative_eq!(x.into_f64(), 1.7976931348623157e308);
1008 
1009         // overflow
1010         let x = ExtendedFloat80 {mant: 9007199254740992, exp: 971};
1011         assert_relative_eq!(x.into_f64(), f64::INFINITY);
1012 
1013         // overflow
1014         let x = ExtendedFloat80 {mant: 18446744073709549568, exp: 961};
1015         assert_relative_eq!(x.into_f64(), f64::INFINITY);
1016 
1017         // Underflow
1018         // Adapted from failures in strtod.
1019         let x = ExtendedFloat80 { exp: -1139, mant: 18446744073709550712 };
1020         assert_relative_eq!(x.into_f64(), 0.0);
1021 
1022         let x = ExtendedFloat80 { exp: -1139, mant: 18446744073709551460 };
1023         assert_relative_eq!(x.into_f64(), 0.0);
1024 
1025         let x = ExtendedFloat80 { exp: -1138, mant: 9223372036854776103 };
1026         assert_relative_eq!(x.into_f64(), 5e-324);
1027 
1028         // Integers.
1029         for int in INTEGERS.iter() {
1030             let fp = ExtendedFloat80 {mant: *int, exp: 0};
1031             assert_eq!(fp.into_f64(), *int as f64, "{:?} as f64", *int);
1032         }
1033     }
1034 
1035     #[test]
to_rounded_f32_test()1036     fn to_rounded_f32_test() {
1037         // Just check it compiles, we already check the underlying algorithms.
1038         let x = ExtendedFloat80 {mant: 9223372036854775808, exp: -63};
1039         assert_eq!(x.as_rounded_f32(RoundingKind::NearestTieEven, Sign::Positive), 1.0);
1040         assert_eq!(x.as_rounded_f32(RoundingKind::NearestTieAwayZero, Sign::Positive), 1.0);
1041         assert_eq!(x.as_rounded_f32(RoundingKind::TowardPositiveInfinity, Sign::Positive), 1.0);
1042         assert_eq!(x.as_rounded_f32(RoundingKind::TowardNegativeInfinity, Sign::Positive), 1.0);
1043         assert_eq!(x.as_rounded_f32(RoundingKind::TowardZero, Sign::Positive), 1.0);
1044     }
1045 
1046     #[test]
to_rounded_f64_test()1047     fn to_rounded_f64_test() {
1048         // Just check it compiles, we already check the underlying algorithms.
1049         let x = ExtendedFloat80 {mant: 9223372036854775808, exp: -63};
1050         assert_eq!(x.as_rounded_f64(RoundingKind::NearestTieEven, Sign::Positive), 1.0);
1051         assert_eq!(x.as_rounded_f64(RoundingKind::NearestTieAwayZero, Sign::Positive), 1.0);
1052         assert_eq!(x.as_rounded_f64(RoundingKind::TowardPositiveInfinity, Sign::Positive), 1.0);
1053         assert_eq!(x.as_rounded_f64(RoundingKind::TowardNegativeInfinity, Sign::Positive), 1.0);
1054         assert_eq!(x.as_rounded_f64(RoundingKind::TowardZero, Sign::Positive), 1.0);
1055     }
1056 
1057     #[test]
1058     #[ignore]
to_f32_full_test()1059     fn to_f32_full_test() {
1060         // Use exhaustive search to ensure both lossy and unlossy items are checked.
1061         // 23-bits of precision, so go from 0-32.
1062         for int in 0..u32::max_value() {
1063             let fp = ExtendedFloat80 {mant: int as u64, exp: 0};
1064             assert_eq!(fp.into_f32(), int as f32, "ExtendedFloat80 {:?} as f32", int);
1065 
1066             let fp = ExtendedFloat160 {mant: int as u128, exp: 0};
1067             assert_eq!(fp.into_f32(), int as f32, "ExtendedFloat160 {:?} as f64", int);
1068         }
1069     }
1070 
1071     #[test]
1072     #[ignore]
to_f64_full_test()1073     fn to_f64_full_test() {
1074         // Use exhaustive search to ensure both lossy and unlossy items are checked.
1075         const U32_MAX: u64 = u32::max_value() as u64;
1076         const POW2_52: u64 = 4503599627370496;
1077         const START: u64 = POW2_52 - U32_MAX / 2;
1078         const END: u64 = START + U32_MAX;
1079         for int in START..END {
1080             let fp = ExtendedFloat80 {mant: int, exp: 0};
1081             assert_eq!(fp.into_f64(), int as f64, "ExtendedFloat80 {:?} as f64", int);
1082 
1083             let fp = ExtendedFloat160 {mant: int as u128, exp: 0};
1084             assert_eq!(fp.into_f64(), int as f64, "ExtendedFloat160 {:?} as f64", int);
1085         }
1086     }
1087 
1088     // OPERATIONS
1089 
check_mul<M: Mantissa>(a: ExtendedFloat<M>, b: ExtendedFloat<M>, c: ExtendedFloat<M>)1090     fn check_mul<M: Mantissa>(a: ExtendedFloat<M>, b: ExtendedFloat<M>, c: ExtendedFloat<M>) {
1091         let r = a.mul(&b);
1092         assert_eq!(r, c);
1093     }
1094 
1095     #[test]
mul_test()1096     fn mul_test() {
1097         // Normalized (64-bit mantissa)
1098         let a = ExtendedFloat80 {mant: 13164036458569648128, exp: -213};
1099         let b = ExtendedFloat80 {mant: 9223372036854775808, exp: -62};
1100         let c = ExtendedFloat80 {mant: 6582018229284824064, exp: -211};
1101         check_mul(a, b, c);
1102 
1103         // Normalized (128-bit mantissa)
1104         let a = ExtendedFloat160 {mant: 242833611528216130005140556221773774848, exp: -277};
1105         let b = ExtendedFloat160 {mant: 170141183460469231731687303715884105728, exp: -126};
1106         let c = ExtendedFloat160 {mant: 121416805764108065002570278110886887424, exp: -275};
1107         check_mul(a, b, c);
1108 
1109         // Check with integers
1110         // 64-bit mantissa
1111         let mut a = ExtendedFloat80::from_u8(10);
1112         let mut b = ExtendedFloat80::from_u8(10);
1113         a.normalize();
1114         b.normalize();
1115         assert_eq!(a.mul(&b).into_f64(), 100.0);
1116 
1117         // 128-bit mantissa
1118         let mut a = ExtendedFloat160::from_u8(10);
1119         let mut b = ExtendedFloat160::from_u8(10);
1120         a.normalize();
1121         b.normalize();
1122         assert_eq!(a.mul(&b).into_f64(), 100.0);
1123 
1124         // Check both values need high bits set.
1125         let a = ExtendedFloat80 { mant: 1 << 32, exp: -31 };
1126         let b = ExtendedFloat80 { mant: 1 << 32, exp: -31 };
1127         assert_eq!(a.mul(&b).into_f64(), 4.0);
1128 
1129         // Check both values need high bits set.
1130         let a = ExtendedFloat80 { mant: 10 << 31, exp: -31 };
1131         let b = ExtendedFloat80 { mant: 10 << 31, exp: -31 };
1132         assert_eq!(a.mul(&b).into_f64(), 100.0);
1133     }
1134 
check_imul<M: Mantissa>(mut a: ExtendedFloat<M>, b: ExtendedFloat<M>, c: ExtendedFloat<M>)1135     fn check_imul<M: Mantissa>(mut a: ExtendedFloat<M>, b: ExtendedFloat<M>, c: ExtendedFloat<M>) {
1136         a.imul(&b);
1137         assert_eq!(a, c);
1138     }
1139 
1140     #[test]
imul_test()1141     fn imul_test() {
1142         // Normalized (64-bit mantissa)
1143         let a = ExtendedFloat80 {mant: 13164036458569648128, exp: -213};
1144         let b = ExtendedFloat80 {mant: 9223372036854775808, exp: -62};
1145         let c = ExtendedFloat80 {mant: 6582018229284824064, exp: -211};
1146         check_imul(a, b, c);
1147 
1148         // Normalized (128-bit mantissa)
1149         let a = ExtendedFloat160 {mant: 242833611528216130005140556221773774848, exp: -277};
1150         let b = ExtendedFloat160 {mant: 170141183460469231731687303715884105728, exp: -126};
1151         let c = ExtendedFloat160 {mant: 121416805764108065002570278110886887424, exp: -275};
1152         check_imul(a, b, c);
1153 
1154         // Check with integers
1155         // 64-bit mantissa
1156         let mut a = ExtendedFloat80::from_u8(10);
1157         let mut b = ExtendedFloat80::from_u8(10);
1158         a.normalize();
1159         b.normalize();
1160         a.imul(&b);
1161         assert_eq!(a.into_f64(), 100.0);
1162 
1163         // 128-bit mantissa
1164         let mut a = ExtendedFloat160::from_u8(10);
1165         let mut b = ExtendedFloat160::from_u8(10);
1166         a.normalize();
1167         b.normalize();
1168         a.imul(&b);
1169         assert_eq!(a.into_f64(), 100.0);
1170 
1171         // Check both values need high bits set.
1172         let mut a = ExtendedFloat80 { mant: 1 << 32, exp: -31 };
1173         let b = ExtendedFloat80 { mant: 1 << 32, exp: -31 };
1174         a.imul(&b);
1175         assert_eq!(a.into_f64(), 4.0);
1176 
1177         // Check both values need high bits set.
1178         let mut a = ExtendedFloat80 { mant: 10 << 31, exp: -31 };
1179         let b = ExtendedFloat80 { mant: 10 << 31, exp: -31 };
1180         a.imul(&b);
1181         assert_eq!(a.into_f64(), 100.0);
1182     }
1183 }
1184