1 // FLOAT TYPE
2
3 use super::num::*;
4 use super::rounding::*;
5 use super::shift::*;
6
/// Floating-point value with a 64-bit mantissa and a separate binary exponent.
///
/// This is an internal helper type; it is only reachable from outside the
/// module so that it can be exercised by tests.
#[doc(hidden)]
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) struct ExtendedFloat {
    /// The 64-bit mantissa (significant digits) of the value.
    pub mant: u64,
    /// The power-of-two exponent applied to the mantissa.
    pub exp: i32,
}
18
19 impl ExtendedFloat {
20 // PROPERTIES
21
22 // OPERATIONS
23
24 /// Multiply two normalized extended-precision floats, as if by `a*b`.
25 ///
26 /// The precision is maximal when the numbers are normalized, however,
27 /// decent precision will occur as long as both values have high bits
28 /// set. The result is not normalized.
29 ///
30 /// Algorithm:
31 /// 1. Non-signed multiplication of mantissas (requires 2x as many bits as input).
32 /// 2. Normalization of the result (not done here).
33 /// 3. Addition of exponents.
mul(&self, b: &ExtendedFloat) -> ExtendedFloat34 pub(crate) fn mul(&self, b: &ExtendedFloat) -> ExtendedFloat {
35 // Logic check, values must be decently normalized prior to multiplication.
36 debug_assert!((self.mant & u64::HIMASK != 0) && (b.mant & u64::HIMASK != 0));
37
38 // Extract high-and-low masks.
39 let ah = self.mant >> u64::HALF;
40 let al = self.mant & u64::LOMASK;
41 let bh = b.mant >> u64::HALF;
42 let bl = b.mant & u64::LOMASK;
43
44 // Get our products
45 let ah_bl = ah * bl;
46 let al_bh = al * bh;
47 let al_bl = al * bl;
48 let ah_bh = ah * bh;
49
50 let mut tmp = (ah_bl & u64::LOMASK) + (al_bh & u64::LOMASK) + (al_bl >> u64::HALF);
51 // round up
52 tmp += 1 << (u64::HALF - 1);
53
54 ExtendedFloat {
55 mant: ah_bh + (ah_bl >> u64::HALF) + (al_bh >> u64::HALF) + (tmp >> u64::HALF),
56 exp: self.exp + b.exp + u64::FULL,
57 }
58 }
59
60 /// Multiply in-place, as if by `a*b`.
61 ///
62 /// The result is not normalized.
63 #[inline]
imul(&mut self, b: &ExtendedFloat)64 pub(crate) fn imul(&mut self, b: &ExtendedFloat) {
65 *self = self.mul(b);
66 }
67
68 // NORMALIZE
69
70 /// Normalize float-point number.
71 ///
72 /// Shift the mantissa so the number of leading zeros is 0, or the value
73 /// itself is 0.
74 ///
75 /// Get the number of bytes shifted.
76 #[inline]
normalize(&mut self) -> u3277 pub(crate) fn normalize(&mut self) -> u32 {
78 // Note:
79 // Using the cltz intrinsic via leading_zeros is way faster (~10x)
80 // than shifting 1-bit at a time, via while loop, and also way
81 // faster (~2x) than an unrolled loop that checks at 32, 16, 4,
82 // 2, and 1 bit.
83 //
84 // Using a modulus of pow2 (which will get optimized to a bitwise
85 // and with 0x3F or faster) is slightly slower than an if/then,
86 // however, removing the if/then will likely optimize more branched
87 // code as it removes conditional logic.
88
89 // Calculate the number of leading zeros, and then zero-out
90 // any overflowing bits, to avoid shl overflow when self.mant == 0.
91 let shift = if self.mant == 0 {
92 0
93 } else {
94 self.mant.leading_zeros()
95 };
96 shl(self, shift as i32);
97 shift
98 }
99
100 // ROUND
101
102 /// Lossy round float-point number to native mantissa boundaries.
103 #[inline]
round_to_native<F, Algorithm>(&mut self, algorithm: Algorithm) where F: Float, Algorithm: FnOnce(&mut ExtendedFloat, i32),104 pub(crate) fn round_to_native<F, Algorithm>(&mut self, algorithm: Algorithm)
105 where
106 F: Float,
107 Algorithm: FnOnce(&mut ExtendedFloat, i32),
108 {
109 round_to_native::<F, _>(self, algorithm)
110 }
111
112 // FROM
113
114 /// Create extended float from native float.
115 #[inline]
from_float<F: Float>(f: F) -> ExtendedFloat116 pub fn from_float<F: Float>(f: F) -> ExtendedFloat {
117 from_float(f)
118 }
119
120 // INTO
121
122 /// Convert into default-rounded, lower-precision native float.
123 #[inline]
into_float<F: Float>(mut self) -> F124 pub(crate) fn into_float<F: Float>(mut self) -> F {
125 self.round_to_native::<F, _>(round_nearest_tie_even);
126 into_float(self)
127 }
128
129 /// Convert into downward-rounded, lower-precision native float.
130 #[inline]
into_downward_float<F: Float>(mut self) -> F131 pub(crate) fn into_downward_float<F: Float>(mut self) -> F {
132 self.round_to_native::<F, _>(round_downward);
133 into_float(self)
134 }
135 }
136
137 // FROM FLOAT
138
139 // Import ExtendedFloat from native float.
140 #[inline]
from_float<F>(f: F) -> ExtendedFloat where F: Float,141 pub(crate) fn from_float<F>(f: F) -> ExtendedFloat
142 where
143 F: Float,
144 {
145 ExtendedFloat {
146 mant: u64::as_cast(f.mantissa()),
147 exp: f.exponent(),
148 }
149 }
150
151 // INTO FLOAT
152
153 // Export extended-precision float to native float.
154 //
155 // The extended-precision float must be in native float representation,
156 // with overflow/underflow appropriately handled.
157 #[inline]
into_float<F>(fp: ExtendedFloat) -> F where F: Float,158 pub(crate) fn into_float<F>(fp: ExtendedFloat) -> F
159 where
160 F: Float,
161 {
162 // Export floating-point number.
163 if fp.mant == 0 || fp.exp < F::DENORMAL_EXPONENT {
164 // sub-denormal, underflow
165 F::ZERO
166 } else if fp.exp >= F::MAX_EXPONENT {
167 // overflow
168 F::from_bits(F::INFINITY_BITS)
169 } else {
170 // calculate the exp and fraction bits, and return a float from bits.
171 let exp: u64;
172 if (fp.exp == F::DENORMAL_EXPONENT) && (fp.mant & F::HIDDEN_BIT_MASK.as_u64()) == 0 {
173 exp = 0;
174 } else {
175 exp = (fp.exp + F::EXPONENT_BIAS).as_u64();
176 }
177 let exp = exp << F::MANTISSA_SIZE;
178 let mant = fp.mant & F::MANTISSA_MASK.as_u64();
179 F::from_bits(F::Unsigned::as_cast(mant | exp))
180 }
181 }
182
183 // TESTS
184 // -----
185
186 #[cfg(test)]
187 mod tests {
188 use super::*;
189 use crate::lib::{f32, f64};
190
191 // NORMALIZE
192
check_normalize(mant: u64, exp: i32, shift: u32, r_mant: u64, r_exp: i32)193 fn check_normalize(mant: u64, exp: i32, shift: u32, r_mant: u64, r_exp: i32) {
194 let mut x = ExtendedFloat {
195 mant,
196 exp,
197 };
198 assert_eq!(x.normalize(), shift);
199 assert_eq!(
200 x,
201 ExtendedFloat {
202 mant: r_mant,
203 exp: r_exp
204 }
205 );
206 }
207
#[test]
fn normalize_test() {
    // Each row is (mant, exp, expected shift, expected mant, expected exp).
    let cases: &[(u64, i32, u32, u64, i32)] = &[
        // F32
        // 0
        (0, 0, 0, 0, 0),
        // min value
        (1, -149, 63, 9223372036854775808, -212),
        // 1.0e-40
        (71362, -149, 47, 10043308644012916736, -196),
        // 1.0e-20
        (12379400, -90, 40, 13611294244890214400, -130),
        // 1.0
        (8388608, -23, 40, 9223372036854775808, -63),
        // 1e20
        (11368684, 43, 40, 12500000250510966784, 3),
        // max value
        (16777213, 104, 40, 18446740775174668288, 64),
        // F64
        // min value
        (1, -1074, 63, 9223372036854775808, -1137),
        // 1.0e-250
        (6448907850777164, -883, 11, 13207363278391631872, -894),
        // 1.0e-150
        (7371020360979573, -551, 11, 15095849699286165504, -562),
        // 1.0e-45
        (6427752177035961, -202, 11, 13164036458569648128, -213),
        // 1.0e-40
        (4903985730770844, -185, 11, 10043362776618688512, -196),
        // 1.0e-20
        (6646139978924579, -119, 11, 13611294676837537792, -130),
        // 1.0
        (4503599627370496, -52, 11, 9223372036854775808, -63),
        // 1e20
        (6103515625000000, 14, 11, 12500000000000000000, 3),
        // 1e40
        (8271806125530277, 80, 11, 16940658945086007296, 69),
        // 1e150
        (5503284107318959, 446, 11, 11270725851789228032, 435),
        // 1e250
        (6290184345309700, 778, 11, 12882297539194265600, 767),
        // max value
        (9007199254740991, 971, 11, 18446744073709549568, 960),
    ];

    for &(mant, exp, shift, r_mant, r_exp) in cases.iter() {
        check_normalize(mant, exp, shift, r_mant, r_exp);
    }
}
270
271 // ROUND
272
check_round_to_f32(mant: u64, exp: i32, r_mant: u64, r_exp: i32)273 fn check_round_to_f32(mant: u64, exp: i32, r_mant: u64, r_exp: i32) {
274 let mut x = ExtendedFloat {
275 mant,
276 exp,
277 };
278 x.round_to_native::<f32, _>(round_nearest_tie_even);
279 assert_eq!(
280 x,
281 ExtendedFloat {
282 mant: r_mant,
283 exp: r_exp
284 }
285 );
286 }
287
#[test]
fn round_to_f32_test() {
    // Rounding is lossy, so some expectations are **slightly** rounded.
    // Each row is (mant, exp, expected mant, expected exp).
    let cases: &[(u64, i32, u64, i32)] = &[
        // underflow
        (9223372036854775808, -213, 0, -149),
        // min value
        (9223372036854775808, -212, 1, -149),
        // 1.0e-40
        (10043308644012916736, -196, 71362, -149),
        // 1.0e-20
        (13611294244890214400, -130, 12379400, -90),
        // 1.0
        (9223372036854775808, -63, 8388608, -23),
        // 1e20
        (12500000250510966784, 3, 11368684, 43),
        // max value
        (18446740775174668288, 64, 16777213, 104),
        // overflow
        (18446740775174668288, 65, 16777213, 105),
    ];

    for &(mant, exp, r_mant, r_exp) in cases.iter() {
        check_round_to_f32(mant, exp, r_mant, r_exp);
    }
}
316
check_round_to_f64(mant: u64, exp: i32, r_mant: u64, r_exp: i32)317 fn check_round_to_f64(mant: u64, exp: i32, r_mant: u64, r_exp: i32) {
318 let mut x = ExtendedFloat {
319 mant,
320 exp,
321 };
322 x.round_to_native::<f64, _>(round_nearest_tie_even);
323 assert_eq!(
324 x,
325 ExtendedFloat {
326 mant: r_mant,
327 exp: r_exp
328 }
329 );
330 }
331
// Check rounding of normalized extended floats to f64 mantissa boundaries
// across the representable range, from underflow up to the maximum value.
#[test]
fn round_to_f64_test() {
    // This is lossy, so some of these values are **slightly** rounded.

    // underflow
    check_round_to_f64(9223372036854775808, -1138, 0, -1074);

    // min value
    check_round_to_f64(9223372036854775808, -1137, 1, -1074);

    // 1.0e-250
    // This row previously repeated the 1.0e-150 inputs, so 1.0e-250 was
    // never exercised. These are the normalized/native pairs for 1.0e-250.
    check_round_to_f64(13207363278391631872, -894, 6448907850777164, -883);

    // 1.0e-150
    check_round_to_f64(15095849699286165504, -562, 7371020360979573, -551);

    // 1.0e-45
    check_round_to_f64(13164036458569648128, -213, 6427752177035961, -202);

    // 1.0e-40
    check_round_to_f64(10043362776618688512, -196, 4903985730770844, -185);

    // 1.0e-20
    check_round_to_f64(13611294676837537792, -130, 6646139978924579, -119);

    // 1.0
    check_round_to_f64(9223372036854775808, -63, 4503599627370496, -52);

    // 1e20
    check_round_to_f64(12500000000000000000, 3, 6103515625000000, 14);

    // 1e40
    check_round_to_f64(16940658945086007296, 69, 8271806125530277, 80);

    // 1e150
    check_round_to_f64(11270725851789228032, 435, 5503284107318959, 446);

    // 1e250
    check_round_to_f64(12882297539194265600, 767, 6290184345309700, 778);

    // max value
    check_round_to_f64(18446744073709549568, 960, 9007199254740991, 971);

    // Bug fixes
    // 1.2345e-308
    check_round_to_f64(10234494226754558294, -1086, 2498655817078750, -1074);
}
379
assert_normalized_eq(mut x: ExtendedFloat, mut y: ExtendedFloat)380 fn assert_normalized_eq(mut x: ExtendedFloat, mut y: ExtendedFloat) {
381 x.normalize();
382 y.normalize();
383 assert_eq!(x, y);
384 }
385
// An f32 and the same value widened to f64 must yield the same extended
// float once both are normalized.
#[test]
fn from_float() {
    const SAMPLES: [f32; 26] = [
        1e-40, 2e-40, 1e-35, 2e-35, 1e-30, 2e-30, 1e-25, 2e-25, 1e-20, 2e-20, 1e-15, 2e-15,
        1e-10, 2e-10, 1e-5, 2e-5, 1.0, 2.0, 1e5, 2e5, 1e10, 2e10, 1e15, 2e15, 1e20, 2e20,
    ];

    for &single in SAMPLES.iter() {
        let widened = single as f64;
        assert_normalized_eq(
            ExtendedFloat::from_float(single),
            ExtendedFloat::from_float(widened),
        );
    }
}
399
400 // TO
401
// Integers with interesting bit patterns (runs of set bits at various
// widths), used as mantissas in the conversion tests below.
const INTEGERS: [u64; 32] = [
    0x0,                   // 0
    0x1,                   // 1
    0x7,                   // 7
    0xF,                   // 15
    0x70,                  // 112
    0x77,                  // 119
    0x7F,                  // 127
    0xF0,                  // 240
    0xF7,                  // 247
    0xFF,                  // 255
    0x7F0,                 // 2032
    0x7F7,                 // 2039
    0x7FF,                 // 2047
    0xFF0,                 // 4080
    0xFF7,                 // 4087
    0xFFF,                 // 4095
    0xFFF0,                // 65520
    0xFFF7,                // 65527
    0xFFFF,                // 65535
    0xF_FFF0,              // 1048560
    0xF_FFF7,              // 1048567
    0xF_FFFF,              // 1048575
    0xFF_FFF0,             // 16777200
    0xFF_FFF7,             // 16777207
    0xFF_FFFF,             // 16777215
    0xFFF_FFF0,            // 268435440
    0xFFF_FFF7,            // 268435447
    0xFFF_FFFF,            // 268435455
    0xFFFF_FFF0,           // 4294967280
    0xFFFF_FFF7,           // 4294967287
    0xFFFF_FFFF,           // 4294967295
    0xFFFF_FFFF_FFFF_FFFF, // 18446744073709551615
];
437
#[test]
fn to_f32_test() {
    // Build an extended float from its parts and convert it to f32 using
    // the default (nearest, ties-to-even) rounding.
    let to_f32 = |mant: u64, exp: i32| ExtendedFloat { mant, exp }.into_float::<f32>();

    // underflow
    assert_eq!(to_f32(9223372036854775808, -213), 0.0);

    // min value
    assert_eq!(to_f32(9223372036854775808, -212), 1e-45);

    // 1.0e-40
    assert_eq!(to_f32(10043308644012916736, -196), 1e-40);

    // 1.0e-20
    assert_eq!(to_f32(13611294244890214400, -130), 1e-20);

    // 1.0
    assert_eq!(to_f32(9223372036854775808, -63), 1.0);

    // 1e20
    assert_eq!(to_f32(12500000250510966784, 3), 1e20);

    // max value
    assert_eq!(to_f32(18446740775174668288, 64), 3.402823e38);

    // almost max, high exp
    assert_eq!(to_f32(1048575, 108), 3.4028204e38);

    // max value + 1
    assert_eq!(to_f32(16777216, 104), f32::INFINITY);

    // max value + 1, expressed with a higher exponent
    assert_eq!(to_f32(1048576, 108), f32::INFINITY);

    // 1e40
    assert_eq!(to_f32(16940658945086007296, 69), f32::INFINITY);

    // Integers: a zero exponent must match the native integer-to-float cast.
    for &int in INTEGERS.iter() {
        assert_eq!(to_f32(int, 0), int as f32, "{:?} as f32", int);
    }
}
526
#[test]
fn to_f64_test() {
    // Build an extended float from its parts and convert it to f64 using
    // the default (nearest, ties-to-even) rounding.
    let to_f64 = |mant: u64, exp: i32| ExtendedFloat { mant, exp }.into_float::<f64>();

    // underflow
    assert_eq!(to_f64(9223372036854775808, -1138), 0.0);

    // min value
    assert_eq!(to_f64(9223372036854775808, -1137), 5e-324);

    // 1.0e-250
    assert_eq!(to_f64(13207363278391631872, -894), 1e-250);

    // 1.0e-150
    assert_eq!(to_f64(15095849699286165504, -562), 1e-150);

    // 1.0e-45
    assert_eq!(to_f64(13164036458569648128, -213), 1e-45);

    // 1.0e-40
    assert_eq!(to_f64(10043362776618688512, -196), 1e-40);

    // 1.0e-20
    assert_eq!(to_f64(13611294676837537792, -130), 1e-20);

    // 1.0
    assert_eq!(to_f64(9223372036854775808, -63), 1.0);

    // 1e20
    assert_eq!(to_f64(12500000000000000000, 3), 1e20);

    // 1e40
    assert_eq!(to_f64(16940658945086007296, 69), 1e40);

    // 1e150
    assert_eq!(to_f64(11270725851789228032, 435), 1e150);

    // 1e250
    assert_eq!(to_f64(12882297539194265600, 767), 1e250);

    // max value (native-width mantissa)
    assert_eq!(to_f64(9007199254740991, 971), 1.7976931348623157e308);

    // max value (normalized mantissa)
    assert_eq!(to_f64(18446744073709549568, 960), 1.7976931348623157e308);

    // overflow (native-width mantissa)
    assert_eq!(to_f64(9007199254740992, 971), f64::INFINITY);

    // overflow (normalized mantissa)
    assert_eq!(to_f64(18446744073709549568, 961), f64::INFINITY);

    // Underflow
    // Adapted from failures in strtod.
    assert_eq!(to_f64(18446744073709550712, -1139), 0.0);
    assert_eq!(to_f64(18446744073709551460, -1139), 0.0);
    assert_eq!(to_f64(9223372036854776103, -1138), 5e-324);

    // Integers: a zero exponent must match the native integer-to-float cast.
    for &int in INTEGERS.iter() {
        assert_eq!(to_f64(int, 0), int as f64, "{:?} as f64", int);
    }
}
670
671 // OPERATIONS
672
check_mul(a: ExtendedFloat, b: ExtendedFloat, c: ExtendedFloat)673 fn check_mul(a: ExtendedFloat, b: ExtendedFloat, c: ExtendedFloat) {
674 let r = a.mul(&b);
675 assert_eq!(r, c);
676 }
677
#[test]
fn mul_test() {
    // Shorthand constructor for the operands below.
    let fp = |mant: u64, exp: i32| ExtendedFloat { mant, exp };

    // Normalized (64-bit mantissa)
    check_mul(
        fp(13164036458569648128, -213),
        fp(9223372036854775808, -62),
        fp(6582018229284824064, -211),
    );

    // Check with integers
    // 64-bit mantissa
    let mut lhs = fp(10, 0);
    let mut rhs = fp(10, 0);
    lhs.normalize();
    rhs.normalize();
    assert_eq!(lhs.mul(&rhs).into_float::<f64>(), 100.0);

    // Operands only need bits set in the upper half, not full normalization.
    let lhs = fp(1 << 32, -31);
    let rhs = fp(1 << 32, -31);
    assert_eq!(lhs.mul(&rhs).into_float::<f64>(), 4.0);

    // Same check with a multi-bit mantissa.
    let lhs = fp(10 << 31, -31);
    let rhs = fp(10 << 31, -31);
    assert_eq!(lhs.mul(&rhs).into_float::<f64>(), 100.0);
}
731
check_imul(mut a: ExtendedFloat, b: ExtendedFloat, c: ExtendedFloat)732 fn check_imul(mut a: ExtendedFloat, b: ExtendedFloat, c: ExtendedFloat) {
733 a.imul(&b);
734 assert_eq!(a, c);
735 }
736
#[test]
fn imul_test() {
    // Shorthand constructor for the operands below.
    let fp = |mant: u64, exp: i32| ExtendedFloat { mant, exp };

    // Normalized (64-bit mantissa)
    check_imul(
        fp(13164036458569648128, -213),
        fp(9223372036854775808, -62),
        fp(6582018229284824064, -211),
    );

    // Check with integers
    // 64-bit mantissa
    let mut acc = fp(10, 0);
    let mut rhs = fp(10, 0);
    acc.normalize();
    rhs.normalize();
    acc.imul(&rhs);
    assert_eq!(acc.into_float::<f64>(), 100.0);

    // Operands only need bits set in the upper half, not full normalization.
    let mut acc = fp(1 << 32, -31);
    let rhs = fp(1 << 32, -31);
    acc.imul(&rhs);
    assert_eq!(acc.into_float::<f64>(), 4.0);

    // Same check with a multi-bit mantissa.
    let mut acc = fp(10 << 31, -31);
    let rhs = fp(10 << 31, -31);
    acc.imul(&rhs);
    assert_eq!(acc.into_float::<f64>(), 100.0);
}
793 }
794