1 //! Extended precision floating-point types. 2 //! 3 //! Also contains helpers to convert to and from native rust floats. 4 //! This representation stores the mantissa as a 64-bit unsigned integer, 5 //! and the exponent as a 32-bit unsigned integer, allowed ~80 bits of 6 //! precision (only 16 bits of the 32-bit integer are used, u32 is used 7 //! for performance). Since there is no storage for the sign bit, 8 //! this only works for positive floats. 9 // Lot of useful algorithms in here, and helper utilities. 10 // We want to make sure this code is not accidentally deleted. 11 #![allow(dead_code)] 12 13 use crate::util::*; 14 use super::convert::*; 15 use super::mantissa::Mantissa; 16 use super::rounding::*; 17 use super::shift::*; 18 19 // FLOAT TYPE 20 21 /// Extended precision floating-point type. 22 /// 23 /// Private implementation, exposed only for testing purposes. 24 #[doc(hidden)] 25 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 26 pub struct ExtendedFloat<M: Mantissa> { 27 /// Mantissa for the extended-precision float. 28 pub mant: M, 29 /// Binary exponent for the extended-precision float. 30 pub exp: i32, 31 } 32 33 impl<M: Mantissa> ExtendedFloat<M> { 34 // PROPERTIES 35 36 perftools_inline!{ 37 /// Get the mantissa component. 38 pub fn mantissa(&self) -> M { 39 self.mant 40 }} 41 42 perftools_inline!{ 43 /// Get the exponent component. 44 pub fn exponent(&self) -> i32 { 45 self.exp 46 }} 47 48 // OPERATIONS 49 50 perftools_inline!{ 51 /// Multiply two normalized extended-precision floats, as if by `a*b`. 52 /// 53 /// The precision is maximal when the numbers are normalized, however, 54 /// decent precision will occur as long as both values have high bits 55 /// set. The result is not normalized. 56 /// 57 /// Algorithm: 58 /// 1. Non-signed multiplication of mantissas (requires 2x as many bits as input). 59 /// 2. Normalization of the result (not done here). 60 /// 3. Addition of exponents. 
61 pub fn mul(&self, b: &ExtendedFloat<M>) 62 -> ExtendedFloat<M> 63 { 64 // Logic check, values must be decently normalized prior to multiplication. 65 debug_assert!((self.mant & M::HIMASK != M::ZERO) && (b.mant & M::HIMASK != M::ZERO)); 66 67 // Extract high-and-low masks. 68 let ah = self.mant >> M::HALF; 69 let al = self.mant & M::LOMASK; 70 let bh = b.mant >> M::HALF; 71 let bl = b.mant & M::LOMASK; 72 73 // Get our products 74 let ah_bl = ah * bl; 75 let al_bh = al * bh; 76 let al_bl = al * bl; 77 let ah_bh = ah * bh; 78 79 let mut tmp = (ah_bl & M::LOMASK) + (al_bh & M::LOMASK) + (al_bl >> M::HALF); 80 // round up 81 tmp += M::ONE << (M::HALF-1); 82 83 ExtendedFloat { 84 mant: ah_bh + (ah_bl >> M::HALF) + (al_bh >> M::HALF) + (tmp >> M::HALF), 85 exp: self.exp + b.exp + M::FULL 86 } 87 }} 88 89 perftools_inline!{ 90 /// Multiply in-place, as if by `a*b`. 91 /// 92 /// The result is not normalized. 93 pub fn imul(&mut self, b: &ExtendedFloat<M>) 94 { 95 *self = self.mul(b); 96 }} 97 98 // NORMALIZE 99 100 perftools_inline!{ 101 /// Get if extended-float is normalized, MSB is set. 102 pub fn is_normalized(&self) 103 -> bool 104 { 105 self.mant & M::NORMALIZED_MASK == M::NORMALIZED_MASK 106 }} 107 108 perftools_inline!{ 109 /// Normalize float-point number. 110 /// 111 /// Shift the mantissa so the number of leading zeros is 0, or the value 112 /// itself is 0. 113 /// 114 /// Get the number of bytes shifted. 115 pub fn normalize(&mut self) 116 -> u32 117 { 118 // Note: 119 // Using the cltz intrinsic via leading_zeros is way faster (~10x) 120 // than shifting 1-bit at a time, via while loop, and also way 121 // faster (~2x) than an unrolled loop that checks at 32, 16, 4, 122 // 2, and 1 bit. 123 // 124 // Using a modulus of pow2 (which will get optimized to a bitwise 125 // and with 0x3F or faster) is slightly slower than an if/then, 126 // however, removing the if/then will likely optimize more branched 127 // code as it removes conditional logic. 
128 129 // Calculate the number of leading zeros, and then zero-out 130 // any overflowing bits, to avoid shl overflow when self.mant == 0. 131 let shift = if self.mant.is_zero() { 0 } else { self.mant.leading_zeros() }; 132 shl(self, shift); 133 shift 134 }} 135 136 perftools_inline!{ 137 /// Normalize floating-point number to n-bits away from the MSB. 138 /// 139 /// This may lead to lossy rounding, and will not use custom rounding 140 /// rules to accommodate for this. 141 pub fn normalize_to(&mut self, n: u32) 142 -> i32 143 { 144 debug_assert!(n <= M::BITS.as_u32(), "ExtendedFloat::normalize_to() attempting to shift beyond type size."); 145 146 // Get the shift, with any of the higher bits removed. 147 // This way, we can guarantee that we will not overflow 148 // with the shl/shr. 149 let leading = if self.mant.is_zero() { n } else { self.mant.leading_zeros() }; 150 let shift = leading.as_i32() - n.as_i32(); 151 if shift > 0 { 152 // Need to shift left 153 shl(self, shift); 154 } else if shift < 0 { 155 // Need to shift right. 156 shr(self, -shift); 157 } 158 159 shift 160 }} 161 162 perftools_inline!{ 163 /// Get normalized boundaries for float. 164 pub fn normalized_boundaries(&self) 165 -> (ExtendedFloat<M>, ExtendedFloat<M>) 166 { 167 let mut upper = ExtendedFloat { 168 mant: (self.mant << 1) + M::ONE, 169 exp: self.exp - 1, 170 }; 171 upper.normalize(); 172 173 // Use a boolean hack to get 2 if they're equal, else 1, without 174 // any branching. 175 let is_hidden = self.mant == as_cast::<M, _>(f64::HIDDEN_BIT_MASK); 176 let l_shift: i32 = is_hidden as i32 + 1; 177 178 let mut lower = ExtendedFloat { 179 mant: (self.mant << l_shift) - M::ONE, 180 exp: self.exp - l_shift, 181 }; 182 lower.mant <<= lower.exp - upper.exp; 183 lower.exp = upper.exp; 184 185 (lower, upper) 186 }} 187 188 // ROUND 189 190 perftools_inline!{ 191 /// Lossy round float-point number to native mantissa boundaries. 
192 pub(crate) fn round_to_native<F, Cb>(&mut self, cb: Cb) 193 where F: FloatRounding<M>, 194 Cb: FnOnce(&mut ExtendedFloat<M>, i32) 195 { 196 round_to_native::<F, M, _>(self, cb) 197 }} 198 199 perftools_inline!{ 200 /// Lossy round float-point number to f32 mantissa boundaries. 201 pub(crate) fn round_to_f32<Cb>(&mut self, cb: Cb) 202 where f32: FloatRounding<M>, 203 Cb: FnOnce(&mut ExtendedFloat<M>, i32) 204 { 205 self.round_to_native::<f32, Cb>(cb) 206 }} 207 208 perftools_inline!{ 209 /// Lossy round float-point number to f64 mantissa boundaries. 210 pub(crate) fn round_to_f64<Cb>(&mut self, cb: Cb) 211 where f64: FloatRounding<M>, 212 Cb: FnOnce(&mut ExtendedFloat<M>, i32) 213 { 214 self.round_to_native::<f64, Cb>(cb) 215 }} 216 217 // FROM 218 219 perftools_inline!{ 220 /// Create extended float from 8-bit unsigned integer. 221 pub fn from_int<T: Integer>(i: T) 222 -> ExtendedFloat<M> 223 { 224 from_int(i) 225 }} 226 227 perftools_inline!{ 228 /// Create extended float from 8-bit unsigned integer. 229 pub fn from_u8(i: u8) 230 -> ExtendedFloat<M> 231 { 232 Self::from_int(i) 233 }} 234 235 perftools_inline!{ 236 /// Create extended float from 16-bit unsigned integer. 237 pub fn from_u16(i: u16) 238 -> ExtendedFloat<M> 239 { 240 Self::from_int(i) 241 }} 242 243 perftools_inline!{ 244 /// Create extended float from 32-bit unsigned integer. 245 pub fn from_u32(i: u32) 246 -> ExtendedFloat<M> 247 { 248 Self::from_int(i) 249 }} 250 251 perftools_inline!{ 252 /// Create extended float from 64-bit unsigned integer. 253 pub fn from_u64(i: u64) 254 -> ExtendedFloat<M> 255 { 256 Self::from_int(i) 257 }} 258 259 perftools_inline!{ 260 /// Create extended float from native float. 261 pub fn from_float<F: Float>(f: F) 262 -> ExtendedFloat<M> 263 { 264 from_float(f) 265 }} 266 267 perftools_inline!{ 268 /// Create extended float from 32-bit float. 
269 pub fn from_f32(f: f32) 270 -> ExtendedFloat<M> 271 { 272 Self::from_float(f) 273 }} 274 275 perftools_inline!{ 276 /// Create extended float from 64-bit float. 277 pub fn from_f64(f: f64) 278 -> ExtendedFloat<M> 279 { 280 Self::from_float(f) 281 }} 282 283 // INTO 284 285 perftools_inline!{ 286 /// Convert into lower-precision native float. 287 pub fn into_float<F: FloatRounding<M>>(self) 288 -> F 289 { 290 #[cfg(not(feature = "rounding"))] { 291 self.into_rounded_float::<F>(RoundingKind::NearestTieEven, Sign::Positive) 292 } 293 294 #[cfg(feature = "rounding")] { 295 self.into_rounded_float::<F>(get_float_rounding(), Sign::Positive) 296 } 297 }} 298 299 perftools_inline!{ 300 /// Convert into lower-precision 32-bit float. 301 pub fn into_f32(self) 302 -> f32 303 where f32: FloatRounding<M> 304 { 305 self.into_float() 306 }} 307 308 perftools_inline!{ 309 /// Convert into lower-precision 64-bit float. 310 pub fn into_f64(self) 311 -> f64 312 where f64: FloatRounding<M> 313 { 314 self.into_float() 315 }} 316 317 // INTO ROUNDED 318 319 perftools_inline!{ 320 /// Into rounded float where the rounding kind has been converted. 321 pub(crate) fn into_rounded_float_impl<F>(mut self, kind: RoundingKind) 322 -> F 323 where F: FloatRounding<M> 324 { 325 // Normalize the actual float rounding here. 326 let cb = match kind { 327 RoundingKind::NearestTieEven => round_nearest_tie_even, 328 RoundingKind::NearestTieAwayZero => round_nearest_tie_away_zero, 329 RoundingKind::Upward => round_upward, 330 RoundingKind::Downward => round_downward, 331 _ => unreachable!() 332 }; 333 334 self.round_to_native::<F, _>(cb); 335 into_float(self) 336 }} 337 338 perftools_inline!{ 339 /// Convert into lower-precision native float with custom rounding rules. 
340 pub fn into_rounded_float<F>(self, kind: RoundingKind, sign: Sign) 341 -> F 342 where F: FloatRounding<M> 343 { 344 self.into_rounded_float_impl(internal_rounding(kind, sign)) 345 }} 346 347 perftools_inline!{ 348 /// Convert into lower-precision 32-bit float with custom rounding rules. 349 pub fn into_rounded_f32(self, kind: RoundingKind, sign: Sign) 350 -> f32 351 where f32: FloatRounding<M> 352 { 353 self.into_rounded_float(kind, sign) 354 }} 355 356 perftools_inline!{ 357 /// Convert into lower-precision 64-bit float with custom rounding rules. 358 pub fn into_rounded_f64(self, kind: RoundingKind, sign: Sign) 359 -> f64 360 where f64: FloatRounding<M> 361 { 362 self.into_rounded_float(kind, sign) 363 }} 364 365 // AS 366 367 perftools_inline!{ 368 /// Convert to lower-precision native float. 369 pub fn as_float<F: FloatRounding<M>>(&self) 370 -> F 371 { 372 self.clone().into_float::<F>() 373 }} 374 375 perftools_inline!{ 376 /// Convert to lower-precision 32-bit float. 377 pub fn as_f32(&self) 378 -> f32 379 where f32: FloatRounding<M> 380 { 381 self.as_float() 382 }} 383 384 perftools_inline!{ 385 /// Convert to lower-precision 64-bit float. 386 pub fn as_f64(&self) 387 -> f64 388 where f64: FloatRounding<M> 389 { 390 self.as_float() 391 }} 392 393 // AS ROUNDED 394 395 perftools_inline!{ 396 /// Convert to lower-precision native float with custom rounding rules. 397 pub fn as_rounded_float<F>(&self, kind: RoundingKind, sign: Sign) 398 -> F 399 where F: FloatRounding<M> 400 { 401 self.clone().into_rounded_float::<F>(kind, sign) 402 }} 403 404 perftools_inline!{ 405 /// Convert to lower-precision 32-bit float with custom rounding rules. 406 pub fn as_rounded_f32(&self, kind: RoundingKind, sign: Sign) 407 -> f32 408 where f32: FloatRounding<M> 409 { 410 self.as_rounded_float(kind, sign) 411 }} 412 413 perftools_inline!{ 414 /// Convert to lower-precision 64-bit float with custom rounding rules. 
415 pub fn as_rounded_f64(&self, kind: RoundingKind, sign: Sign) 416 -> f64 417 where f64: FloatRounding<M> 418 { 419 self.as_rounded_float(kind, sign) 420 }} 421 } 422 423 impl ExtendedFloat<u128> { 424 perftools_inline!{ 425 /// Create extended float from 64-bit unsigned integer. 426 pub fn from_u128(i: u128) -> ExtendedFloat<u128> { 427 Self::from_int(i) 428 }} 429 } 430 431 // ALIASES 432 433 /// Alias with ~80 bits of precision, 64 for the mantissa and 16 for exponent. 434 pub type ExtendedFloat80 = ExtendedFloat<u64>; 435 436 /// Alias with ~160 bits of precision, 128 for the mantissa and 32 for exponent. 437 pub type ExtendedFloat160 = ExtendedFloat<u128>; 438 439 // TESTS 440 // ----- 441 442 #[cfg(test)] 443 mod tests { 444 use super::*; 445 446 use approx::assert_relative_eq; 447 448 // NORMALIZE 449 check_normalize(mant: u64, exp: i32, shift: u32, r_mant: u64, r_exp: i32)450 fn check_normalize(mant: u64, exp: i32, shift: u32, r_mant: u64, r_exp: i32) { 451 let mut x = ExtendedFloat {mant: mant, exp: exp}; 452 assert!(!x.is_normalized()); 453 assert_eq!(x.normalize(), shift); 454 assert_eq!(x, ExtendedFloat {mant: r_mant, exp: r_exp}); 455 assert!(x.is_normalized() || x.mant.is_zero()); 456 457 let mut x = ExtendedFloat {mant: mant as u128, exp: exp}; 458 let shift = if shift == 0 { 0 } else { shift+64 }; 459 let r_exp = if r_exp == 0 { 0 } else { r_exp-64 }; 460 assert!(!x.is_normalized()); 461 assert_eq!(x.normalize(), shift); 462 assert_eq!(x, ExtendedFloat {mant: (r_mant as u128) << 64, exp: r_exp}); 463 assert!(x.is_normalized() || x.mant.is_zero()); 464 } 465 466 #[test] normalize_test()467 fn normalize_test() { 468 // F32 469 // 0 470 check_normalize(0, 0, 0, 0, 0); 471 472 // min value 473 check_normalize(1, -149, 63, 9223372036854775808, -212); 474 475 // 1.0e-40 476 check_normalize(71362, -149, 47, 10043308644012916736, -196); 477 478 // 1.0e-20 479 check_normalize(12379400, -90, 40, 13611294244890214400, -130); 480 481 // 1.0 482 
check_normalize(8388608, -23, 40, 9223372036854775808, -63); 483 484 // 1e20 485 check_normalize(11368684, 43, 40, 12500000250510966784, 3); 486 487 // max value 488 check_normalize(16777213, 104, 40, 18446740775174668288, 64); 489 490 // F64 491 492 // min value 493 check_normalize(1, -1074, 63, 9223372036854775808, -1137); 494 495 // 1.0e-250 496 check_normalize(6448907850777164, -883, 11, 13207363278391631872, -894); 497 498 // 1.0e-150 499 check_normalize(7371020360979573, -551, 11, 15095849699286165504, -562); 500 501 // 1.0e-45 502 check_normalize(6427752177035961, -202, 11, 13164036458569648128, -213); 503 504 // 1.0e-40 505 check_normalize(4903985730770844, -185, 11, 10043362776618688512, -196); 506 507 // 1.0e-20 508 check_normalize(6646139978924579, -119, 11, 13611294676837537792, -130); 509 510 // 1.0 511 check_normalize(4503599627370496, -52, 11, 9223372036854775808, -63); 512 513 // 1e20 514 check_normalize(6103515625000000, 14, 11, 12500000000000000000, 3); 515 516 // 1e40 517 check_normalize(8271806125530277, 80, 11, 16940658945086007296, 69); 518 519 // 1e150 520 check_normalize(5503284107318959, 446, 11, 11270725851789228032, 435); 521 522 // 1e250 523 check_normalize(6290184345309700, 778, 11, 12882297539194265600, 767); 524 525 // max value 526 check_normalize(9007199254740991, 971, 11, 18446744073709549568, 960); 527 } 528 check_normalize_to(mant: u64, exp: i32, n: u32, shift: i32, r_mant: u64, r_exp: i32)529 fn check_normalize_to(mant: u64, exp: i32, n: u32, shift: i32, r_mant: u64, r_exp: i32) { 530 let mut x = ExtendedFloat {mant: mant, exp: exp}; 531 assert_eq!(x.normalize_to(n), shift); 532 assert_eq!(x, ExtendedFloat {mant: r_mant, exp: r_exp}); 533 534 let mut x = ExtendedFloat {mant: mant as u128, exp: exp}; 535 let shift = if shift == 0 { 0 } else { shift+64 }; 536 let r_exp = if r_exp == 0 { 0 } else { r_exp-64 }; 537 assert_eq!(x.normalize_to(n), shift); 538 assert_eq!(x, ExtendedFloat {mant: (r_mant as u128) << 64, exp: r_exp}); 539 
} 540 541 #[test] normalize_to_test()542 fn normalize_to_test() { 543 // F32 544 // 0 545 check_normalize_to(0, 0, 0, 0, 0, 0); 546 check_normalize_to(0, 0, 2, 0, 0, 0); 547 548 // min value 549 check_normalize_to(1, -149, 0, 63, 9223372036854775808, -212); 550 check_normalize_to(1, -149, 2, 61, 2305843009213693952, -210); 551 552 // 1.0e-40 553 check_normalize_to(71362, -149, 0, 47, 10043308644012916736, -196); 554 check_normalize_to(71362, -149, 2, 45, 2510827161003229184, -194); 555 556 // 1.0e-20 557 check_normalize_to(12379400, -90, 0, 40, 13611294244890214400, -130); 558 check_normalize_to(12379400, -90, 2, 38, 3402823561222553600, -128); 559 560 // 1.0 561 check_normalize_to(8388608, -23, 0, 40, 9223372036854775808, -63); 562 check_normalize_to(8388608, -23, 2, 38, 2305843009213693952, -61); 563 564 // 1e20 565 check_normalize_to(11368684, 43, 0, 40, 12500000250510966784, 3); 566 check_normalize_to(11368684, 43, 2, 38, 3125000062627741696, 5); 567 568 // max value 569 check_normalize_to(16777213, 104, 0, 40, 18446740775174668288, 64); 570 check_normalize_to(16777213, 104, 2, 38, 4611685193793667072, 66); 571 572 // F64 573 574 // min value 575 check_normalize_to(1, -1074, 0, 63, 9223372036854775808, -1137); 576 check_normalize_to(1, -1074, 2, 61, 2305843009213693952, -1135); 577 578 // 1.0e-250 579 check_normalize_to(6448907850777164, -883, 0, 11, 13207363278391631872, -894); 580 check_normalize_to(6448907850777164, -883, 2, 9, 3301840819597907968, -892); 581 582 // 1.0e-150 583 check_normalize_to(7371020360979573, -551, 0, 11, 15095849699286165504, -562); 584 check_normalize_to(7371020360979573, -551, 2, 9, 3773962424821541376, -560); 585 586 // 1.0e-45 587 check_normalize_to(6427752177035961, -202, 0, 11, 13164036458569648128, -213); 588 check_normalize_to(6427752177035961, -202, 2, 9, 3291009114642412032, -211); 589 590 // 1.0e-40 591 check_normalize_to(4903985730770844, -185, 0, 11, 10043362776618688512, -196); 592 check_normalize_to(4903985730770844, 
-185, 2, 9, 2510840694154672128, -194); 593 594 // 1.0e-20 595 check_normalize_to(6646139978924579, -119, 0, 11, 13611294676837537792, -130); 596 check_normalize_to(6646139978924579, -119, 2, 9, 3402823669209384448, -128); 597 598 // 1.0 599 check_normalize_to(4503599627370496, -52, 0, 11, 9223372036854775808, -63); 600 check_normalize_to(4503599627370496, -52, 2, 9, 2305843009213693952, -61); 601 602 // 1e20 603 check_normalize_to(6103515625000000, 14, 0 ,11, 12500000000000000000, 3); 604 check_normalize_to(6103515625000000, 14, 2, 9, 3125000000000000000, 5); 605 606 // 1e40 607 check_normalize_to(8271806125530277, 80, 0, 11, 16940658945086007296, 69); 608 check_normalize_to(8271806125530277, 80, 2, 9, 4235164736271501824, 71); 609 610 // 1e150 611 check_normalize_to(5503284107318959, 446, 0, 11, 11270725851789228032, 435); 612 check_normalize_to(5503284107318959, 446, 2, 9, 2817681462947307008, 437); 613 614 // 1e250 615 check_normalize_to(6290184345309700, 778, 0, 11, 12882297539194265600, 767); 616 check_normalize_to(6290184345309700, 778, 2, 9, 3220574384798566400, 769); 617 618 // max value 619 check_normalize_to(9007199254740991, 971, 0, 11, 18446744073709549568, 960); 620 check_normalize_to(9007199254740991, 971, 2, 9, 4611686018427387392, 962); 621 } 622 623 #[test] normalized_boundaries_test()624 fn normalized_boundaries_test() { 625 let fp = ExtendedFloat80 {mant: 4503599627370496, exp: -50}; 626 let u = ExtendedFloat80 {mant: 9223372036854775296, exp: -61}; 627 let l = ExtendedFloat80 {mant: 9223372036854776832, exp: -61}; 628 let (upper, lower) = fp.normalized_boundaries(); 629 assert_eq!(upper, u); 630 assert_eq!(lower, l); 631 } 632 633 // ROUND 634 check_round_to_f32(mant: u64, exp: i32, r_mant: u64, r_exp: i32)635 fn check_round_to_f32(mant: u64, exp: i32, r_mant: u64, r_exp: i32) 636 { 637 let mut x = ExtendedFloat {mant: mant, exp: exp}; 638 x.round_to_f32(round_nearest_tie_even); 639 assert_eq!(x, ExtendedFloat {mant: r_mant, exp: r_exp}); 640 
641 let mut x = ExtendedFloat {mant: (mant as u128) << 64, exp: exp-64}; 642 x.round_to_f32(round_nearest_tie_even); 643 assert_eq!(x, ExtendedFloat {mant: r_mant as u128, exp: r_exp}); 644 } 645 646 #[test] round_to_f32_test()647 fn round_to_f32_test() { 648 // This is lossy, so some of these values are **slightly** rounded. 649 650 // underflow 651 check_round_to_f32(9223372036854775808, -213, 0, -149); 652 653 // min value 654 check_round_to_f32(9223372036854775808, -212, 1, -149); 655 656 // 1.0e-40 657 check_round_to_f32(10043308644012916736, -196, 71362, -149); 658 659 // 1.0e-20 660 check_round_to_f32(13611294244890214400, -130, 12379400, -90); 661 662 // 1.0 663 check_round_to_f32(9223372036854775808, -63, 8388608, -23); 664 665 // 1e20 666 check_round_to_f32(12500000250510966784, 3, 11368684, 43); 667 668 // max value 669 check_round_to_f32(18446740775174668288, 64, 16777213, 104); 670 671 // overflow 672 check_round_to_f32(18446740775174668288, 65, 16777213, 105); 673 } 674 check_round_to_f64(mant: u64, exp: i32, r_mant: u64, r_exp: i32)675 fn check_round_to_f64(mant: u64, exp: i32, r_mant: u64, r_exp: i32) 676 { 677 let mut x = ExtendedFloat {mant: mant, exp: exp}; 678 x.round_to_f64(round_nearest_tie_even); 679 assert_eq!(x, ExtendedFloat {mant: r_mant, exp: r_exp}); 680 681 let mut x = ExtendedFloat {mant: (mant as u128) << 64, exp: exp-64}; 682 x.round_to_f64(round_nearest_tie_even); 683 assert_eq!(x, ExtendedFloat {mant: r_mant as u128, exp: r_exp}); 684 } 685 686 #[test] round_to_f64_test()687 fn round_to_f64_test() { 688 // This is lossy, so some of these values are **slightly** rounded. 
689 690 // underflow 691 check_round_to_f64(9223372036854775808, -1138, 0, -1074); 692 693 // min value 694 check_round_to_f64(9223372036854775808, -1137, 1, -1074); 695 696 // 1.0e-250 697 check_round_to_f64(15095849699286165504, -562, 7371020360979573, -551); 698 699 // 1.0e-150 700 check_round_to_f64(15095849699286165504, -562, 7371020360979573, -551); 701 702 // 1.0e-45 703 check_round_to_f64(13164036458569648128, -213, 6427752177035961, -202); 704 705 // 1.0e-40 706 check_round_to_f64(10043362776618688512, -196, 4903985730770844, -185); 707 708 // 1.0e-20 709 check_round_to_f64(13611294676837537792, -130, 6646139978924579, -119); 710 711 // 1.0 712 check_round_to_f64(9223372036854775808, -63, 4503599627370496, -52); 713 714 // 1e20 715 check_round_to_f64(12500000000000000000, 3, 6103515625000000, 14); 716 717 // 1e40 718 check_round_to_f64(16940658945086007296, 69, 8271806125530277, 80); 719 720 // 1e150 721 check_round_to_f64(11270725851789228032, 435, 5503284107318959, 446); 722 723 // 1e250 724 check_round_to_f64(12882297539194265600, 767, 6290184345309700, 778); 725 726 // max value 727 check_round_to_f64(18446744073709549568, 960, 9007199254740991, 971); 728 729 // Bug fixes 730 // 1.2345e-308 731 check_round_to_f64(10234494226754558294, -1086, 2498655817078750, -1074) 732 } 733 734 // FROM 735 736 #[test] from_int_test()737 fn from_int_test() { 738 // 0 739 assert_eq!(ExtendedFloat80::from_u8(0), (0, 0).into()); 740 assert_eq!(ExtendedFloat80::from_u16(0), (0, 0).into()); 741 assert_eq!(ExtendedFloat80::from_u32(0), (0, 0).into()); 742 assert_eq!(ExtendedFloat80::from_u64(0), (0, 0).into()); 743 assert_eq!(ExtendedFloat160::from_u128(0), (0, 0).into()); 744 745 // 1 746 assert_eq!(ExtendedFloat80::from_u8(1), (1, 0).into()); 747 assert_eq!(ExtendedFloat80::from_u16(1), (1, 0).into()); 748 assert_eq!(ExtendedFloat80::from_u32(1), (1, 0).into()); 749 assert_eq!(ExtendedFloat80::from_u64(1), (1, 0).into()); 750 assert_eq!(ExtendedFloat160::from_u128(1), (1, 
0).into()); 751 752 // (2^8-1) 255 753 assert_eq!(ExtendedFloat80::from_u8(255), (255, 0).into()); 754 assert_eq!(ExtendedFloat80::from_u16(255), (255, 0).into()); 755 assert_eq!(ExtendedFloat80::from_u32(255), (255, 0).into()); 756 assert_eq!(ExtendedFloat80::from_u64(255), (255, 0).into()); 757 assert_eq!(ExtendedFloat160::from_u128(255), (255, 0).into()); 758 759 // (2^16-1) 65535 760 assert_eq!(ExtendedFloat80::from_u16(65535), (65535, 0).into()); 761 assert_eq!(ExtendedFloat80::from_u32(65535), (65535, 0).into()); 762 assert_eq!(ExtendedFloat80::from_u64(65535), (65535, 0).into()); 763 assert_eq!(ExtendedFloat160::from_u128(65535), (65535, 0).into()); 764 765 // (2^32-1) 4294967295 766 assert_eq!(ExtendedFloat80::from_u32(4294967295), (4294967295, 0).into()); 767 assert_eq!(ExtendedFloat80::from_u64(4294967295), (4294967295, 0).into()); 768 assert_eq!(ExtendedFloat160::from_u128(4294967295), (4294967295, 0).into()); 769 770 // (2^64-1) 18446744073709551615 771 assert_eq!(ExtendedFloat80::from_u64(18446744073709551615), (18446744073709551615, 0).into()); 772 assert_eq!(ExtendedFloat160::from_u128(18446744073709551615), (18446744073709551615, 0).into()); 773 774 // (2^128-1) 340282366920938463463374607431768211455 775 assert_eq!(ExtendedFloat160::from_u128(340282366920938463463374607431768211455), (340282366920938463463374607431768211455, 0).into()); 776 } 777 778 #[test] from_f32_test()779 fn from_f32_test() { 780 assert_eq!(ExtendedFloat80::from_f32(0.), (0, -149).into()); 781 assert_eq!(ExtendedFloat80::from_f32(-0.), (0, -149).into()); 782 783 assert_eq!(ExtendedFloat80::from_f32(1e-45), (1, -149).into()); 784 assert_eq!(ExtendedFloat80::from_f32(1e-40), (71362, -149).into()); 785 assert_eq!(ExtendedFloat80::from_f32(2e-40), (142725, -149).into()); 786 assert_eq!(ExtendedFloat80::from_f32(1e-20), (12379400, -90).into()); 787 assert_eq!(ExtendedFloat80::from_f32(2e-20), (12379400, -89).into()); 788 assert_eq!(ExtendedFloat80::from_f32(1.0), (8388608, 
-23).into()); 789 assert_eq!(ExtendedFloat80::from_f32(2.0), (8388608, -22).into()); 790 assert_eq!(ExtendedFloat80::from_f32(1e20), (11368684, 43).into()); 791 assert_eq!(ExtendedFloat80::from_f32(2e20), (11368684, 44).into()); 792 assert_eq!(ExtendedFloat80::from_f32(3.402823e38), (16777213, 104).into()); 793 } 794 795 #[test] from_f64_test()796 fn from_f64_test() { 797 assert_eq!(ExtendedFloat80::from_f64(0.), (0, -1074).into()); 798 assert_eq!(ExtendedFloat80::from_f64(-0.), (0, -1074).into()); 799 assert_eq!(ExtendedFloat80::from_f64(5e-324), (1, -1074).into()); 800 assert_eq!(ExtendedFloat80::from_f64(1e-250), (6448907850777164, -883).into()); 801 assert_eq!(ExtendedFloat80::from_f64(1e-150), (7371020360979573, -551).into()); 802 assert_eq!(ExtendedFloat80::from_f64(1e-45), (6427752177035961, -202).into()); 803 assert_eq!(ExtendedFloat80::from_f64(1e-40), (4903985730770844, -185).into()); 804 assert_eq!(ExtendedFloat80::from_f64(2e-40), (4903985730770844, -184).into()); 805 assert_eq!(ExtendedFloat80::from_f64(1e-20), (6646139978924579, -119).into()); 806 assert_eq!(ExtendedFloat80::from_f64(2e-20), (6646139978924579, -118).into()); 807 assert_eq!(ExtendedFloat80::from_f64(1.0), (4503599627370496, -52).into()); 808 assert_eq!(ExtendedFloat80::from_f64(2.0), (4503599627370496, -51).into()); 809 assert_eq!(ExtendedFloat80::from_f64(1e20), (6103515625000000, 14).into()); 810 assert_eq!(ExtendedFloat80::from_f64(2e20), (6103515625000000, 15).into()); 811 assert_eq!(ExtendedFloat80::from_f64(1e40), (8271806125530277, 80).into()); 812 assert_eq!(ExtendedFloat80::from_f64(2e40), (8271806125530277, 81).into()); 813 assert_eq!(ExtendedFloat80::from_f64(1e150), (5503284107318959, 446).into()); 814 assert_eq!(ExtendedFloat80::from_f64(1e250), (6290184345309700, 778).into()); 815 assert_eq!(ExtendedFloat80::from_f64(1.7976931348623157e308), (9007199254740991, 971).into()); 816 } 817 assert_normalized_eq<M: Mantissa>(mut x: ExtendedFloat<M>, mut y: ExtendedFloat<M>)818 fn 
assert_normalized_eq<M: Mantissa>(mut x: ExtendedFloat<M>, mut y: ExtendedFloat<M>) { 819 x.normalize(); 820 y.normalize(); 821 assert_eq!(x, y); 822 } 823 824 #[test] from_float()825 fn from_float() { 826 let values: [f32; 26] = [ 827 1e-40, 828 2e-40, 829 1e-35, 830 2e-35, 831 1e-30, 832 2e-30, 833 1e-25, 834 2e-25, 835 1e-20, 836 2e-20, 837 1e-15, 838 2e-15, 839 1e-10, 840 2e-10, 841 1e-5, 842 2e-5, 843 1.0, 844 2.0, 845 1e5, 846 2e5, 847 1e10, 848 2e10, 849 1e15, 850 2e15, 851 1e20, 852 2e20, 853 ]; 854 for value in values.iter() { 855 assert_normalized_eq(ExtendedFloat80::from_f32(*value), ExtendedFloat80::from_f64(*value as f64)); 856 assert_normalized_eq(ExtendedFloat160::from_f32(*value), ExtendedFloat160::from_f64(*value as f64)); 857 } 858 } 859 860 // TO 861 862 // Sample of interesting numbers to check during standard test builds. 863 const INTEGERS: [u64; 32] = [ 864 0, // 0x0 865 1, // 0x1 866 7, // 0x7 867 15, // 0xF 868 112, // 0x70 869 119, // 0x77 870 127, // 0x7F 871 240, // 0xF0 872 247, // 0xF7 873 255, // 0xFF 874 2032, // 0x7F0 875 2039, // 0x7F7 876 2047, // 0x7FF 877 4080, // 0xFF0 878 4087, // 0xFF7 879 4095, // 0xFFF 880 65520, // 0xFFF0 881 65527, // 0xFFF7 882 65535, // 0xFFFF 883 1048560, // 0xFFFF0 884 1048567, // 0xFFFF7 885 1048575, // 0xFFFFF 886 16777200, // 0xFFFFF0 887 16777207, // 0xFFFFF7 888 16777215, // 0xFFFFFF 889 268435440, // 0xFFFFFF0 890 268435447, // 0xFFFFFF7 891 268435455, // 0xFFFFFFF 892 4294967280, // 0xFFFFFFF0 893 4294967287, // 0xFFFFFFF7 894 4294967295, // 0xFFFFFFFF 895 18446744073709551615, // 0xFFFFFFFFFFFFFFFF 896 ]; 897 898 #[test] to_f32_test()899 fn to_f32_test() { 900 // underflow 901 let x = ExtendedFloat80 {mant: 9223372036854775808, exp: -213}; 902 assert_eq!(x.into_f32(), 0.0); 903 904 // min value 905 let x = ExtendedFloat80 {mant: 9223372036854775808, exp: -212}; 906 assert_eq!(x.into_f32(), 1e-45); 907 908 // 1.0e-40 909 let x = ExtendedFloat80 {mant: 10043308644012916736, exp: -196}; 910 
assert_eq!(x.into_f32(), 1e-40); 911 912 // 1.0e-20 913 let x = ExtendedFloat80 {mant: 13611294244890214400, exp: -130}; 914 assert_eq!(x.into_f32(), 1e-20); 915 916 // 1.0 917 let x = ExtendedFloat80 {mant: 9223372036854775808, exp: -63}; 918 assert_eq!(x.into_f32(), 1.0); 919 920 // 1e20 921 let x = ExtendedFloat80 {mant: 12500000250510966784, exp: 3}; 922 assert_eq!(x.into_f32(), 1e20); 923 924 // max value 925 let x = ExtendedFloat80 {mant: 18446740775174668288, exp: 64}; 926 assert_eq!(x.into_f32(), 3.402823e38); 927 928 // almost max, high exp 929 let x = ExtendedFloat80 {mant: 1048575, exp: 108}; 930 assert_eq!(x.into_f32(), 3.4028204e38); 931 932 // max value + 1 933 let x = ExtendedFloat80 {mant: 16777216, exp: 104}; 934 assert_eq!(x.into_f32(), f32::INFINITY); 935 936 // max value + 1 937 let x = ExtendedFloat80 {mant: 1048576, exp: 108}; 938 assert_eq!(x.into_f32(), f32::INFINITY); 939 940 // 1e40 941 let x = ExtendedFloat80 {mant: 16940658945086007296, exp: 69}; 942 assert_eq!(x.into_f32(), f32::INFINITY); 943 944 // Integers. 
// Remainder of `to_f32_test` (its opening lines precede this span): integer inputs.
        for int in INTEGERS.iter() {
            let fp = ExtendedFloat80 { mant: *int, exp: 0 };
            assert_eq!(fp.into_f32(), *int as f32, "{:?} as f32", *int);
        }
    }

    /// Checks `into_f64` on hand-built `ExtendedFloat80` values: underflow to
    /// zero, the smallest subnormal, a ladder of powers of ten, the largest
    /// finite value, overflow to infinity, and every entry of `INTEGERS`.
    ///
    /// NOTE(review): if `assert_relative_eq!` is the `approx` crate macro, its
    /// default absolute epsilon accepts any two values closer than
    /// `f64::EPSILON`, so the expectations far below 1.0 (e.g. `5e-324`) would
    /// also pass for a result of `0.0` -- confirm whether exact `assert_eq!`
    /// comparisons are wanted for those cases.
    #[test]
    fn to_f64_test() {
        // underflow: 2^63 * 2^-1138 = 2^-1075, half of the smallest subnormal
        let x = ExtendedFloat80 { mant: 9223372036854775808, exp: -1138 };
        assert_relative_eq!(x.into_f64(), 0.0);

        // min value: 2^63 * 2^-1137 = 2^-1074, the smallest subnormal
        let x = ExtendedFloat80 { mant: 9223372036854775808, exp: -1137 };
        assert_relative_eq!(x.into_f64(), 5e-324);

        // 1.0e-250
        let x = ExtendedFloat80 { mant: 13207363278391631872, exp: -894 };
        assert_relative_eq!(x.into_f64(), 1e-250);

        // 1.0e-150
        let x = ExtendedFloat80 { mant: 15095849699286165504, exp: -562 };
        assert_relative_eq!(x.into_f64(), 1e-150);

        // 1.0e-45
        let x = ExtendedFloat80 { mant: 13164036458569648128, exp: -213 };
        assert_relative_eq!(x.into_f64(), 1e-45);

        // 1.0e-40
        let x = ExtendedFloat80 { mant: 10043362776618688512, exp: -196 };
        assert_relative_eq!(x.into_f64(), 1e-40);

        // 1.0e-20
        let x = ExtendedFloat80 { mant: 13611294676837537792, exp: -130 };
        assert_relative_eq!(x.into_f64(), 1e-20);

        // 1.0
        let x = ExtendedFloat80 { mant: 9223372036854775808, exp: -63 };
        assert_relative_eq!(x.into_f64(), 1.0);

        // 1e20
        let x = ExtendedFloat80 { mant: 12500000000000000000, exp: 3 };
        assert_relative_eq!(x.into_f64(), 1e20);

        // 1e40
        let x = ExtendedFloat80 { mant: 16940658945086007296, exp: 69 };
        assert_relative_eq!(x.into_f64(), 1e40);

        // 1e150
        let x = ExtendedFloat80 { mant: 11270725851789228032, exp: 435 };
        assert_relative_eq!(x.into_f64(), 1e150);

        // 1e250
        let x = ExtendedFloat80 { mant: 12882297539194265600, exp: 767 };
        assert_relative_eq!(x.into_f64(), 1e250);

        // max value, mantissa not normalized: (2^53 - 1) * 2^971
        let x = ExtendedFloat80 { mant: 9007199254740991, exp: 971 };
        assert_relative_eq!(x.into_f64(), 1.7976931348623157e308);

        // max value, same number with the mantissa shifted left by 11 bits
        let x = ExtendedFloat80 { mant: 18446744073709549568, exp: 960 };
        assert_relative_eq!(x.into_f64(), 1.7976931348623157e308);

        // overflow: 2^53 * 2^971 = 2^1024
        let x = ExtendedFloat80 { mant: 9007199254740992, exp: 971 };
        assert_relative_eq!(x.into_f64(), f64::INFINITY);

        // overflow: twice the largest finite value
        let x = ExtendedFloat80 { mant: 18446744073709549568, exp: 961 };
        assert_relative_eq!(x.into_f64(), f64::INFINITY);

        // Underflow
        // Adapted from failures in strtod.
        let x = ExtendedFloat80 { exp: -1139, mant: 18446744073709550712 };
        assert_relative_eq!(x.into_f64(), 0.0);

        let x = ExtendedFloat80 { exp: -1139, mant: 18446744073709551460 };
        assert_relative_eq!(x.into_f64(), 0.0);

        let x = ExtendedFloat80 { exp: -1138, mant: 9223372036854776103 };
        assert_relative_eq!(x.into_f64(), 5e-324);

        // Integers.
        for int in INTEGERS.iter() {
            let fp = ExtendedFloat80 { mant: *int, exp: 0 };
            assert_eq!(fp.into_f64(), *int as f64, "{:?} as f64", *int);
        }
    }

    /// Smoke test: `as_rounded_f32` accepts every `RoundingKind` used below
    /// and yields 1.0 for an exactly representable positive input.
    #[test]
    fn to_rounded_f32_test() {
        // Just check it compiles, we already check the underlying algorithms.
        let x = ExtendedFloat80 { mant: 9223372036854775808, exp: -63 };
        assert_eq!(x.as_rounded_f32(RoundingKind::NearestTieEven, Sign::Positive), 1.0);
        assert_eq!(x.as_rounded_f32(RoundingKind::NearestTieAwayZero, Sign::Positive), 1.0);
        assert_eq!(x.as_rounded_f32(RoundingKind::TowardPositiveInfinity, Sign::Positive), 1.0);
        assert_eq!(x.as_rounded_f32(RoundingKind::TowardNegativeInfinity, Sign::Positive), 1.0);
        assert_eq!(x.as_rounded_f32(RoundingKind::TowardZero, Sign::Positive), 1.0);
    }

    /// Smoke test: `as_rounded_f64` accepts every `RoundingKind` used below
    /// and yields 1.0 for an exactly representable positive input.
    #[test]
    fn to_rounded_f64_test() {
        // Just check it compiles, we already check the underlying algorithms.
        let x = ExtendedFloat80 { mant: 9223372036854775808, exp: -63 };
        assert_eq!(x.as_rounded_f64(RoundingKind::NearestTieEven, Sign::Positive), 1.0);
        assert_eq!(x.as_rounded_f64(RoundingKind::NearestTieAwayZero, Sign::Positive), 1.0);
        assert_eq!(x.as_rounded_f64(RoundingKind::TowardPositiveInfinity, Sign::Positive), 1.0);
        assert_eq!(x.as_rounded_f64(RoundingKind::TowardNegativeInfinity, Sign::Positive), 1.0);
        assert_eq!(x.as_rounded_f64(RoundingKind::TowardZero, Sign::Positive), 1.0);
    }

    /// Exhaustive (ignored by default) comparison of `into_f32` against the
    /// native `as f32` cast for every `u32`, using both mantissa widths.
    #[test]
    #[ignore]
    fn to_f32_full_test() {
        // Use exhaustive search to ensure both lossy and unlossy items are checked.
        // An f32 holds 24 significant bits, so sweeping all 32-bit integers
        // covers exactly representable inputs as well as ones that must round.
        // The range is inclusive so that `u32::MAX` itself is exercised too.
        for int in 0..=u32::max_value() {
            let fp = ExtendedFloat80 { mant: int as u64, exp: 0 };
            assert_eq!(fp.into_f32(), int as f32, "ExtendedFloat80 {:?} as f32", int);

            let fp = ExtendedFloat160 { mant: int as u128, exp: 0 };
            assert_eq!(fp.into_f32(), int as f32, "ExtendedFloat160 {:?} as f32", int);
        }
    }

    /// Long-running (ignored by default) comparison of `into_f64` against the
    /// native `as f64` cast over a window of `u32::MAX` consecutive integers
    /// centred on 2^52, using both mantissa widths.
    #[test]
    #[ignore]
    fn to_f64_full_test() {
        // Use exhaustive search to ensure both lossy and unlossy items are checked.
        const U32_MAX: u64 = u32::max_value() as u64;
        const POW2_52: u64 = 4503599627370496;
        const START: u64 = POW2_52 - U32_MAX / 2;
        const END: u64 = START + U32_MAX;
        for int in START..END {
            let fp = ExtendedFloat80 { mant: int, exp: 0 };
            assert_eq!(fp.into_f64(), int as f64, "ExtendedFloat80 {:?} as f64", int);

            let fp = ExtendedFloat160 { mant: int as u128, exp: 0 };
            assert_eq!(fp.into_f64(), int as f64, "ExtendedFloat160 {:?} as f64", int);
        }
    }

    // OPERATIONS

    /// Asserts that `a.mul(&b)` equals `c` field-for-field (mantissa and exponent).
    fn check_mul<M: Mantissa>(a: ExtendedFloat<M>, b: ExtendedFloat<M>, c: ExtendedFloat<M>) {
        let r = a.mul(&b);
        assert_eq!(r, c);
    }

    /// Checks `mul` against precomputed products for both mantissa widths,
    /// against `10 * 10` built from integers, and with operands that are not
    /// normalized.
    #[test]
    fn mul_test() {
        // Normalized (64-bit mantissa)
        let a = ExtendedFloat80 { mant: 13164036458569648128, exp: -213 };
        let b = ExtendedFloat80 { mant: 9223372036854775808, exp: -62 };
        let c = ExtendedFloat80 { mant: 6582018229284824064, exp: -211 };
        check_mul(a, b, c);

        // Normalized (128-bit mantissa)
        let a = ExtendedFloat160 { mant: 242833611528216130005140556221773774848, exp: -277 };
        let b = ExtendedFloat160 { mant: 170141183460469231731687303715884105728, exp: -126 };
        let c = ExtendedFloat160 { mant: 121416805764108065002570278110886887424, exp: -275 };
        check_mul(a, b, c);

        // Check with integers
        // 64-bit mantissa
        let mut a = ExtendedFloat80::from_u8(10);
        let mut b = ExtendedFloat80::from_u8(10);
        a.normalize();
        b.normalize();
        assert_eq!(a.mul(&b).into_f64(), 100.0);

        // 128-bit mantissa
        let mut a = ExtendedFloat160::from_u8(10);
        let mut b = ExtendedFloat160::from_u8(10);
        a.normalize();
        b.normalize();
        assert_eq!(a.mul(&b).into_f64(), 100.0);

        // Check both values need high bits set.
        // 2 * 2: the operands are used as-is, without calling `normalize`.
        let a = ExtendedFloat80 { mant: 1 << 32, exp: -31 };
        let b = ExtendedFloat80 { mant: 1 << 32, exp: -31 };
        assert_eq!(a.mul(&b).into_f64(), 4.0);

        // Check both values need high bits set.
        // 10 * 10: the operands are used as-is, without calling `normalize`.
        let a = ExtendedFloat80 { mant: 10 << 31, exp: -31 };
        let b = ExtendedFloat80 { mant: 10 << 31, exp: -31 };
        assert_eq!(a.mul(&b).into_f64(), 100.0);
    }

    /// Asserts that `a.imul(&b)` leaves `a` equal to `c` field-for-field.
    fn check_imul<M: Mantissa>(mut a: ExtendedFloat<M>, b: ExtendedFloat<M>, c: ExtendedFloat<M>) {
        a.imul(&b);
        assert_eq!(a, c);
    }

    /// In-place counterpart of `mul_test`: the same inputs and expectations,
    /// driven through `imul`.
    #[test]
    fn imul_test() {
        // Normalized (64-bit mantissa)
        let a = ExtendedFloat80 { mant: 13164036458569648128, exp: -213 };
        let b = ExtendedFloat80 { mant: 9223372036854775808, exp: -62 };
        let c = ExtendedFloat80 { mant: 6582018229284824064, exp: -211 };
        check_imul(a, b, c);

        // Normalized (128-bit mantissa)
        let a = ExtendedFloat160 { mant: 242833611528216130005140556221773774848, exp: -277 };
        let b = ExtendedFloat160 { mant: 170141183460469231731687303715884105728, exp: -126 };
        let c = ExtendedFloat160 { mant: 121416805764108065002570278110886887424, exp: -275 };
        check_imul(a, b, c);

        // Check with integers
        // 64-bit mantissa
        let mut a = ExtendedFloat80::from_u8(10);
        let mut b = ExtendedFloat80::from_u8(10);
        a.normalize();
        b.normalize();
        a.imul(&b);
        assert_eq!(a.into_f64(), 100.0);

        // 128-bit mantissa
        let mut a = ExtendedFloat160::from_u8(10);
        let mut b = ExtendedFloat160::from_u8(10);
        a.normalize();
        b.normalize();
        a.imul(&b);
        assert_eq!(a.into_f64(), 100.0);

        // Check both values need high bits set.
        // 2 * 2: the operands are used as-is, without calling `normalize`.
        let mut a = ExtendedFloat80 { mant: 1 << 32, exp: -31 };
        let b = ExtendedFloat80 { mant: 1 << 32, exp: -31 };
        a.imul(&b);
        assert_eq!(a.into_f64(), 4.0);

        // Check both values need high bits set.
        // 10 * 10: the operands are used as-is, without calling `normalize`.
        let mut a = ExtendedFloat80 { mant: 10 << 31, exp: -31 };
        let b = ExtendedFloat80 { mant: 10 << 31, exp: -31 };
        a.imul(&b);
        assert_eq!(a.into_f64(), 100.0);
    }
}