1 //! Defines rounding schemes for floating-point numbers.
2
3 use util::*;
4 use super::float::ExtendedFloat;
5 use super::mantissa::Mantissa;
6 use super::shift::*;
7
8 // GENERIC
9 // -------
10
11 // NEAREST ROUNDING
12
13 /// Shift right N-bytes and round to the nearest.
14 ///
15 /// Return if we are above halfway and if we are halfway.
16 #[inline]
round_nearest<M>(fp: &mut ExtendedFloat<M>, shift: i32) -> (bool, bool) where M: Mantissa17 pub(crate) fn round_nearest<M>(fp: &mut ExtendedFloat<M>, shift: i32)
18 -> (bool, bool)
19 where M: Mantissa
20 {
21 // Extract the truncated bits using mask.
22 // Calculate if the value of the truncated bits are either above
23 // the mid-way point, or equal to it.
24 //
25 // For example, for 4 truncated bytes, the mask would be b1111
26 // and the midway point would be b1000.
27 let mask: M = lower_n_mask(as_cast(shift));
28 let halfway: M = lower_n_halfway(as_cast(shift));
29
30 let truncated_bits = fp.mant & mask;
31 let is_above = truncated_bits > halfway;
32 let is_halfway = truncated_bits == halfway;
33
34 // Bit shift so the leading bit is in the hidden bit.
35 overflowing_shr(fp, shift);
36
37 (is_above, is_halfway)
38 }
39
40 /// Tie rounded floating point to event.
41 #[inline]
tie_even<M>(fp: &mut ExtendedFloat<M>, is_above: bool, is_halfway: bool) where M: Mantissa42 pub(crate) fn tie_even<M>(fp: &mut ExtendedFloat<M>, is_above: bool, is_halfway: bool)
43 where M: Mantissa
44 {
45 // Extract the last bit after shifting (and determine if it is odd).
46 let is_odd = fp.mant & M::ONE == M::ONE;
47
48 // Calculate if we need to roundup.
49 // We need to roundup if we are above halfway, or if we are odd
50 // and at half-way (need to tie-to-even).
51 if is_above || (is_odd && is_halfway) {
52 fp.mant += M::ONE;
53 }
54 }
55
56 /// Shift right N-bytes and round nearest, tie-to-even.
57 ///
58 /// Floating-point arithmetic uses round to nearest, ties to even,
59 /// which rounds to the nearest value, if the value is halfway in between,
60 /// round to an even value.
61 #[inline]
round_nearest_tie_even<M>(fp: &mut ExtendedFloat<M>, shift: i32) where M: Mantissa62 pub(crate) fn round_nearest_tie_even<M>(fp: &mut ExtendedFloat<M>, shift: i32)
63 where M: Mantissa
64 {
65 let (is_above, is_halfway) = round_nearest(fp, shift);
66 tie_even(fp, is_above, is_halfway);
67 }
68
69 /// Tie rounded floating point away from zero.
70 #[inline]
tie_away_zero<M>(fp: &mut ExtendedFloat<M>, is_above: bool, is_halfway: bool) where M: Mantissa71 pub(crate) fn tie_away_zero<M>(fp: &mut ExtendedFloat<M>, is_above: bool, is_halfway: bool)
72 where M: Mantissa
73 {
74 // Calculate if we need to roundup.
75 // We need to roundup if we are halfway or above halfway,
76 // since the value is always positive and we need to round away
77 // from zero.
78 if is_above || is_halfway {
79 fp.mant += M::ONE;
80 }
81 }
82
83 /// Shift right N-bytes and round nearest, tie-away-zero.
84 ///
85 /// Floating-point arithmetic defines round to nearest, ties away from zero,
86 /// which rounds to the nearest value, if the value is halfway in between,
87 /// ties away from zero.
88 #[inline]
round_nearest_tie_away_zero<M>(fp: &mut ExtendedFloat<M>, shift: i32) where M: Mantissa89 pub(crate) fn round_nearest_tie_away_zero<M>(fp: &mut ExtendedFloat<M>, shift: i32)
90 where M: Mantissa
91 {
92 let (is_above, is_halfway) = round_nearest(fp, shift);
93 tie_away_zero(fp, is_above, is_halfway);
94 }
95
96 // DIRECTED ROUNDING
97
98 /// Shift right N-bytes and round towards a direction.
99 ///
100 /// Return if we have any truncated bytes.
101 #[inline]
round_toward<M>(fp: &mut ExtendedFloat<M>, shift: i32) -> bool where M: Mantissa102 pub(crate) fn round_toward<M>(fp: &mut ExtendedFloat<M>, shift: i32)
103 -> bool
104 where M: Mantissa
105 {
106 let mask: M = lower_n_mask(as_cast(shift));
107 let truncated_bits = fp.mant & mask;
108
109 // Bit shift so the leading bit is in the hidden bit.
110 overflowing_shr(fp, shift);
111
112 truncated_bits != M::ZERO
113 }
114
115 /// Round up.
116 #[inline]
upward<M>(fp: &mut ExtendedFloat<M>, is_truncated: bool) where M: Mantissa117 pub(crate) fn upward<M>(fp: &mut ExtendedFloat<M>, is_truncated: bool)
118 where M: Mantissa
119 {
120 if is_truncated {
121 fp.mant += M::ONE;
122 }
123 }
124
125 /// Shift right N-bytes and round toward infinity.
126 ///
127 /// Floating-point arithmetic defines round toward infinity, which rounds
128 /// towards positive infinity.
129 #[inline]
round_upward<M>(fp: &mut ExtendedFloat<M>, shift: i32) where M: Mantissa130 pub(crate) fn round_upward<M>(fp: &mut ExtendedFloat<M>, shift: i32)
131 where M: Mantissa
132 {
133 // If the truncated bits are non-zero, that is, any rounding error occurred,
134 // round-up.
135 let is_truncated = round_toward(fp, shift);
136 upward(fp, is_truncated);
137 }
138
139 /// Round down.
140 #[inline]
downard<M>(_: &mut ExtendedFloat<M>, _: bool) where M: Mantissa141 pub(crate) fn downard<M>(_: &mut ExtendedFloat<M>, _: bool)
142 where M: Mantissa
143 {}
144
145 /// Shift right N-bytes and round toward zero.
146 ///
147 /// Floating-point arithmetic defines round toward zero, which rounds
148 /// towards positive zero.
149 #[inline]
round_downward<M>(fp: &mut ExtendedFloat<M>, shift: i32) where M: Mantissa150 pub(crate) fn round_downward<M>(fp: &mut ExtendedFloat<M>, shift: i32)
151 where M: Mantissa
152 {
153 // Bit shift so the leading bit is in the hidden bit.
154 // No rounding schemes, so we just ignore everything else.
155 let is_truncated = round_toward(fp, shift);
156 downard(fp, is_truncated);
157 }
158
159 // NATIVE FLOAT
160 // ------------
161
162 // FLOAT ROUNDING
163
164 /// Trait to round extended-precision floats to native representations.
165 pub trait FloatRounding<M: Mantissa>: Float {
166 /// Default number of bits to shift (or 64 - mantissa size - 1).
167 const DEFAULT_SHIFT: i32;
168 /// Mask to determine if a full-carry occurred (1 in bit above hidden bit).
169 const CARRY_MASK: M;
170 }
171
172 // Literals don't work for generic types, we need to use this as a hack.
173 macro_rules! float_rounding_f32 {
174 ($($t:tt)*) => ($(
175 impl FloatRounding<$t> for f32 {
176 const DEFAULT_SHIFT: i32 = $t::FULL - f32::MANTISSA_SIZE - 1;
177 const CARRY_MASK: $t = 0x1000000;
178 }
179 )*)
180 }
181
182 #[cfg(has_i128)]
183 float_rounding_f32! { u64 u128 }
184
185 #[cfg(not(has_i128))]
186 float_rounding_f32! { u64 }
187
188 // Literals don't work for generic types, we need to use this as a hack.
189 macro_rules! float_rounding_f64 {
190 ($($t:tt)*) => ($(
191 impl FloatRounding<$t> for f64 {
192 const DEFAULT_SHIFT: i32 = $t::FULL - f64::MANTISSA_SIZE - 1;
193 const CARRY_MASK: $t = 0x20000000000000;
194 }
195 )*)
196 }
197
198 #[cfg(has_i128)]
199 float_rounding_f64! { u64 u128 }
200
201 #[cfg(not(has_i128))]
202 float_rounding_f64! { u64 }
203
204 // ROUND TO FLOAT
205
206 /// Shift the ExtendedFloat fraction to the fraction bits in a native float.
207 ///
208 /// Floating-point arithmetic uses round to nearest, ties to even,
209 /// which rounds to the nearest value, if the value is halfway in between,
210 /// round to an even value.
211 #[inline]
round_to_float<T, M, Cb>(fp: &mut ExtendedFloat<M>, cb: Cb) where T: FloatRounding<M>, M: Mantissa, Cb: FnOnce(&mut ExtendedFloat<M>, i32)212 pub(crate) fn round_to_float<T, M, Cb>(fp: &mut ExtendedFloat<M>, cb: Cb)
213 where T: FloatRounding<M>,
214 M: Mantissa,
215 Cb: FnOnce(&mut ExtendedFloat<M>, i32)
216 {
217 // Calculate the difference to allow a single calculation
218 // rather than a loop, to minimize the number of ops required.
219 // This does underflow detection.
220 let final_exp = fp.exp + T::DEFAULT_SHIFT;
221 if final_exp < T::DENORMAL_EXPONENT {
222 // We would end up with a denormal exponent, try to round to more
223 // digits. Only shift right if we can avoid zeroing out the value,
224 // which requires the exponent diff to be < M::BITS. The value
225 // is already normalized, so we shouldn't have any issue zeroing
226 // out the value.
227 let diff = T::DENORMAL_EXPONENT - fp.exp;
228 if diff <= M::FULL {
229 // We can avoid underflow, can get a valid representation.
230 cb(fp, diff);
231 } else {
232 // Certain underflow, assign literal 0s.
233 fp.mant = M::ZERO;
234 fp.exp = 0;
235 }
236 } else {
237 cb(fp, T::DEFAULT_SHIFT);
238 }
239
240 if fp.mant & T::CARRY_MASK == T::CARRY_MASK {
241 // Roundup carried over to 1 past the hidden bit.
242 shr(fp, 1);
243 }
244 }
245
246 // AVOID OVERFLOW/UNDERFLOW
247
248 /// Avoid overflow for large values, shift left as needed.
249 ///
250 /// Shift until a 1-bit is in the hidden bit, if the mantissa is not 0.
251 #[inline]
avoid_overflow<T, M>(fp: &mut ExtendedFloat<M>) where T: FloatRounding<M>, M: Mantissa252 pub(crate) fn avoid_overflow<T, M>(fp: &mut ExtendedFloat<M>)
253 where T: FloatRounding<M>,
254 M: Mantissa
255 {
256 // Calculate the difference to allow a single calculation
257 // rather than a loop, minimizing the number of ops required.
258 if fp.exp >= T::MAX_EXPONENT {
259 let diff = fp.exp - T::MAX_EXPONENT;
260 if diff <= T::MANTISSA_SIZE {
261 // Our overflow mask needs to start at the hidden bit, or at
262 // `T::MANTISSA_SIZE+1`, and needs to have `diff+1` bits set,
263 // to see if our value overflows.
264 let bit = as_cast(T::MANTISSA_SIZE+1);
265 let n = as_cast(diff+1);
266 let mask: M = internal_n_mask(bit, n);
267 if (fp.mant & mask).is_zero() {
268 // If we have no 1-bit in the hidden-bit position,
269 // which is index 0, we need to shift 1.
270 let shift = diff + 1;
271 shl(fp, shift);
272 }
273 }
274 }
275 }
276
277 // ROUND TO NATIVE
278
279 /// Round an extended-precision float to a native float representation.
280 #[inline]
round_to_native<T, M, Cb>(fp: &mut ExtendedFloat<M>, cb: Cb) where T: FloatRounding<M>, M: Mantissa, Cb: FnOnce(&mut ExtendedFloat<M>, i32)281 pub(crate) fn round_to_native<T, M, Cb>(fp: &mut ExtendedFloat<M>, cb: Cb)
282 where T: FloatRounding<M>,
283 M: Mantissa,
284 Cb: FnOnce(&mut ExtendedFloat<M>, i32)
285 {
286 // Shift all the way left, to ensure a consistent representation.
287 // The following right-shifts do not work for a non-normalized number.
288 fp.normalize();
289
290 // Round so the fraction is in a native mantissa representation,
291 // and avoid overflow/underflow.
292 round_to_float::<T, M, _>(fp, cb);
293 avoid_overflow::<T, M>(fp);
294 }
295
296 /// Get the rounding scheme to determine if we should go up or down.
297 #[inline]
298 #[allow(unused_variables)]
internal_rounding(kind: RoundingKind, sign: Sign) -> RoundingKind299 pub(crate) fn internal_rounding(kind: RoundingKind, sign: Sign)
300 -> RoundingKind
301 {
302 #[cfg(not(feature = "rounding"))] {
303 RoundingKind::NearestTieEven
304 }
305
306 #[cfg(feature = "rounding")] {
307 match sign {
308 Sign::Positive => {
309 match kind {
310 RoundingKind::TowardPositiveInfinity => RoundingKind::Upward,
311 RoundingKind::TowardNegativeInfinity => RoundingKind::Downward,
312 RoundingKind::TowardZero => RoundingKind::Downward,
313 _ => kind,
314 }
315 },
316 Sign::Negative => {
317 match kind {
318 RoundingKind::TowardPositiveInfinity => RoundingKind::Downward,
319 RoundingKind::TowardNegativeInfinity => RoundingKind::Upward,
320 RoundingKind::TowardZero => RoundingKind::Downward,
321 _ => kind,
322 }
323 },
324 }
325 }
326 }
327
/// Get the global, default rounding scheme for a value with the given sign.
///
/// Without the `rounding` feature this is always
/// `RoundingKind::NearestTieEven`. With it, the current value of the
/// `FLOAT_ROUNDING` static is passed through `internal_rounding`.
#[cfg(feature = "correct")]
#[inline]
#[allow(unused_variables)]
pub(crate) fn global_rounding(sign: Sign) -> RoundingKind {
    #[cfg(not(feature = "rounding"))] {
        RoundingKind::NearestTieEven
    }

    #[cfg(feature = "rounding")] {
        // Reading the static mutable `FLOAT_ROUNDING` is the only unsafe
        // operation, so the unsafe block covers just that read.
        // NOTE(review): no synchronization is visible here; confirm that
        // writes to `FLOAT_ROUNDING` never race with this read.
        let configured = unsafe { FLOAT_ROUNDING };
        internal_rounding(configured, sign)
    }
}
344
345 // TESTS
346 // -----
347
#[cfg(test)]
mod tests {
    use float::ExtendedFloat80;
    use super::*;

    // NEAREST ROUNDING

    #[test]
    fn round_nearest_test() {
        // (mantissa, expected is_above, expected is_halfway), shifting 6 bits.
        let cases = [
            // Exactly halfway (b'1100000')
            (0x60, false, true),
            // Above halfway (b'1100001')
            (0x61, true, false),
            // Below halfway (b'1011111')
            (0x5F, false, false),
        ];
        for &(mant, is_above, is_halfway) in &cases {
            let mut fp = ExtendedFloat80 { mant: mant, exp: 0 };
            assert_eq!(round_nearest(&mut fp, 6), (is_above, is_halfway));
            assert_eq!(fp.mant, 1);
        }
    }

    #[test]
    fn round_nearest_tie_even_test() {
        // (mantissa, expected mantissa after shifting 6 bits).
        let cases = [
            // Halfway, round-up
            (0x60, 2),
            // Halfway, round-down
            (0x20, 0),
            // Above halfway, round-up
            (0x61, 2),
            (0x21, 1),
            // Below halfway, round-down
            (0x5F, 1),
            (0x1F, 0),
        ];
        for &(mant, expected) in &cases {
            let mut fp = ExtendedFloat80 { mant: mant, exp: 0 };
            round_nearest_tie_even(&mut fp, 6);
            assert_eq!(fp.mant, expected);
        }
    }

    #[test]
    fn round_nearest_tie_away_zero_test() {
        // (mantissa, expected mantissa after shifting 6 bits).
        let cases = [
            // Halfway, round-up
            (0x60, 2),
            (0x20, 1),
            // Above halfway, round-up
            (0x61, 2),
            (0x21, 1),
            // Below halfway, round-down
            (0x5F, 1),
            (0x1F, 0),
        ];
        for &(mant, expected) in &cases {
            let mut fp = ExtendedFloat80 { mant: mant, exp: 0 };
            round_nearest_tie_away_zero(&mut fp, 6);
            assert_eq!(fp.mant, expected);
        }
    }

    // DIRECTED ROUNDING

    #[test]
    fn round_upward_test() {
        // (mantissa, expected mantissa after shifting 6 bits).
        let cases = [
            // b0000000
            (0x00, 0),
            // b1000000
            (0x40, 1),
            // b1100000
            (0x60, 2),
            // b1110000
            (0x70, 2),
        ];
        for &(mant, expected) in &cases {
            let mut fp = ExtendedFloat80 { mant: mant, exp: 0 };
            round_upward(&mut fp, 6);
            assert_eq!(fp.mant, expected);
        }
    }

    #[test]
    fn round_downward_test() {
        // (mantissa, expected mantissa after shifting 6 bits).
        let cases = [
            // b0000000
            (0x00, 0),
            // b1000000
            (0x40, 1),
            // b1100000
            (0x60, 1),
            // b1110000
            (0x70, 1),
        ];
        for &(mant, expected) in &cases {
            let mut fp = ExtendedFloat80 { mant: mant, exp: 0 };
            round_downward(&mut fp, 6);
            assert_eq!(fp.mant, expected);
        }
    }

    // HIGH-LEVEL

    #[test]
    fn round_to_float_test() {
        // (mantissa, exponent, expected mantissa, expected exponent).
        let cases = [
            // Denormal
            (1<<63, f64::DENORMAL_EXPONENT - 15, 1<<48, f64::DENORMAL_EXPONENT),
            // Halfway, round-down
            // (b'1000000000000000000000000000000000000000000000000000010000000000')
            (0x8000000000000400, -63, 1<<52, -52),
            // Halfway, round-up
            // (b'1000000000000000000000000000000000000000000000000000110000000000')
            (0x8000000000000C00, -63, (1<<52) + 2, -52),
            // Above halfway
            (0x8000000000000401, -63, (1<<52) + 1, -52),
            (0x8000000000000C01, -63, (1<<52) + 2, -52),
            // Below halfway
            (0x80000000000003FF, -63, 1<<52, -52),
            (0x8000000000000BFF, -63, (1<<52) + 1, -52),
        ];
        for &(mant, exp, expected_mant, expected_exp) in &cases {
            let mut fp = ExtendedFloat80 { mant: mant, exp: exp };
            round_to_float::<f64, _, _>(&mut fp, round_nearest_tie_even);
            assert_eq!(fp.mant, expected_mant);
            assert_eq!(fp.exp, expected_exp);
        }
    }

    #[test]
    fn avoid_overflow_test() {
        // (exponent, expected mantissa, expected exponent); the input
        // mantissa is the same in both cases.
        let cases = [
            // Avoid overflow, fails by 1
            (f64::MAX_EXPONENT + 5, 0xFFFFFFFFFFFF, f64::MAX_EXPONENT + 5),
            // Avoid overflow, succeeds
            (f64::MAX_EXPONENT + 4, 0x1FFFFFFFFFFFE0, f64::MAX_EXPONENT - 1),
        ];
        for &(exp, expected_mant, expected_exp) in &cases {
            let mut fp = ExtendedFloat80 { mant: 0xFFFFFFFFFFFF, exp: exp };
            avoid_overflow::<f64, _>(&mut fp);
            assert_eq!(fp.mant, expected_mant);
            assert_eq!(fp.exp, expected_exp);
        }
    }

    #[test]
    fn round_to_native_test() {
        // (mantissa, exponent, expected mantissa, expected exponent).
        let cases = [
            // Overflow
            (0xFFFFFFFFFFFF, f64::MAX_EXPONENT + 4, 0x1FFFFFFFFFFFE0, f64::MAX_EXPONENT - 1),
            // Need denormal
            (1, f64::DENORMAL_EXPONENT + 48, 1<<48, f64::DENORMAL_EXPONENT),
            // Halfway, round-down
            // (b'10000000000000000000000000000000000000000000000000000100000')
            (0x400000000000020, -58, 1<<52, -52),
            // Halfway, round-up
            // (b'10000000000000000000000000000000000000000000000000001100000')
            (0x400000000000060, -58, (1<<52) + 2, -52),
            // Above halfway
            (0x400000000000021, -58, (1<<52) + 1, -52),
            (0x400000000000061, -58, (1<<52) + 2, -52),
            // Below halfway
            (0x40000000000001F, -58, 1<<52, -52),
            (0x40000000000005F, -58, (1<<52) + 1, -52),
            // Underflow
            // Adapted from failures in strtod.
            (18446744073709550712, -1139, 0, 0),
            (18446744073709551460, -1139, 0, 0),
            (9223372036854776103, -1138, 1, -1074),
        ];
        for &(mant, exp, expected_mant, expected_exp) in &cases {
            let mut fp = ExtendedFloat80 { mant: mant, exp: exp };
            round_to_native::<f64, _, _>(&mut fp, round_nearest_tie_even);
            assert_eq!(fp.mant, expected_mant);
            assert_eq!(fp.exp, expected_exp);
        }
    }
}
614