1 #[cfg(feature = "serde")]
2 use serde::{Deserialize, Serialize};
3 
4 #[cfg(feature = "bytemuck")]
5 use bytemuck::{Pod, Zeroable};
6 
7 use core::{
8     cmp::Ordering,
9     fmt::{
10         Binary, Debug, Display, Error, Formatter, LowerExp, LowerHex, Octal, UpperExp, UpperHex,
11     },
12     num::{FpCategory, ParseFloatError},
13     str::FromStr,
14 };
15 
16 pub(crate) mod convert;
17 
/// A 16-bit floating point number stored in the [`bfloat16`] layout.
///
/// [`bfloat16`] is the IEEE 754 `binary32` format (`f32`) cut down to its upper 16 bits. It keeps
/// roughly the dynamic range of `f32` and pays for that with precision: [`bf16`] carries only
/// 8 bits of precision, whereas [`f16`] carries 11.
///
/// As with [`f16`], no arithmetic is provided on [`bf16`]; the type is meant for compact storage.
/// Do the math in `f32` (or a wider type) and convert to and from [`bf16`] at the edges.
///
/// The value is held as its raw `u16` bit pattern, and the struct is `repr(transparent)` over
/// that `u16`.
///
/// [`bfloat16`]: https://en.wikipedia.org/wiki/Bfloat16_floating-point_format
/// [`bf16`]: struct.bf16.html
/// [`f16`]: struct.f16.html
#[repr(transparent)]
#[derive(Clone, Copy, Default)]
#[allow(non_camel_case_types)]
#[cfg_attr(feature = "bytemuck", derive(Zeroable, Pod))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct bf16(u16);
38 
39 impl bf16 {
40     /// Constructs a [`bf16`](struct.bf16.html) value from the raw bits.
41     #[inline]
from_bits(bits: u16) -> bf1642     pub const fn from_bits(bits: u16) -> bf16 {
43         bf16(bits)
44     }
45 
46     /// Constructs a [`bf16`](struct.bf16.html) value from a 32-bit floating point value.
47     ///
48     /// If the 32-bit value is too large to fit, ±∞ will result. NaN values are preserved.
49     /// Subnormal values that are too tiny to be represented will result in ±0. All other values
50     /// are truncated and rounded to the nearest representable value.
51     #[inline]
from_f32(value: f32) -> bf1652     pub fn from_f32(value: f32) -> bf16 {
53         bf16(convert::f32_to_bf16(value))
54     }
55 
56     /// Constructs a [`bf16`](struct.bf16.html) value from a 64-bit floating point value.
57     ///
58     /// If the 64-bit value is to large to fit, ±∞ will result. NaN values are preserved.
59     /// 64-bit subnormal values are too tiny to be represented and result in ±0. Exponents that
60     /// underflow the minimum exponent will result in subnormals or ±0. All other values are
61     /// truncated and rounded to the nearest representable value.
62     #[inline]
from_f64(value: f64) -> bf1663     pub fn from_f64(value: f64) -> bf16 {
64         bf16(convert::f64_to_bf16(value))
65     }
66 
67     /// Converts a [`bf16`](struct.bf16.html) into the underlying bit representation.
68     #[inline]
to_bits(self) -> u1669     pub const fn to_bits(self) -> u16 {
70         self.0
71     }
72 
73     /// Return the memory representation of the underlying bit representation as a byte array in
74     /// little-endian byte order.
75     ///
76     /// # Examples
77     ///
78     /// ```rust
79     /// # use half::prelude::*;
80     /// let bytes = bf16::from_f32(12.5).to_le_bytes();
81     /// assert_eq!(bytes, [0x48, 0x41]);
82     /// ```
83     #[inline]
to_le_bytes(self) -> [u8; 2]84     pub fn to_le_bytes(self) -> [u8; 2] {
85         self.0.to_le_bytes()
86     }
87 
88     /// Return the memory representation of the underlying bit representation as a byte array in
89     /// big-endian (network) byte order.
90     ///
91     /// # Examples
92     ///
93     /// ```rust
94     /// # use half::prelude::*;
95     /// let bytes = bf16::from_f32(12.5).to_be_bytes();
96     /// assert_eq!(bytes, [0x41, 0x48]);
97     /// ```
98     #[inline]
to_be_bytes(self) -> [u8; 2]99     pub fn to_be_bytes(self) -> [u8; 2] {
100         self.0.to_be_bytes()
101     }
102 
103     /// Return the memory representation of the underlying bit representation as a byte array in
104     /// native byte order.
105     ///
106     /// As the target platform's native endianness is used, portable code should use `to_be_bytes`
107     /// or `to_le_bytes`, as appropriate, instead.
108     ///
109     /// # Examples
110     ///
111     /// ```rust
112     /// # use half::prelude::*;
113     /// let bytes = bf16::from_f32(12.5).to_ne_bytes();
114     /// assert_eq!(bytes, if cfg!(target_endian = "big") {
115     ///     [0x41, 0x48]
116     /// } else {
117     ///     [0x48, 0x41]
118     /// });
119     /// ```
120     #[inline]
to_ne_bytes(self) -> [u8; 2]121     pub fn to_ne_bytes(self) -> [u8; 2] {
122         self.0.to_ne_bytes()
123     }
124 
125     /// Create a floating point value from its representation as a byte array in little endian.
126     ///
127     /// # Examples
128     ///
129     /// ```rust
130     /// # use half::prelude::*;
131     /// let value = bf16::from_le_bytes([0x48, 0x41]);
132     /// assert_eq!(value, bf16::from_f32(12.5));
133     /// ```
134     #[inline]
from_le_bytes(bytes: [u8; 2]) -> bf16135     pub fn from_le_bytes(bytes: [u8; 2]) -> bf16 {
136         bf16::from_bits(u16::from_le_bytes(bytes))
137     }
138 
139     /// Create a floating point value from its representation as a byte array in big endian.
140     ///
141     /// # Examples
142     ///
143     /// ```rust
144     /// # use half::prelude::*;
145     /// let value = bf16::from_be_bytes([0x41, 0x48]);
146     /// assert_eq!(value, bf16::from_f32(12.5));
147     /// ```
148     #[inline]
from_be_bytes(bytes: [u8; 2]) -> bf16149     pub fn from_be_bytes(bytes: [u8; 2]) -> bf16 {
150         bf16::from_bits(u16::from_be_bytes(bytes))
151     }
152 
153     /// Create a floating point value from its representation as a byte array in native endian.
154     ///
155     /// As the target platform's native endianness is used, portable code likely wants to use
156     /// `from_be_bytes` or `from_le_bytes`, as appropriate instead.
157     ///
158     /// # Examples
159     ///
160     /// ```rust
161     /// # use half::prelude::*;
162     /// let value = bf16::from_ne_bytes(if cfg!(target_endian = "big") {
163     ///     [0x41, 0x48]
164     /// } else {
165     ///     [0x48, 0x41]
166     /// });
167     /// assert_eq!(value, bf16::from_f32(12.5));
168     /// ```
169     #[inline]
from_ne_bytes(bytes: [u8; 2]) -> bf16170     pub fn from_ne_bytes(bytes: [u8; 2]) -> bf16 {
171         bf16::from_bits(u16::from_ne_bytes(bytes))
172     }
173 
174     /// Converts a [`bf16`](struct.bf16.html) value into an `f32` value.
175     ///
176     /// This conversion is lossless as all values can be represented exactly in `f32`.
177     #[inline]
to_f32(self) -> f32178     pub fn to_f32(self) -> f32 {
179         convert::bf16_to_f32(self.0)
180     }
181 
182     /// Converts a [`bf16`](struct.bf16.html) value into an `f64` value.
183     ///
184     /// This conversion is lossless as all values can be represented exactly in `f64`.
185     #[inline]
to_f64(self) -> f64186     pub fn to_f64(self) -> f64 {
187         convert::bf16_to_f64(self.0)
188     }
189 
190     /// Returns `true` if this value is NaN and `false` otherwise.
191     ///
192     /// # Examples
193     ///
194     /// ```rust
195     /// # use half::prelude::*;
196     ///
197     /// let nan = bf16::NAN;
198     /// let f = bf16::from_f32(7.0_f32);
199     ///
200     /// assert!(nan.is_nan());
201     /// assert!(!f.is_nan());
202     /// ```
203     #[inline]
is_nan(self) -> bool204     pub const fn is_nan(self) -> bool {
205         self.0 & 0x7FFFu16 > 0x7F80u16
206     }
207 
208     /// Returns `true` if this value is ±∞ and `false` otherwise.
209     ///
210     /// # Examples
211     ///
212     /// ```rust
213     /// # use half::prelude::*;
214     ///
215     /// let f = bf16::from_f32(7.0f32);
216     /// let inf = bf16::INFINITY;
217     /// let neg_inf = bf16::NEG_INFINITY;
218     /// let nan = bf16::NAN;
219     ///
220     /// assert!(!f.is_infinite());
221     /// assert!(!nan.is_infinite());
222     ///
223     /// assert!(inf.is_infinite());
224     /// assert!(neg_inf.is_infinite());
225     /// ```
226     #[inline]
is_infinite(self) -> bool227     pub const fn is_infinite(self) -> bool {
228         self.0 & 0x7FFFu16 == 0x7F80u16
229     }
230 
231     /// Returns `true` if this number is neither infinite nor NaN.
232     ///
233     /// # Examples
234     ///
235     /// ```rust
236     /// # use half::prelude::*;
237     ///
238     /// let f = bf16::from_f32(7.0f32);
239     /// let inf = bf16::INFINITY;
240     /// let neg_inf = bf16::NEG_INFINITY;
241     /// let nan = bf16::NAN;
242     ///
243     /// assert!(f.is_finite());
244     ///
245     /// assert!(!nan.is_finite());
246     /// assert!(!inf.is_finite());
247     /// assert!(!neg_inf.is_finite());
248     /// ```
249     #[inline]
is_finite(self) -> bool250     pub const fn is_finite(self) -> bool {
251         self.0 & 0x7F80u16 != 0x7F80u16
252     }
253 
254     /// Returns `true` if the number is neither zero, infinite, subnormal, or NaN.
255     ///
256     /// # Examples
257     ///
258     /// ```rust
259     /// # use half::prelude::*;
260     ///
261     /// let min = bf16::MIN_POSITIVE;
262     /// let max = bf16::MAX;
263     /// let lower_than_min = bf16::from_f32(1.0e-39_f32);
264     /// let zero = bf16::from_f32(0.0_f32);
265     ///
266     /// assert!(min.is_normal());
267     /// assert!(max.is_normal());
268     ///
269     /// assert!(!zero.is_normal());
270     /// assert!(!bf16::NAN.is_normal());
271     /// assert!(!bf16::INFINITY.is_normal());
272     /// // Values between 0 and `min` are subnormal.
273     /// assert!(!lower_than_min.is_normal());
274     /// ```
275     #[inline]
is_normal(self) -> bool276     pub fn is_normal(self) -> bool {
277         let exp = self.0 & 0x7F80u16;
278         exp != 0x7F80u16 && exp != 0
279     }
280 
281     /// Returns the floating point category of the number.
282     ///
283     /// If only one property is going to be tested, it is generally faster to use the specific
284     /// predicate instead.
285     ///
286     /// # Examples
287     ///
288     /// ```rust
289     /// use std::num::FpCategory;
290     /// # use half::prelude::*;
291     ///
292     /// let num = bf16::from_f32(12.4_f32);
293     /// let inf = bf16::INFINITY;
294     ///
295     /// assert_eq!(num.classify(), FpCategory::Normal);
296     /// assert_eq!(inf.classify(), FpCategory::Infinite);
297     /// ```
classify(self) -> FpCategory298     pub fn classify(self) -> FpCategory {
299         let exp = self.0 & 0x7F80u16;
300         let man = self.0 & 0x007Fu16;
301         match (exp, man) {
302             (0, 0) => FpCategory::Zero,
303             (0, _) => FpCategory::Subnormal,
304             (0x7F80u16, 0) => FpCategory::Infinite,
305             (0x7F80u16, _) => FpCategory::Nan,
306             _ => FpCategory::Normal,
307         }
308     }
309 
310     /// Returns a number that represents the sign of `self`.
311     ///
312     /// * 1.0 if the number is positive, +0.0 or `INFINITY`
313     /// * −1.0 if the number is negative, −0.0` or `NEG_INFINITY`
314     /// * NaN if the number is NaN
315     ///
316     /// # Examples
317     ///
318     /// ```rust
319     /// # use half::prelude::*;
320     ///
321     /// let f = bf16::from_f32(3.5_f32);
322     ///
323     /// assert_eq!(f.signum(), bf16::from_f32(1.0));
324     /// assert_eq!(bf16::NEG_INFINITY.signum(), bf16::from_f32(-1.0));
325     ///
326     /// assert!(bf16::NAN.signum().is_nan());
327     /// ```
signum(self) -> bf16328     pub fn signum(self) -> bf16 {
329         if self.is_nan() {
330             self
331         } else if self.0 & 0x8000u16 != 0 {
332             bf16::from_f32(-1.0)
333         } else {
334             bf16::from_f32(1.0)
335         }
336     }
337 
338     /// Returns `true` if and only if `self` has a positive sign, including +0.0, NaNs with a
339     /// positive sign bit and +∞.
340     ///
341     /// # Examples
342     ///
343     /// ```rust
344     /// # use half::prelude::*;
345     ///
346     /// let nan = bf16::NAN;
347     /// let f = bf16::from_f32(7.0_f32);
348     /// let g = bf16::from_f32(-7.0_f32);
349     ///
350     /// assert!(f.is_sign_positive());
351     /// assert!(!g.is_sign_positive());
352     /// // NaN can be either positive or negative
353     /// assert!(nan.is_sign_positive() != nan.is_sign_negative());
354     /// ```
355     #[inline]
is_sign_positive(self) -> bool356     pub const fn is_sign_positive(self) -> bool {
357         self.0 & 0x8000u16 == 0
358     }
359 
360     /// Returns `true` if and only if `self` has a negative sign, including −0.0, NaNs with a
361     /// negative sign bit and −∞.
362     ///
363     /// # Examples
364     ///
365     /// ```rust
366     /// # use half::prelude::*;
367     ///
368     /// let nan = bf16::NAN;
369     /// let f = bf16::from_f32(7.0f32);
370     /// let g = bf16::from_f32(-7.0f32);
371     ///
372     /// assert!(!f.is_sign_negative());
373     /// assert!(g.is_sign_negative());
374     /// // NaN can be either positive or negative
375     /// assert!(nan.is_sign_positive() != nan.is_sign_negative());
376     /// ```
377     #[inline]
is_sign_negative(self) -> bool378     pub const fn is_sign_negative(self) -> bool {
379         self.0 & 0x8000u16 != 0
380     }
381 
382     /// Approximate number of [`bf16`](struct.bf16.html) significant digits in base 10.
383     pub const DIGITS: u32 = 2;
384     /// [`bf16`](struct.bf16.html)
385     /// [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon) value.
386     ///
387     /// This is the difference between 1.0 and the next largest representable number.
388     pub const EPSILON: bf16 = bf16(0x3C00u16);
389     /// [`bf16`](struct.bf16.html) positive Infinity (+∞).
390     pub const INFINITY: bf16 = bf16(0x7F80u16);
391     /// Number of [`bf16`](struct.bf16.html) significant digits in base 2.
392     pub const MANTISSA_DIGITS: u32 = 8;
393     /// Largest finite [`bf16`](struct.bf16.html) value.
394     pub const MAX: bf16 = bf16(0x7F7F);
395     /// Maximum possible [`bf16`](struct.bf16.html) power of 10 exponent.
396     pub const MAX_10_EXP: i32 = 38;
397     /// Maximum possible [`bf16`](struct.bf16.html) power of 2 exponent.
398     pub const MAX_EXP: i32 = 128;
399     /// Smallest finite [`bf16`](struct.bf16.html) value.
400     pub const MIN: bf16 = bf16(0xFF7F);
401     /// Minimum possible normal [`bf16`](struct.bf16.html) power of 10 exponent.
402     pub const MIN_10_EXP: i32 = -37;
403     /// One greater than the minimum possible normal [`bf16`](struct.bf16.html) power of 2 exponent.
404     pub const MIN_EXP: i32 = -125;
405     /// Smallest positive normal [`bf16`](struct.bf16.html) value.
406     pub const MIN_POSITIVE: bf16 = bf16(0x0080u16);
407     /// [`bf16`](struct.bf16.html) Not a Number (NaN).
408     pub const NAN: bf16 = bf16(0x7FC0u16);
409     /// [`bf16`](struct.bf16.html) negative infinity (-∞).
410     pub const NEG_INFINITY: bf16 = bf16(0xFF80u16);
411     /// The radix or base of the internal representation of [`bf16`](struct.bf16.html).
412     pub const RADIX: u32 = 2;
413 
414     /// Minimum positive subnormal [`bf16`](struct.bf16.html) value.
415     pub const MIN_POSITIVE_SUBNORMAL: bf16 = bf16(0x0001u16);
416     /// Maximum subnormal [`bf16`](struct.bf16.html) value.
417     pub const MAX_SUBNORMAL: bf16 = bf16(0x007Fu16);
418 
419     /// [`bf16`](struct.bf16.html) 1
420     pub const ONE: bf16 = bf16(0x3F80u16);
421     /// [`bf16`](struct.bf16.html) 0
422     pub const ZERO: bf16 = bf16(0x0000u16);
423     /// [`bf16`](struct.bf16.html) -0
424     pub const NEG_ZERO: bf16 = bf16(0x8000u16);
425 
426     /// [`bf16`](struct.bf16.html) Euler's number (ℯ).
427     pub const E: bf16 = bf16(0x402Eu16);
428     /// [`bf16`](struct.bf16.html) Archimedes' constant (π).
429     pub const PI: bf16 = bf16(0x4049u16);
430     /// [`bf16`](struct.bf16.html) 1/π
431     pub const FRAC_1_PI: bf16 = bf16(0x3EA3u16);
432     /// [`bf16`](struct.bf16.html) 1/√2
433     pub const FRAC_1_SQRT_2: bf16 = bf16(0x3F35u16);
434     /// [`bf16`](struct.bf16.html) 2/π
435     pub const FRAC_2_PI: bf16 = bf16(0x3F23u16);
436     /// [`bf16`](struct.bf16.html) 2/√π
437     pub const FRAC_2_SQRT_PI: bf16 = bf16(0x3F90u16);
438     /// [`bf16`](struct.bf16.html) π/2
439     pub const FRAC_PI_2: bf16 = bf16(0x3FC9u16);
440     /// [`bf16`](struct.bf16.html) π/3
441     pub const FRAC_PI_3: bf16 = bf16(0x3F86u16);
442     /// [`bf16`](struct.bf16.html) π/4
443     pub const FRAC_PI_4: bf16 = bf16(0x3F49u16);
444     /// [`bf16`](struct.bf16.html) π/6
445     pub const FRAC_PI_6: bf16 = bf16(0x3F06u16);
446     /// [`bf16`](struct.bf16.html) π/8
447     pub const FRAC_PI_8: bf16 = bf16(0x3EC9u16);
448     /// [`bf16`](struct.bf16.html) ���� 10
449     pub const LN_10: bf16 = bf16(0x4013u16);
450     /// [`bf16`](struct.bf16.html) ���� 2
451     pub const LN_2: bf16 = bf16(0x3F31u16);
452     /// [`bf16`](struct.bf16.html) ������₁₀ℯ
453     pub const LOG10_E: bf16 = bf16(0x3EDEu16);
454     /// [`bf16`](struct.bf16.html) ������₁₀2
455     pub const LOG10_2: bf16 = bf16(0x3E9Au16);
456     /// [`bf16`](struct.bf16.html) ������₂ℯ
457     pub const LOG2_E: bf16 = bf16(0x3FB9u16);
458     /// [`bf16`](struct.bf16.html) ������₂10
459     pub const LOG2_10: bf16 = bf16(0x4055u16);
460     /// [`bf16`](struct.bf16.html) √2
461     pub const SQRT_2: bf16 = bf16(0x3FB5u16);
462 }
463 
464 impl From<bf16> for f32 {
465     #[inline]
from(x: bf16) -> f32466     fn from(x: bf16) -> f32 {
467         x.to_f32()
468     }
469 }
470 
471 impl From<bf16> for f64 {
472     #[inline]
from(x: bf16) -> f64473     fn from(x: bf16) -> f64 {
474         x.to_f64()
475     }
476 }
477 
478 impl From<i8> for bf16 {
479     #[inline]
from(x: i8) -> bf16480     fn from(x: i8) -> bf16 {
481         // Convert to f32, then to bf16
482         bf16::from_f32(f32::from(x))
483     }
484 }
485 
486 impl From<u8> for bf16 {
487     #[inline]
from(x: u8) -> bf16488     fn from(x: u8) -> bf16 {
489         // Convert to f32, then to f16
490         bf16::from_f32(f32::from(x))
491     }
492 }
493 
494 impl PartialEq for bf16 {
eq(&self, other: &bf16) -> bool495     fn eq(&self, other: &bf16) -> bool {
496         if self.is_nan() || other.is_nan() {
497             false
498         } else {
499             (self.0 == other.0) || ((self.0 | other.0) & 0x7FFFu16 == 0)
500         }
501     }
502 }
503 
504 impl PartialOrd for bf16 {
partial_cmp(&self, other: &bf16) -> Option<Ordering>505     fn partial_cmp(&self, other: &bf16) -> Option<Ordering> {
506         if self.is_nan() || other.is_nan() {
507             None
508         } else {
509             let neg = self.0 & 0x8000u16 != 0;
510             let other_neg = other.0 & 0x8000u16 != 0;
511             match (neg, other_neg) {
512                 (false, false) => Some(self.0.cmp(&other.0)),
513                 (false, true) => {
514                     if (self.0 | other.0) & 0x7FFFu16 == 0 {
515                         Some(Ordering::Equal)
516                     } else {
517                         Some(Ordering::Greater)
518                     }
519                 }
520                 (true, false) => {
521                     if (self.0 | other.0) & 0x7FFFu16 == 0 {
522                         Some(Ordering::Equal)
523                     } else {
524                         Some(Ordering::Less)
525                     }
526                 }
527                 (true, true) => Some(other.0.cmp(&self.0)),
528             }
529         }
530     }
531 
lt(&self, other: &bf16) -> bool532     fn lt(&self, other: &bf16) -> bool {
533         if self.is_nan() || other.is_nan() {
534             false
535         } else {
536             let neg = self.0 & 0x8000u16 != 0;
537             let other_neg = other.0 & 0x8000u16 != 0;
538             match (neg, other_neg) {
539                 (false, false) => self.0 < other.0,
540                 (false, true) => false,
541                 (true, false) => (self.0 | other.0) & 0x7FFFu16 != 0,
542                 (true, true) => self.0 > other.0,
543             }
544         }
545     }
546 
le(&self, other: &bf16) -> bool547     fn le(&self, other: &bf16) -> bool {
548         if self.is_nan() || other.is_nan() {
549             false
550         } else {
551             let neg = self.0 & 0x8000u16 != 0;
552             let other_neg = other.0 & 0x8000u16 != 0;
553             match (neg, other_neg) {
554                 (false, false) => self.0 <= other.0,
555                 (false, true) => (self.0 | other.0) & 0x7FFFu16 == 0,
556                 (true, false) => true,
557                 (true, true) => self.0 >= other.0,
558             }
559         }
560     }
561 
gt(&self, other: &bf16) -> bool562     fn gt(&self, other: &bf16) -> bool {
563         if self.is_nan() || other.is_nan() {
564             false
565         } else {
566             let neg = self.0 & 0x8000u16 != 0;
567             let other_neg = other.0 & 0x8000u16 != 0;
568             match (neg, other_neg) {
569                 (false, false) => self.0 > other.0,
570                 (false, true) => (self.0 | other.0) & 0x7FFFu16 != 0,
571                 (true, false) => false,
572                 (true, true) => self.0 < other.0,
573             }
574         }
575     }
576 
ge(&self, other: &bf16) -> bool577     fn ge(&self, other: &bf16) -> bool {
578         if self.is_nan() || other.is_nan() {
579             false
580         } else {
581             let neg = self.0 & 0x8000u16 != 0;
582             let other_neg = other.0 & 0x8000u16 != 0;
583             match (neg, other_neg) {
584                 (false, false) => self.0 >= other.0,
585                 (false, true) => true,
586                 (true, false) => (self.0 | other.0) & 0x7FFFu16 == 0,
587                 (true, true) => self.0 <= other.0,
588             }
589         }
590     }
591 }
592 
593 impl FromStr for bf16 {
594     type Err = ParseFloatError;
from_str(src: &str) -> Result<bf16, ParseFloatError>595     fn from_str(src: &str) -> Result<bf16, ParseFloatError> {
596         f32::from_str(src).map(bf16::from_f32)
597     }
598 }
599 
600 impl Debug for bf16 {
fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>601     fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
602         write!(f, "{:?}", self.to_f32())
603     }
604 }
605 
606 impl Display for bf16 {
fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>607     fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
608         write!(f, "{}", self.to_f32())
609     }
610 }
611 
612 impl LowerExp for bf16 {
fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>613     fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
614         write!(f, "{:e}", self.to_f32())
615     }
616 }
617 
618 impl UpperExp for bf16 {
fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>619     fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
620         write!(f, "{:E}", self.to_f32())
621     }
622 }
623 
624 impl Binary for bf16 {
fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>625     fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
626         write!(f, "{:b}", self.0)
627     }
628 }
629 
630 impl Octal for bf16 {
fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>631     fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
632         write!(f, "{:o}", self.0)
633     }
634 }
635 
636 impl LowerHex for bf16 {
fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>637     fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
638         write!(f, "{:x}", self.0)
639     }
640 }
641 
642 impl UpperHex for bf16 {
fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>643     fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
644         write!(f, "{:X}", self.0)
645     }
646 }
647 
#[cfg(feature = "num-traits")]
mod impl_num_traits {
    use super::bf16;
    use num_traits::{FromPrimitive, ToPrimitive};

    /// Conversions out of `bf16`.
    ///
    /// Integer targets are reached by widening to `f32` and letting the `f32` implementation of
    /// `ToPrimitive` do the conversion; float targets use the inherent widening methods.
    ///
    /// The inherent `bf16::to_f32` / `bf16::to_f64` are called by path with `*self` so that the
    /// trait methods of the same name (which take `&self`) are not selected instead.
    impl ToPrimitive for bf16 {
        fn to_i8(&self) -> Option<i8> {
            bf16::to_f32(*self).to_i8()
        }
        fn to_i16(&self) -> Option<i16> {
            bf16::to_f32(*self).to_i16()
        }
        fn to_i32(&self) -> Option<i32> {
            bf16::to_f32(*self).to_i32()
        }
        fn to_i64(&self) -> Option<i64> {
            bf16::to_f32(*self).to_i64()
        }
        fn to_u8(&self) -> Option<u8> {
            bf16::to_f32(*self).to_u8()
        }
        fn to_u16(&self) -> Option<u16> {
            bf16::to_f32(*self).to_u16()
        }
        fn to_u32(&self) -> Option<u32> {
            bf16::to_f32(*self).to_u32()
        }
        fn to_u64(&self) -> Option<u64> {
            bf16::to_f32(*self).to_u64()
        }
        fn to_f32(&self) -> Option<f32> {
            Some(bf16::to_f32(*self))
        }
        fn to_f64(&self) -> Option<f64> {
            Some(bf16::to_f64(*self))
        }
    }

    /// Conversions into `bf16`.
    ///
    /// Every source except `f64` is first converted to `f32` through `ToPrimitive` and then
    /// narrowed with the inherent `bf16::from_f32`; `f64` is narrowed with `bf16::from_f64`.
    impl FromPrimitive for bf16 {
        fn from_i8(n: i8) -> Option<Self> {
            n.to_f32().map(bf16::from_f32)
        }
        fn from_i16(n: i16) -> Option<Self> {
            n.to_f32().map(bf16::from_f32)
        }
        fn from_i32(n: i32) -> Option<Self> {
            n.to_f32().map(bf16::from_f32)
        }
        fn from_i64(n: i64) -> Option<Self> {
            n.to_f32().map(bf16::from_f32)
        }
        fn from_u8(n: u8) -> Option<Self> {
            n.to_f32().map(bf16::from_f32)
        }
        fn from_u16(n: u16) -> Option<Self> {
            n.to_f32().map(bf16::from_f32)
        }
        fn from_u32(n: u32) -> Option<Self> {
            n.to_f32().map(bf16::from_f32)
        }
        fn from_u64(n: u64) -> Option<Self> {
            n.to_f32().map(bf16::from_f32)
        }
        fn from_f32(n: f32) -> Option<Self> {
            n.to_f32().map(bf16::from_f32)
        }
        fn from_f64(n: f64) -> Option<Self> {
            n.to_f64().map(bf16::from_f64)
        }
    }
}
719 
720 #[allow(
721     clippy::cognitive_complexity,
722     clippy::float_cmp,
723     clippy::neg_cmp_op_on_partial_ord
724 )]
725 #[cfg(test)]
726 mod test {
727     use super::*;
728     use core;
729     use core::cmp::Ordering;
730     use quickcheck_macros::quickcheck;
731 
732     #[test]
test_bf16_consts_from_f32()733     fn test_bf16_consts_from_f32() {
734         let one = bf16::from_f32(1.0);
735         let zero = bf16::from_f32(0.0);
736         let neg_zero = bf16::from_f32(-0.0);
737         let inf = bf16::from_f32(core::f32::INFINITY);
738         let neg_inf = bf16::from_f32(core::f32::NEG_INFINITY);
739         let nan = bf16::from_f32(core::f32::NAN);
740 
741         assert_eq!(bf16::ONE, one);
742         assert_eq!(bf16::ZERO, zero);
743         assert_eq!(bf16::NEG_ZERO, neg_zero);
744         assert_eq!(bf16::INFINITY, inf);
745         assert_eq!(bf16::NEG_INFINITY, neg_inf);
746         assert!(nan.is_nan());
747         assert!(bf16::NAN.is_nan());
748 
749         let e = bf16::from_f32(core::f32::consts::E);
750         let pi = bf16::from_f32(core::f32::consts::PI);
751         let frac_1_pi = bf16::from_f32(core::f32::consts::FRAC_1_PI);
752         let frac_1_sqrt_2 = bf16::from_f32(core::f32::consts::FRAC_1_SQRT_2);
753         let frac_2_pi = bf16::from_f32(core::f32::consts::FRAC_2_PI);
754         let frac_2_sqrt_pi = bf16::from_f32(core::f32::consts::FRAC_2_SQRT_PI);
755         let frac_pi_2 = bf16::from_f32(core::f32::consts::FRAC_PI_2);
756         let frac_pi_3 = bf16::from_f32(core::f32::consts::FRAC_PI_3);
757         let frac_pi_4 = bf16::from_f32(core::f32::consts::FRAC_PI_4);
758         let frac_pi_6 = bf16::from_f32(core::f32::consts::FRAC_PI_6);
759         let frac_pi_8 = bf16::from_f32(core::f32::consts::FRAC_PI_8);
760         let ln_10 = bf16::from_f32(core::f32::consts::LN_10);
761         let ln_2 = bf16::from_f32(core::f32::consts::LN_2);
762         let log10_e = bf16::from_f32(core::f32::consts::LOG10_E);
763         // core::f32::consts::LOG10_2 requires rustc 1.43.0
764         let log10_2 = bf16::from_f32(2f32.log10());
765         let log2_e = bf16::from_f32(core::f32::consts::LOG2_E);
766         // core::f32::consts::LOG2_10 requires rustc 1.43.0
767         let log2_10 = bf16::from_f32(10f32.log2());
768         let sqrt_2 = bf16::from_f32(core::f32::consts::SQRT_2);
769 
770         assert_eq!(bf16::E, e);
771         assert_eq!(bf16::PI, pi);
772         assert_eq!(bf16::FRAC_1_PI, frac_1_pi);
773         assert_eq!(bf16::FRAC_1_SQRT_2, frac_1_sqrt_2);
774         assert_eq!(bf16::FRAC_2_PI, frac_2_pi);
775         assert_eq!(bf16::FRAC_2_SQRT_PI, frac_2_sqrt_pi);
776         assert_eq!(bf16::FRAC_PI_2, frac_pi_2);
777         assert_eq!(bf16::FRAC_PI_3, frac_pi_3);
778         assert_eq!(bf16::FRAC_PI_4, frac_pi_4);
779         assert_eq!(bf16::FRAC_PI_6, frac_pi_6);
780         assert_eq!(bf16::FRAC_PI_8, frac_pi_8);
781         assert_eq!(bf16::LN_10, ln_10);
782         assert_eq!(bf16::LN_2, ln_2);
783         assert_eq!(bf16::LOG10_E, log10_e);
784         assert_eq!(bf16::LOG10_2, log10_2);
785         assert_eq!(bf16::LOG2_E, log2_e);
786         assert_eq!(bf16::LOG2_10, log2_10);
787         assert_eq!(bf16::SQRT_2, sqrt_2);
788     }
789 
790     #[test]
test_bf16_consts_from_f64()791     fn test_bf16_consts_from_f64() {
792         let one = bf16::from_f64(1.0);
793         let zero = bf16::from_f64(0.0);
794         let neg_zero = bf16::from_f64(-0.0);
795         let inf = bf16::from_f64(core::f64::INFINITY);
796         let neg_inf = bf16::from_f64(core::f64::NEG_INFINITY);
797         let nan = bf16::from_f64(core::f64::NAN);
798 
799         assert_eq!(bf16::ONE, one);
800         assert_eq!(bf16::ZERO, zero);
801         assert_eq!(bf16::NEG_ZERO, neg_zero);
802         assert_eq!(bf16::INFINITY, inf);
803         assert_eq!(bf16::NEG_INFINITY, neg_inf);
804         assert!(nan.is_nan());
805         assert!(bf16::NAN.is_nan());
806 
807         let e = bf16::from_f64(core::f64::consts::E);
808         let pi = bf16::from_f64(core::f64::consts::PI);
809         let frac_1_pi = bf16::from_f64(core::f64::consts::FRAC_1_PI);
810         let frac_1_sqrt_2 = bf16::from_f64(core::f64::consts::FRAC_1_SQRT_2);
811         let frac_2_pi = bf16::from_f64(core::f64::consts::FRAC_2_PI);
812         let frac_2_sqrt_pi = bf16::from_f64(core::f64::consts::FRAC_2_SQRT_PI);
813         let frac_pi_2 = bf16::from_f64(core::f64::consts::FRAC_PI_2);
814         let frac_pi_3 = bf16::from_f64(core::f64::consts::FRAC_PI_3);
815         let frac_pi_4 = bf16::from_f64(core::f64::consts::FRAC_PI_4);
816         let frac_pi_6 = bf16::from_f64(core::f64::consts::FRAC_PI_6);
817         let frac_pi_8 = bf16::from_f64(core::f64::consts::FRAC_PI_8);
818         let ln_10 = bf16::from_f64(core::f64::consts::LN_10);
819         let ln_2 = bf16::from_f64(core::f64::consts::LN_2);
820         let log10_e = bf16::from_f64(core::f64::consts::LOG10_E);
821         // core::f64::consts::LOG10_2 requires rustc 1.43.0
822         let log10_2 = bf16::from_f64(2f64.log10());
823         let log2_e = bf16::from_f64(core::f64::consts::LOG2_E);
824         // core::f64::consts::LOG2_10 requires rustc 1.43.0
825         let log2_10 = bf16::from_f64(10f64.log2());
826         let sqrt_2 = bf16::from_f64(core::f64::consts::SQRT_2);
827 
828         assert_eq!(bf16::E, e);
829         assert_eq!(bf16::PI, pi);
830         assert_eq!(bf16::FRAC_1_PI, frac_1_pi);
831         assert_eq!(bf16::FRAC_1_SQRT_2, frac_1_sqrt_2);
832         assert_eq!(bf16::FRAC_2_PI, frac_2_pi);
833         assert_eq!(bf16::FRAC_2_SQRT_PI, frac_2_sqrt_pi);
834         assert_eq!(bf16::FRAC_PI_2, frac_pi_2);
835         assert_eq!(bf16::FRAC_PI_3, frac_pi_3);
836         assert_eq!(bf16::FRAC_PI_4, frac_pi_4);
837         assert_eq!(bf16::FRAC_PI_6, frac_pi_6);
838         assert_eq!(bf16::FRAC_PI_8, frac_pi_8);
839         assert_eq!(bf16::LN_10, ln_10);
840         assert_eq!(bf16::LN_2, ln_2);
841         assert_eq!(bf16::LOG10_E, log10_e);
842         assert_eq!(bf16::LOG10_2, log10_2);
843         assert_eq!(bf16::LOG2_E, log2_e);
844         assert_eq!(bf16::LOG2_10, log2_10);
845         assert_eq!(bf16::SQRT_2, sqrt_2);
846     }
847 
848     #[test]
test_nan_conversion_to_smaller()849     fn test_nan_conversion_to_smaller() {
850         let nan64 = f64::from_bits(0x7FF0_0000_0000_0001u64);
851         let neg_nan64 = f64::from_bits(0xFFF0_0000_0000_0001u64);
852         let nan32 = f32::from_bits(0x7F80_0001u32);
853         let neg_nan32 = f32::from_bits(0xFF80_0001u32);
854         let nan32_from_64 = nan64 as f32;
855         let neg_nan32_from_64 = neg_nan64 as f32;
856         let nan16_from_64 = bf16::from_f64(nan64);
857         let neg_nan16_from_64 = bf16::from_f64(neg_nan64);
858         let nan16_from_32 = bf16::from_f32(nan32);
859         let neg_nan16_from_32 = bf16::from_f32(neg_nan32);
860 
861         assert!(nan64.is_nan() && nan64.is_sign_positive());
862         assert!(neg_nan64.is_nan() && neg_nan64.is_sign_negative());
863         assert!(nan32.is_nan() && nan32.is_sign_positive());
864         assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative());
865         assert!(nan32_from_64.is_nan() && nan32_from_64.is_sign_positive());
866         assert!(neg_nan32_from_64.is_nan() && neg_nan32_from_64.is_sign_negative());
867         assert!(nan16_from_64.is_nan() && nan16_from_64.is_sign_positive());
868         assert!(neg_nan16_from_64.is_nan() && neg_nan16_from_64.is_sign_negative());
869         assert!(nan16_from_32.is_nan() && nan16_from_32.is_sign_positive());
870         assert!(neg_nan16_from_32.is_nan() && neg_nan16_from_32.is_sign_negative());
871     }
872 
873     #[test]
test_nan_conversion_to_larger()874     fn test_nan_conversion_to_larger() {
875         let nan16 = bf16::from_bits(0x7F81u16);
876         let neg_nan16 = bf16::from_bits(0xFF81u16);
877         let nan32 = f32::from_bits(0x7F80_0001u32);
878         let neg_nan32 = f32::from_bits(0xFF80_0001u32);
879         let nan32_from_16 = f32::from(nan16);
880         let neg_nan32_from_16 = f32::from(neg_nan16);
881         let nan64_from_16 = f64::from(nan16);
882         let neg_nan64_from_16 = f64::from(neg_nan16);
883         let nan64_from_32 = f64::from(nan32);
884         let neg_nan64_from_32 = f64::from(neg_nan32);
885 
886         assert!(nan16.is_nan() && nan16.is_sign_positive());
887         assert!(neg_nan16.is_nan() && neg_nan16.is_sign_negative());
888         assert!(nan32.is_nan() && nan32.is_sign_positive());
889         assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative());
890         assert!(nan32_from_16.is_nan() && nan32_from_16.is_sign_positive());
891         assert!(neg_nan32_from_16.is_nan() && neg_nan32_from_16.is_sign_negative());
892         assert!(nan64_from_16.is_nan() && nan64_from_16.is_sign_positive());
893         assert!(neg_nan64_from_16.is_nan() && neg_nan64_from_16.is_sign_negative());
894         assert!(nan64_from_32.is_nan() && nan64_from_32.is_sign_positive());
895         assert!(neg_nan64_from_32.is_nan() && neg_nan64_from_32.is_sign_negative());
896     }
897 
898     #[test]
test_bf16_to_f32()899     fn test_bf16_to_f32() {
900         let f = bf16::from_f32(7.0);
901         assert_eq!(f.to_f32(), 7.0f32);
902 
903         // 7.1 is NOT exactly representable in 16-bit, it's rounded
904         let f = bf16::from_f32(7.1);
905         let diff = (f.to_f32() - 7.1f32).abs();
906         // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1
907         assert!(diff <= 4.0 * bf16::EPSILON.to_f32());
908 
909         let tiny32 = f32::from_bits(0x0001_0000u32);
910         assert_eq!(bf16::from_bits(0x0001).to_f32(), tiny32);
911         assert_eq!(bf16::from_bits(0x0005).to_f32(), 5.0 * tiny32);
912 
913         assert_eq!(bf16::from_bits(0x0001), bf16::from_f32(tiny32));
914         assert_eq!(bf16::from_bits(0x0005), bf16::from_f32(5.0 * tiny32));
915     }
916 
917     #[test]
test_bf16_to_f64()918     fn test_bf16_to_f64() {
919         let f = bf16::from_f64(7.0);
920         assert_eq!(f.to_f64(), 7.0f64);
921 
922         // 7.1 is NOT exactly representable in 16-bit, it's rounded
923         let f = bf16::from_f64(7.1);
924         let diff = (f.to_f64() - 7.1f64).abs();
925         // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1
926         assert!(diff <= 4.0 * bf16::EPSILON.to_f64());
927 
928         let tiny64 = 2.0f64.powi(-133);
929         assert_eq!(bf16::from_bits(0x0001).to_f64(), tiny64);
930         assert_eq!(bf16::from_bits(0x0005).to_f64(), 5.0 * tiny64);
931 
932         assert_eq!(bf16::from_bits(0x0001), bf16::from_f64(tiny64));
933         assert_eq!(bf16::from_bits(0x0005), bf16::from_f64(5.0 * tiny64));
934     }
935 
936     #[test]
test_comparisons()937     fn test_comparisons() {
938         let zero = bf16::from_f64(0.0);
939         let one = bf16::from_f64(1.0);
940         let neg_zero = bf16::from_f64(-0.0);
941         let neg_one = bf16::from_f64(-1.0);
942 
943         assert_eq!(zero.partial_cmp(&neg_zero), Some(Ordering::Equal));
944         assert_eq!(neg_zero.partial_cmp(&zero), Some(Ordering::Equal));
945         assert!(zero == neg_zero);
946         assert!(neg_zero == zero);
947         assert!(!(zero != neg_zero));
948         assert!(!(neg_zero != zero));
949         assert!(!(zero < neg_zero));
950         assert!(!(neg_zero < zero));
951         assert!(zero <= neg_zero);
952         assert!(neg_zero <= zero);
953         assert!(!(zero > neg_zero));
954         assert!(!(neg_zero > zero));
955         assert!(zero >= neg_zero);
956         assert!(neg_zero >= zero);
957 
958         assert_eq!(one.partial_cmp(&neg_zero), Some(Ordering::Greater));
959         assert_eq!(neg_zero.partial_cmp(&one), Some(Ordering::Less));
960         assert!(!(one == neg_zero));
961         assert!(!(neg_zero == one));
962         assert!(one != neg_zero);
963         assert!(neg_zero != one);
964         assert!(!(one < neg_zero));
965         assert!(neg_zero < one);
966         assert!(!(one <= neg_zero));
967         assert!(neg_zero <= one);
968         assert!(one > neg_zero);
969         assert!(!(neg_zero > one));
970         assert!(one >= neg_zero);
971         assert!(!(neg_zero >= one));
972 
973         assert_eq!(one.partial_cmp(&neg_one), Some(Ordering::Greater));
974         assert_eq!(neg_one.partial_cmp(&one), Some(Ordering::Less));
975         assert!(!(one == neg_one));
976         assert!(!(neg_one == one));
977         assert!(one != neg_one);
978         assert!(neg_one != one);
979         assert!(!(one < neg_one));
980         assert!(neg_one < one);
981         assert!(!(one <= neg_one));
982         assert!(neg_one <= one);
983         assert!(one > neg_one);
984         assert!(!(neg_one > one));
985         assert!(one >= neg_one);
986         assert!(!(neg_one >= one));
987     }
988 
989     #[test]
990     #[allow(clippy::erasing_op, clippy::identity_op)]
round_to_even_f32()991     fn round_to_even_f32() {
992         // smallest positive subnormal = 0b0.0000_001 * 2^-126 = 2^-133
993         let min_sub = bf16::from_bits(1);
994         let min_sub_f = (-133f32).exp2();
995         assert_eq!(bf16::from_f32(min_sub_f).to_bits(), min_sub.to_bits());
996         assert_eq!(f32::from(min_sub).to_bits(), min_sub_f.to_bits());
997 
998         // 0.0000000_011111 rounded to 0.0000000 (< tie, no rounding)
999         // 0.0000000_100000 rounded to 0.0000000 (tie and even, remains at even)
1000         // 0.0000000_100001 rounded to 0.0000001 (> tie, rounds up)
1001         assert_eq!(
1002             bf16::from_f32(min_sub_f * 0.49).to_bits(),
1003             min_sub.to_bits() * 0
1004         );
1005         assert_eq!(
1006             bf16::from_f32(min_sub_f * 0.50).to_bits(),
1007             min_sub.to_bits() * 0
1008         );
1009         assert_eq!(
1010             bf16::from_f32(min_sub_f * 0.51).to_bits(),
1011             min_sub.to_bits() * 1
1012         );
1013 
1014         // 0.0000001_011111 rounded to 0.0000001 (< tie, no rounding)
1015         // 0.0000001_100000 rounded to 0.0000010 (tie and odd, rounds up to even)
1016         // 0.0000001_100001 rounded to 0.0000010 (> tie, rounds up)
1017         assert_eq!(
1018             bf16::from_f32(min_sub_f * 1.49).to_bits(),
1019             min_sub.to_bits() * 1
1020         );
1021         assert_eq!(
1022             bf16::from_f32(min_sub_f * 1.50).to_bits(),
1023             min_sub.to_bits() * 2
1024         );
1025         assert_eq!(
1026             bf16::from_f32(min_sub_f * 1.51).to_bits(),
1027             min_sub.to_bits() * 2
1028         );
1029 
1030         // 0.0000010_011111 rounded to 0.0000010 (< tie, no rounding)
1031         // 0.0000010_100000 rounded to 0.0000010 (tie and even, remains at even)
1032         // 0.0000010_100001 rounded to 0.0000011 (> tie, rounds up)
1033         assert_eq!(
1034             bf16::from_f32(min_sub_f * 2.49).to_bits(),
1035             min_sub.to_bits() * 2
1036         );
1037         assert_eq!(
1038             bf16::from_f32(min_sub_f * 2.50).to_bits(),
1039             min_sub.to_bits() * 2
1040         );
1041         assert_eq!(
1042             bf16::from_f32(min_sub_f * 2.51).to_bits(),
1043             min_sub.to_bits() * 3
1044         );
1045 
1046         assert_eq!(
1047             bf16::from_f32(250.49f32).to_bits(),
1048             bf16::from_f32(250.0).to_bits()
1049         );
1050         assert_eq!(
1051             bf16::from_f32(250.50f32).to_bits(),
1052             bf16::from_f32(250.0).to_bits()
1053         );
1054         assert_eq!(
1055             bf16::from_f32(250.51f32).to_bits(),
1056             bf16::from_f32(251.0).to_bits()
1057         );
1058         assert_eq!(
1059             bf16::from_f32(251.49f32).to_bits(),
1060             bf16::from_f32(251.0).to_bits()
1061         );
1062         assert_eq!(
1063             bf16::from_f32(251.50f32).to_bits(),
1064             bf16::from_f32(252.0).to_bits()
1065         );
1066         assert_eq!(
1067             bf16::from_f32(251.51f32).to_bits(),
1068             bf16::from_f32(252.0).to_bits()
1069         );
1070         assert_eq!(
1071             bf16::from_f32(252.49f32).to_bits(),
1072             bf16::from_f32(252.0).to_bits()
1073         );
1074         assert_eq!(
1075             bf16::from_f32(252.50f32).to_bits(),
1076             bf16::from_f32(252.0).to_bits()
1077         );
1078         assert_eq!(
1079             bf16::from_f32(252.51f32).to_bits(),
1080             bf16::from_f32(253.0).to_bits()
1081         );
1082     }
1083 
1084     #[test]
1085     #[allow(clippy::erasing_op, clippy::identity_op)]
round_to_even_f64()1086     fn round_to_even_f64() {
1087         // smallest positive subnormal = 0b0.0000_001 * 2^-126 = 2^-133
1088         let min_sub = bf16::from_bits(1);
1089         let min_sub_f = (-133f64).exp2();
1090         assert_eq!(bf16::from_f64(min_sub_f).to_bits(), min_sub.to_bits());
1091         assert_eq!(f64::from(min_sub).to_bits(), min_sub_f.to_bits());
1092 
1093         // 0.0000000_011111 rounded to 0.0000000 (< tie, no rounding)
1094         // 0.0000000_100000 rounded to 0.0000000 (tie and even, remains at even)
1095         // 0.0000000_100001 rounded to 0.0000001 (> tie, rounds up)
1096         assert_eq!(
1097             bf16::from_f64(min_sub_f * 0.49).to_bits(),
1098             min_sub.to_bits() * 0
1099         );
1100         assert_eq!(
1101             bf16::from_f64(min_sub_f * 0.50).to_bits(),
1102             min_sub.to_bits() * 0
1103         );
1104         assert_eq!(
1105             bf16::from_f64(min_sub_f * 0.51).to_bits(),
1106             min_sub.to_bits() * 1
1107         );
1108 
1109         // 0.0000001_011111 rounded to 0.0000001 (< tie, no rounding)
1110         // 0.0000001_100000 rounded to 0.0000010 (tie and odd, rounds up to even)
1111         // 0.0000001_100001 rounded to 0.0000010 (> tie, rounds up)
1112         assert_eq!(
1113             bf16::from_f64(min_sub_f * 1.49).to_bits(),
1114             min_sub.to_bits() * 1
1115         );
1116         assert_eq!(
1117             bf16::from_f64(min_sub_f * 1.50).to_bits(),
1118             min_sub.to_bits() * 2
1119         );
1120         assert_eq!(
1121             bf16::from_f64(min_sub_f * 1.51).to_bits(),
1122             min_sub.to_bits() * 2
1123         );
1124 
1125         // 0.0000010_011111 rounded to 0.0000010 (< tie, no rounding)
1126         // 0.0000010_100000 rounded to 0.0000010 (tie and even, remains at even)
1127         // 0.0000010_100001 rounded to 0.0000011 (> tie, rounds up)
1128         assert_eq!(
1129             bf16::from_f64(min_sub_f * 2.49).to_bits(),
1130             min_sub.to_bits() * 2
1131         );
1132         assert_eq!(
1133             bf16::from_f64(min_sub_f * 2.50).to_bits(),
1134             min_sub.to_bits() * 2
1135         );
1136         assert_eq!(
1137             bf16::from_f64(min_sub_f * 2.51).to_bits(),
1138             min_sub.to_bits() * 3
1139         );
1140 
1141         assert_eq!(
1142             bf16::from_f64(250.49f64).to_bits(),
1143             bf16::from_f64(250.0).to_bits()
1144         );
1145         assert_eq!(
1146             bf16::from_f64(250.50f64).to_bits(),
1147             bf16::from_f64(250.0).to_bits()
1148         );
1149         assert_eq!(
1150             bf16::from_f64(250.51f64).to_bits(),
1151             bf16::from_f64(251.0).to_bits()
1152         );
1153         assert_eq!(
1154             bf16::from_f64(251.49f64).to_bits(),
1155             bf16::from_f64(251.0).to_bits()
1156         );
1157         assert_eq!(
1158             bf16::from_f64(251.50f64).to_bits(),
1159             bf16::from_f64(252.0).to_bits()
1160         );
1161         assert_eq!(
1162             bf16::from_f64(251.51f64).to_bits(),
1163             bf16::from_f64(252.0).to_bits()
1164         );
1165         assert_eq!(
1166             bf16::from_f64(252.49f64).to_bits(),
1167             bf16::from_f64(252.0).to_bits()
1168         );
1169         assert_eq!(
1170             bf16::from_f64(252.50f64).to_bits(),
1171             bf16::from_f64(252.0).to_bits()
1172         );
1173         assert_eq!(
1174             bf16::from_f64(252.51f64).to_bits(),
1175             bf16::from_f64(253.0).to_bits()
1176         );
1177     }
1178 
1179     impl quickcheck::Arbitrary for bf16 {
arbitrary<G: quickcheck::Gen>(g: &mut G) -> Self1180         fn arbitrary<G: quickcheck::Gen>(g: &mut G) -> Self {
1181             use rand::Rng;
1182             bf16(g.gen())
1183         }
1184     }
1185 
1186     #[quickcheck]
qc_roundtrip_bf16_f32_is_identity(f: bf16) -> bool1187     fn qc_roundtrip_bf16_f32_is_identity(f: bf16) -> bool {
1188         let roundtrip = bf16::from_f32(f.to_f32());
1189         if f.is_nan() {
1190             roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative()
1191         } else {
1192             f.0 == roundtrip.0
1193         }
1194     }
1195 
1196     #[quickcheck]
qc_roundtrip_bf16_f64_is_identity(f: bf16) -> bool1197     fn qc_roundtrip_bf16_f64_is_identity(f: bf16) -> bool {
1198         let roundtrip = bf16::from_f64(f.to_f64());
1199         if f.is_nan() {
1200             roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative()
1201         } else {
1202             f.0 == roundtrip.0
1203         }
1204     }
1205 }
1206