1 //! Parses hexadecimal float literals.
//! Two functions are provided, one per target type: `parse_hexf32` and `parse_hexf64`.
3 //!
4 //! ```rust
5 //! use hexf_parse::*;
6 //! assert_eq!(parse_hexf32("0x1.99999ap-4", false), Ok(0.1f32));
7 //! assert_eq!(parse_hexf64("0x1.999999999999ap-4", false), Ok(0.1f64));
8 //! ```
9 //!
10 //! An additional `bool` parameter can be set to true if you want to allow underscores.
11 //!
12 //! ```rust
13 //! use hexf_parse::*;
14 //! assert!(parse_hexf64("0x0.1_7p8", false).is_err());
15 //! assert_eq!(parse_hexf64("0x0.1_7p8", true), Ok(23.0f64));
16 //! ```
17 //!
18 //! The error is reported via an opaque `ParseHexfError` type.
19
20 use std::{f32, f64, fmt, isize, str};
21
22 /// An opaque error type from `parse_hexf32` and `parse_hexf64`.
23 #[derive(Debug, Clone, PartialEq, Eq)]
24 pub struct ParseHexfError {
25 kind: ParseHexfErrorKind,
26 }
27
/// The failure categories a `ParseHexfError` can carry.
#[derive(Clone, Debug, Eq, PartialEq)]
enum ParseHexfErrorKind {
    /// The input contained no bytes at all.
    Empty,
    /// The input does not follow the hexadecimal float literal syntax.
    Invalid,
    /// The literal is well-formed but cannot be represented exactly.
    Inexact,
}
34
35 const EMPTY: ParseHexfError = ParseHexfError {
36 kind: ParseHexfErrorKind::Empty,
37 };
38 const INVALID: ParseHexfError = ParseHexfError {
39 kind: ParseHexfErrorKind::Invalid,
40 };
41 const INEXACT: ParseHexfError = ParseHexfError {
42 kind: ParseHexfErrorKind::Inexact,
43 };
44
45 impl ParseHexfError {
text(&self) -> &'static str46 fn text(&self) -> &'static str {
47 match self.kind {
48 ParseHexfErrorKind::Empty => "cannot parse float from empty string",
49 ParseHexfErrorKind::Invalid => "invalid hexadecimal float literal",
50 ParseHexfErrorKind::Inexact => "cannot exactly represent float in target type",
51 }
52 }
53 }
54
55 impl fmt::Display for ParseHexfError {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result56 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
57 fmt::Display::fmt(self.text(), f)
58 }
59 }
60
61 impl std::error::Error for ParseHexfError {
description(&self) -> &'static str62 fn description(&self) -> &'static str {
63 self.text()
64 }
65 }
66
parse(s: &[u8], allow_underscore: bool) -> Result<(bool, u64, isize), ParseHexfError>67 fn parse(s: &[u8], allow_underscore: bool) -> Result<(bool, u64, isize), ParseHexfError> {
68 // ^[+-]?
69 let (s, negative) = match s.split_first() {
70 Some((&b'+', s)) => (s, false),
71 Some((&b'-', s)) => (s, true),
72 Some(_) => (s, false),
73 None => return Err(EMPTY),
74 };
75
76 // 0[xX]
77 if !(s.starts_with(b"0x") || s.starts_with(b"0X")) {
78 return Err(INVALID);
79 }
80
81 // ([0-9a-fA-F][0-9a-fA-F_]*)?
82 let mut s = &s[2..];
83 let mut acc = 0; // the accumulated mantissa
84 let mut digit_seen = false;
85 loop {
86 let (s_, digit) = match s.split_first() {
87 Some((&c @ b'0'..=b'9', s)) => (s, c - b'0'),
88 Some((&c @ b'a'..=b'f', s)) => (s, c - b'a' + 10),
89 Some((&c @ b'A'..=b'F', s)) => (s, c - b'A' + 10),
90 Some((&b'_', s_)) if allow_underscore && digit_seen => {
91 s = s_;
92 continue;
93 }
94 _ => break,
95 };
96
97 s = s_;
98 digit_seen = true;
99
100 // if `acc << 4` fails, mantissa definitely exceeds 64 bits so we should bail out
101 if acc >> 60 != 0 {
102 return Err(INEXACT);
103 }
104 acc = acc << 4 | digit as u64;
105 }
106
107 // (\.[0-9a-fA-F][0-9a-fA-F_]*)?
108 // we want to ignore trailing zeroes but shifting at each digit will overflow first.
109 // therefore we separately count the number of zeroes and flush it on non-zero digits.
110 let mut nfracs = 0isize; // this is suboptimal but also practical, see below
111 let mut nzeroes = 0isize;
112 let mut frac_digit_seen = false;
113 if s.starts_with(b".") {
114 s = &s[1..];
115 loop {
116 let (s_, digit) = match s.split_first() {
117 Some((&c @ b'0'..=b'9', s)) => (s, c - b'0'),
118 Some((&c @ b'a'..=b'f', s)) => (s, c - b'a' + 10),
119 Some((&c @ b'A'..=b'F', s)) => (s, c - b'A' + 10),
120 Some((&b'_', s_)) if allow_underscore && frac_digit_seen => {
121 s = s_;
122 continue;
123 }
124 _ => break,
125 };
126
127 s = s_;
128 frac_digit_seen = true;
129
130 if digit == 0 {
131 nzeroes = nzeroes.checked_add(1).ok_or(INEXACT)?;
132 } else {
133 // flush nzeroes
134 let nnewdigits = nzeroes.checked_add(1).ok_or(INEXACT)?;
135 nfracs = nfracs.checked_add(nnewdigits).ok_or(INEXACT)?;
136 nzeroes = 0;
137
138 // if the accumulator is non-zero, the shift cannot exceed 64
139 // (therefore the number of new digits cannot exceed 16).
140 // this will catch e.g. `0.40000....00001` with sufficiently many zeroes
141 if acc != 0 {
142 if nnewdigits >= 16 || acc >> (64 - nnewdigits * 4) != 0 {
143 return Err(INEXACT);
144 }
145 acc = acc << (nnewdigits * 4);
146 }
147 acc |= digit as u64;
148 }
149 }
150 }
151
152 // at least one digit should be present
153 if !(digit_seen || frac_digit_seen) {
154 return Err(INVALID);
155 }
156
157 // [pP]
158 let s = match s.split_first() {
159 Some((&b'P', s)) | Some((&b'p', s)) => s,
160 _ => return Err(INVALID),
161 };
162
163 // [+-]?
164 let (mut s, negative_exponent) = match s.split_first() {
165 Some((&b'+', s)) => (s, false),
166 Some((&b'-', s)) => (s, true),
167 Some(_) => (s, false),
168 None => return Err(INVALID),
169 };
170
171 // [0-9_]*[0-9][0-9_]*$
172 let mut digit_seen = false;
173 let mut exponent = 0isize; // this is suboptimal but also practical, see below
174 loop {
175 let (s_, digit) = match s.split_first() {
176 Some((&c @ b'0'..=b'9', s)) => (s, c - b'0'),
177 Some((&b'_', s_)) if allow_underscore => {
178 s = s_;
179 continue;
180 }
181 None if digit_seen => break,
182 // no more bytes expected, and at least one exponent digit should be present
183 _ => return Err(INVALID),
184 };
185
186 s = s_;
187 digit_seen = true;
188
189 // if we have no non-zero digits at this point, ignore the exponent :-)
190 if acc != 0 {
191 exponent = exponent
192 .checked_mul(10)
193 .and_then(|v| v.checked_add(digit as isize))
194 .ok_or(INEXACT)?;
195 }
196 }
197 if negative_exponent {
198 exponent = -exponent;
199 }
200
201 if acc == 0 {
202 // ignore the exponent as above
203 Ok((negative, 0, 0))
204 } else {
205 // the exponent should be biased by (nfracs * 4) to match with the mantissa read.
206 // we still miss valid inputs like `0.0000...0001pX` where the input is filling
207 // at least 1/4 of the total addressable memory, but I dare not handle them!
208 let exponent = nfracs
209 .checked_mul(4)
210 .and_then(|v| exponent.checked_sub(v))
211 .ok_or(INEXACT)?;
212 Ok((negative, acc, exponent))
213 }
214 }
215
#[test]
fn test_parse() {
    // `check!(input => expected)` parses without underscores;
    // `check!(input, underscores => expected)` parses with underscores allowed.
    // A macro (not a closure) is used so that a failure reports the calling line.
    macro_rules! check {
        ($input:expr => $expected:expr) => {
            assert_eq!(parse($input, false), $expected)
        };
        ($input:expr, underscores => $expected:expr) => {
            assert_eq!(parse($input, true), $expected)
        };
    }

    // basic syntax
    check!(b"" => Err(EMPTY));
    check!(b" " => Err(INVALID));
    check!(b"3.14" => Err(INVALID));
    check!(b"0x3.14" => Err(INVALID));
    check!(b"0x3.14fp+3" => Ok((false, 0x314f, 3 - 12)));
    check!(b" 0x3.14p+3" => Err(INVALID));
    check!(b"0x3.14p+3 " => Err(INVALID));
    check!(b"+0x3.14fp+3" => Ok((false, 0x314f, 3 - 12)));
    check!(b"-0x3.14fp+3" => Ok((true, 0x314f, 3 - 12)));
    check!(b"0xAbC.p1" => Ok((false, 0xabc, 1)));
    check!(b"0x0.7p1" => Ok((false, 0x7, 1 - 4)));
    check!(b"0x.dEfP-1" => Ok((false, 0xdef, -1 - 12)));

    // missing mantissa or exponent digits
    check!(b"0x.p1" => Err(INVALID));
    check!(b"0x.P1" => Err(INVALID));
    check!(b"0xp1" => Err(INVALID));
    check!(b"0xP1" => Err(INVALID));
    check!(b"0x0p" => Err(INVALID));
    check!(b"0xp" => Err(INVALID));
    check!(b"0x.p" => Err(INVALID));

    // zero mantissa: the exponent is ignored
    check!(b"0x0p1" => Ok((false, 0, 0)));
    check!(b"0x0P1" => Ok((false, 0, 0)));
    check!(b"0x0.p1" => Ok((false, 0, 0)));
    check!(b"0x0.P1" => Ok((false, 0, 0)));
    check!(b"0x0.0p1" => Ok((false, 0, 0)));
    check!(b"0x0.0P1" => Ok((false, 0, 0)));
    check!(b"0x.0p1" => Ok((false, 0, 0)));
    check!(b"0x.0P1" => Ok((false, 0, 0)));
    check!(b"0x0p0" => Ok((false, 0, 0)));
    check!(b"0x0.p999999999" => Ok((false, 0, 0)));
    check!(b"0x0.p99999999999999999999999999999" => Ok((false, 0, 0)));
    check!(b"0x0.p-99999999999999999999999999999" => Ok((false, 0, 0)));

    // exponent and mantissa overflow
    check!(b"0x1.p99999999999999999999999999999" => Err(INEXACT));
    check!(b"0x1.p-99999999999999999999999999999" => Err(INEXACT));
    check!(b"0x4.00000000000000000000p55" => Ok((false, 4, 55)));
    check!(b"0x4.00001000000000000000p55" => Ok((false, 0x400001, 55 - 20)));
    check!(b"0x4.00000000000000000001p55" => Err(INEXACT));

    // underscore insertion
    check!(b"-0x3____.1_4___p+___5___", underscores => Ok((true, 0x314, 5 - 8)));
    check!(b"-_0x3.14p+5", underscores => Err(INVALID));
    check!(b"_0x3.14p+5", underscores => Err(INVALID));
    check!(b"0x_3.14p+5", underscores => Err(INVALID));
    check!(b"0x3._14p+5", underscores => Err(INVALID));
    check!(b"0x3.14p_+5", underscores => Err(INVALID));
    check!(b"-0x____.1_4___p+___5___", underscores => Err(INVALID));
    check!(b"-0x3____.____p+___5___", underscores => Err(INVALID));
    check!(b"-0x3____.1_4___p+______", underscores => Err(INVALID));
    check!(b"0x_p0" => Err(INVALID));
    check!(b"0x_0p0", underscores => Err(INVALID));
    check!(b"0x_p0", underscores => Err(INVALID));
    check!(b"0x._p0", underscores => Err(INVALID));
    check!(b"0x._0p0", underscores => Err(INVALID));
    check!(b"0x0._0p0", underscores => Err(INVALID));
    check!(b"0x0_p0", underscores => Ok((false, 0, 0)));
    check!(b"0x.0_p0", underscores => Ok((false, 0, 0)));
    check!(b"0x0.0_p0", underscores => Ok((false, 0, 0)));

    // issues
    // #11 (https://github.com/lifthrasiir/hexf/issues/11)
    check!(b"0x1p-149" => parse(b"0x1.0p-149", false));
}
300
301 macro_rules! define_convert {
302 ($name:ident => $f:ident) => {
303 fn $name(negative: bool, mantissa: u64, exponent: isize) -> Result<$f, ParseHexfError> {
304 // guard the exponent with the definitely safe range (we will exactly bound it later)
305 if exponent < -0xffff || exponent > 0xffff {
306 return Err(INEXACT);
307 }
308
309 // strip the trailing zeroes in mantissa and adjust exponent.
310 // we do this because a unit in the least significant bit of mantissa is
311 // always safe to represent while one in the most significant bit isn't.
312 let trailing = mantissa.trailing_zeros() & 63; // guard mantissa=0 case
313 let mantissa = mantissa >> trailing;
314 let exponent = exponent + trailing as isize;
315
316 // normalize the exponent that the number is (1.xxxx * 2^normalexp),
317 // and check for the mantissa and exponent ranges
318 let leading = mantissa.leading_zeros();
319 let normalexp = exponent + (63 - leading as isize);
320 let mantissasize = if normalexp < $f::MIN_EXP as isize - $f::MANTISSA_DIGITS as isize {
321 // the number is smaller than the minimal denormal number
322 return Err(INEXACT);
323 } else if normalexp < ($f::MIN_EXP - 1) as isize {
324 // the number is denormal, the # of bits in the mantissa is:
325 // - minimum (1) at MIN_EXP - MANTISSA_DIGITS
326 // - maximum (MANTISSA_DIGITS - 1) at MIN_EXP - 2
327 $f::MANTISSA_DIGITS as isize - $f::MIN_EXP as isize + normalexp + 1
328 } else if normalexp < $f::MAX_EXP as isize {
329 // the number is normal, the # of bits in the mantissa is fixed
330 $f::MANTISSA_DIGITS as isize
331 } else {
332 // the number is larger than the maximal denormal number
333 // ($f::MAX_EXP denotes NaN and infinities here)
334 return Err(INEXACT);
335 };
336
337 if mantissa >> mantissasize == 0 {
338 let mut mantissa = mantissa as $f;
339 if negative {
340 mantissa = -mantissa;
341 }
342 // yes, powi somehow does not work!
343 Ok(mantissa * (2.0 as $f).powf(exponent as $f))
344 } else {
345 Err(INEXACT)
346 }
347 }
348 };
349 }
350
351 define_convert!(convert_hexf32 => f32);
352 define_convert!(convert_hexf64 => f64);
353
#[test]
fn test_convert_hexf32() {
    // most cases only vary mantissa and exponent of a positive number
    let pos = |mantissa: u64, exponent: isize| convert_hexf32(false, mantissa, exponent);

    assert_eq!(pos(0, 0), Ok(0.0));
    assert_eq!(pos(1, 0), Ok(1.0));
    assert_eq!(pos(10, 0), Ok(10.0));
    assert_eq!(pos(10, 1), Ok(20.0));
    assert_eq!(pos(10, -1), Ok(5.0));
    assert_eq!(convert_hexf32(true, 0, 0), Ok(-0.0));
    assert_eq!(convert_hexf32(true, 1, 0), Ok(-1.0));

    // negative zeroes
    assert_eq!(pos(0, 0).unwrap().signum(), 1.0);
    assert_eq!(convert_hexf32(true, 0, 0).unwrap().signum(), -1.0);

    // normal truncation
    assert_eq!(pos(0x0000_0000_00ff_ffff, 0), Ok(16777215.0));
    assert_eq!(pos(0x0000_0000_01ff_ffff, 0), Err(INEXACT));
    assert_eq!(pos(0xffff_ff00_0000_0000, -40), Ok(16777215.0));
    assert_eq!(pos(0xffff_ff80_0000_0000, -40), Err(INEXACT));

    // denormal truncation
    assert!(pos(0x0000_0000_007f_ffff, -149).is_ok());
    assert!(pos(0x0000_0000_00ff_ffff, -150).is_err());
    assert!(pos(0x0000_0000_00ff_fffe, -150).is_ok());
    assert!(pos(0xffff_ff00_0000_0000, -190).is_err());
    assert!(pos(0xffff_fe00_0000_0000, -190).is_ok());

    // minimum
    assert!(pos(0x0000_0000_0000_0001, -149).is_ok());
    assert!(pos(0x0000_0000_0000_0001, -150).is_err());
    assert!(pos(0x0000_0000_0000_0002, -150).is_ok());
    assert!(pos(0x0000_0000_0000_0002, -151).is_err());
    assert!(pos(0x0000_0000_0000_0003, -150).is_err());
    assert!(pos(0x0000_0000_0000_0003, -151).is_err());
    assert!(pos(0x8000_0000_0000_0000, -212).is_ok());
    assert!(pos(0x8000_0000_0000_0000, -213).is_err());

    // maximum
    assert_eq!(pos(0x0000_0000_00ff_ffff, 104), Ok(f32::MAX));
    assert_eq!(pos(0x0000_0000_01ff_ffff, 104), Err(INEXACT));
    assert_eq!(pos(0x0000_0000_01ff_fffe, 104), Err(INEXACT));
    assert_eq!(pos(0x0000_0000_0000_0001, 128), Err(INEXACT));
    assert_eq!(pos(0x8000_0000_0000_0000, 65), Err(INEXACT));
    assert_eq!(pos(0xffff_ff00_0000_0000, 64), Ok(f32::MAX));
    assert_eq!(pos(0xffff_ff80_0000_0000, 64), Err(INEXACT));
}
433
#[test]
fn test_convert_hexf64() {
    // most cases only vary mantissa and exponent of a positive number.
    // routing them through one closure also guarantees every case exercises the
    // f64 converter (two of the "maximum" cases used to call `convert_hexf32`).
    let pos = |mantissa: u64, exponent: isize| convert_hexf64(false, mantissa, exponent);

    assert_eq!(pos(0, 0), Ok(0.0));
    assert_eq!(pos(1, 0), Ok(1.0));
    assert_eq!(pos(10, 0), Ok(10.0));
    assert_eq!(pos(10, 1), Ok(20.0));
    assert_eq!(pos(10, -1), Ok(5.0));
    assert_eq!(convert_hexf64(true, 0, 0), Ok(-0.0));
    assert_eq!(convert_hexf64(true, 1, 0), Ok(-1.0));

    // negative zeroes
    assert_eq!(pos(0, 0).unwrap().signum(), 1.0);
    assert_eq!(convert_hexf64(true, 0, 0).unwrap().signum(), -1.0);

    // normal truncation
    assert_eq!(pos(0x001f_ffff_ffff_ffff, 0), Ok(9007199254740991.0));
    assert_eq!(pos(0x003f_ffff_ffff_ffff, 0), Err(INEXACT));
    assert_eq!(pos(0xffff_ffff_ffff_f800, -11), Ok(9007199254740991.0));
    assert_eq!(pos(0xffff_ffff_ffff_fc00, -11), Err(INEXACT));

    // denormal truncation
    assert!(pos(0x000f_ffff_ffff_ffff, -1074).is_ok());
    assert!(pos(0x001f_ffff_ffff_ffff, -1075).is_err());
    assert!(pos(0x001f_ffff_ffff_fffe, -1075).is_ok());
    assert!(pos(0xffff_ffff_ffff_f800, -1086).is_err());
    assert!(pos(0xffff_ffff_ffff_f000, -1086).is_ok());

    // minimum
    assert!(pos(0x0000_0000_0000_0001, -1074).is_ok());
    assert!(pos(0x0000_0000_0000_0001, -1075).is_err());
    assert!(pos(0x0000_0000_0000_0002, -1075).is_ok());
    assert!(pos(0x0000_0000_0000_0002, -1076).is_err());
    assert!(pos(0x0000_0000_0000_0003, -1075).is_err());
    assert!(pos(0x0000_0000_0000_0003, -1076).is_err());
    assert!(pos(0x8000_0000_0000_0000, -1137).is_ok());
    assert!(pos(0x8000_0000_0000_0000, -1138).is_err());

    // maximum
    assert_eq!(pos(0x001f_ffff_ffff_ffff, 971), Ok(f64::MAX));
    assert_eq!(pos(0x003f_ffff_ffff_ffff, 971), Err(INEXACT));
    assert_eq!(pos(0x003f_ffff_ffff_fffe, 971), Err(INEXACT));
    assert_eq!(pos(0x0000_0000_0000_0001, 1024), Err(INEXACT));
    assert_eq!(pos(0x8000_0000_0000_0000, 961), Err(INEXACT));
    assert_eq!(pos(0xffff_ffff_ffff_f800, 960), Ok(f64::MAX));
    assert_eq!(pos(0xffff_ffff_ffff_fc00, 960), Err(INEXACT));
}
513
514 /// Tries to parse a hexadecimal float literal to `f32`.
515 /// The underscore is allowed only when `allow_underscore` is true.
parse_hexf32(s: &str, allow_underscore: bool) -> Result<f32, ParseHexfError>516 pub fn parse_hexf32(s: &str, allow_underscore: bool) -> Result<f32, ParseHexfError> {
517 let (negative, mantissa, exponent) = parse(s.as_bytes(), allow_underscore)?;
518 convert_hexf32(negative, mantissa, exponent)
519 }
520
521 /// Tries to parse a hexadecimal float literal to `f64`.
522 /// The underscore is allowed only when `allow_underscore` is true.
parse_hexf64(s: &str, allow_underscore: bool) -> Result<f64, ParseHexfError>523 pub fn parse_hexf64(s: &str, allow_underscore: bool) -> Result<f64, ParseHexfError> {
524 let (negative, mantissa, exponent) = parse(s.as_bytes(), allow_underscore)?;
525 convert_hexf64(negative, mantissa, exponent)
526 }
527
#[test]
fn test_parse_hexf() {
    // issues
    // #6 (https://github.com/lifthrasiir/hexf/issues/6)
    // the literal has no `p` exponent part, so it must be rejected
    let parsed = parse_hexf64("0x.000000000000000000102", false);
    assert!(parsed.is_err());
}
534