1 //! Lossy algorithms for string-to-float conversions.
2 
3 use atoi;
4 use util::*;
5 use super::exponent::parse_exponent;
6 
7 // FRACTION
8 
/// `f64` wrapped in `WrappedFloat`; `parse_integer` accumulates into this
/// type and unwraps it with `into_inner()`.
type Wrapped = WrappedFloat<f64>;
10 
11 /// Parse the integer portion of a positive, normal float string.
12 ///
13 /// Use a float since for large numbers, this may even overflow a u64.
14 #[inline]
parse_integer<'a>(radix: u32, bytes: &'a [u8]) -> (f64, &'a [u8])15 fn parse_integer<'a>(radix: u32, bytes: &'a [u8])
16     -> (f64, &'a [u8])
17 {
18     // Trim leading zeros, since we haven't parsed anything yet.
19     let bytes = ltrim_char_slice(bytes, b'0').0;
20 
21     let mut value = Wrapped::ZERO;
22     let (len, _) = atoi::unchecked_positive(&mut value, as_cast(radix), bytes);
23 
24     // We know this is always true, since `len` is the length processed
25     // from atoi, which must be <= bytes.len().
26     (value.into_inner(), &index!(bytes[len..]))
27 }
28 
29 /// Parse the fraction portion of a positive, normal float string.
30 ///
31 /// Parse separately from the integer portion, since the small
32 /// values for each may be too small to change the integer components
33 /// representation **immediately**.
34 #[inline]
parse_fraction<'a>(radix: u32, bytes: &'a [u8]) -> (f64, &'a [u8])35 fn parse_fraction<'a>(radix: u32, bytes: &'a [u8])
36     -> (f64, &'a [u8])
37 {
38     // Ensure if there's a decimal, there are trailing values, so
39     // invalid floats like "0." lead to an error.
40     if Some(&b'.') == bytes.get(0) {
41         // We know this must be true, since we just got the first value.
42         let mut bytes = &index!(bytes[1..]);
43         let first = bytes.as_ptr();
44         let mut fraction: f64 = 0.;
45         loop {
46             // Trim leading zeros, since that never gets called with the raw parser.
47             // Since if it's after the decimal place and this increments state.curr,
48             // but not first, this is safe.
49             bytes = ltrim_char_slice(bytes, b'0').0;
50 
51             // This would get better numerical precision using Horner's method,
52             // but that would require.
53             let mut value: u64 = 0;
54             // We know this is safe, since we grab 12 digits, or the length
55             // of the buffer, whichever is smaller.
56             let buf = &index!(bytes[..bytes.len().min(12)]);
57             let (processed, _) = atoi::unchecked_positive(&mut value, radix.as_u64(), buf);
58             // We know this is safe, since atoi returns a value <= buf.len().
59             bytes = &index!(bytes[processed..]);
60             let digits = distance(first, bytes.as_ptr()).try_i32_or_max();
61 
62             // Ignore leading 0s, just not we've passed them.
63             if value != 0 {
64                 fraction += f64::iterative_pow(value as f64, radix, -digits);
65             }
66 
67             // do/while condition
68             if char_to_digit(*bytes.get(0).unwrap_or(&b'\0')).as_u32() >= radix {
69                 break;
70             }
71         }
72         // Store frac component over the parsed digits.
73         (fraction, bytes)
74     } else {
75         (0.0, bytes)
76     }
77 }
78 
79 /// Parse the mantissa from a string.
80 ///
81 /// The float string must be non-special, non-zero, and positive.
82 #[inline]
parse_mantissa<'a>(radix: u32, bytes: &'a [u8]) -> (f64, &'a [u8])83 fn parse_mantissa<'a>(radix: u32, bytes: &'a [u8])
84     -> (f64, &'a [u8])
85 {
86     let (integer, bytes) = parse_integer(radix, bytes);
87     let (fraction, bytes) = parse_fraction(radix, bytes);
88 
89     (integer + fraction, bytes)
90 }
91 
92 // PARSE
93 
94 /// Parse the mantissa and exponent from a string.
95 ///
96 /// The float string must be non-special, non-zero, and positive.
97 #[inline]
parse_float<'a>(radix: u32, bytes: &'a [u8]) -> (f64, i32, &'a [u8])98 fn parse_float<'a>(radix: u32, bytes: &'a [u8])
99     -> (f64, i32, &'a [u8])
100 {
101     let (mantissa, bytes) = parse_mantissa(radix, bytes);
102     let (exponent, bytes) = parse_exponent(radix, bytes);
103 
104     (mantissa, exponent, bytes)
105 }
106 
107 // ATOF/ATOD
108 
109 /// Parse 32-bit float from string.
110 ///
111 /// The float string must be non-special, non-zero, and positive.
112 #[inline]
atof(radix: u32, bytes: &[u8], sign: Sign) -> (f32, usize)113 pub(crate) fn atof(radix: u32, bytes: &[u8], sign: Sign)
114     -> (f32, usize)
115 {
116     let (value, len) = atod(radix, bytes, sign);
117     (value as f32, len)
118 }
119 
120 /// Parse 64-bit float from string.
121 ///
122 /// The float string must be non-special, non-zero, and positive.
123 #[inline]
atod(radix: u32, bytes: &[u8], _: Sign) -> (f64, usize)124 pub(crate) fn atod(radix: u32, bytes: &[u8], _: Sign)
125     -> (f64, usize)
126 {
127     let (mut value, exponent, slc) = parse_float(radix, bytes);
128     if exponent != 0 && value != 0.0 {
129         value = value.iterative_pow(radix, exponent);
130     }
131     (value, bytes.len() - slc.len())
132 }
133 
/// Lossy parse of a 32-bit float from a string.
///
/// Forwards directly to `atof` with the same arguments and returns its
/// `(value, bytes consumed)` result unchanged.
#[inline]
pub(crate) fn atof_lossy(radix: u32, bytes: &[u8], sign: Sign)
    -> (f32, usize)
{
    atof(radix, bytes, sign)
}
140 
/// Lossy parse of a 64-bit float from a string.
///
/// Forwards directly to `atod` with the same arguments and returns its
/// `(value, bytes consumed)` result unchanged.
#[inline]
pub(crate) fn atod_lossy(radix: u32, bytes: &[u8], sign: Sign)
    -> (f64, usize)
{
    atod(radix, bytes, sign)
}
147 
148 // TESTS
149 // -----
150 
#[cfg(test)]
mod tests {
    use super::*;

    /// Check `parse_integer` against an expected `(value, bytes consumed)`.
    fn check_parse_integer(radix: u32, s: &str, tup: (f64, usize)) {
        let (expected, consumed) = tup;
        let (actual, rest) = parse_integer(radix, s.as_bytes());
        assert_eq!(actual, expected);
        assert_eq!(s.len() - rest.len(), consumed);
    }

    #[test]
    fn parse_integer_test() {
        let cases: [(&str, (f64, usize)); 3] = [
            ("1.2345", (1.0, 1)),
            ("12.345", (12.0, 2)),
            ("12345.6789", (12345.0, 5)),
        ];
        for &(input, expected) in cases.iter() {
            check_parse_integer(10, input, expected);
        }
    }

    /// Check `parse_fraction` against an expected `(value, bytes consumed)`.
    fn check_parse_fraction(radix: u32, s: &str, tup: (f64, usize)) {
        let (expected, consumed) = tup;
        let (actual, rest) = parse_fraction(radix, s.as_bytes());
        assert_eq!(actual, expected);
        assert_eq!(s.len() - rest.len(), consumed);
    }

    #[test]
    fn parse_fraction_test() {
        let cases: [(&str, (f64, usize)); 3] = [
            (".2345", (0.2345, 5)),
            (".345", (0.345, 4)),
            (".6789", (0.6789, 5)),
        ];
        for &(input, expected) in cases.iter() {
            check_parse_fraction(10, input, expected);
        }
    }

    /// Check `parse_mantissa` against an expected `(value, bytes consumed)`.
    fn check_parse_mantissa(radix: u32, s: &str, tup: (f64, usize)) {
        let (expected, consumed) = tup;
        let (actual, rest) = parse_mantissa(radix, s.as_bytes());
        assert_eq!(actual, expected);
        assert_eq!(s.len() - rest.len(), consumed);
    }

    #[test]
    fn parse_mantissa_test() {
        let cases: [(&str, (f64, usize)); 3] = [
            ("1.2345", (1.2345, 6)),
            ("12.345", (12.345, 6)),
            ("12345.6789", (12345.6789, 10)),
        ];
        for &(input, expected) in cases.iter() {
            check_parse_mantissa(10, input, expected);
        }
    }

    /// Check `parse_float` against an expected
    /// `(mantissa, exponent, bytes consumed)`.
    fn check_parse_float(radix: u32, s: &str, tup: (f64, i32, usize)) {
        let (expected_mantissa, expected_exponent, consumed) = tup;
        let (mantissa, exponent, rest) = parse_float(radix, s.as_bytes());
        assert_eq!(mantissa, expected_mantissa);
        assert_eq!(exponent, expected_exponent);
        assert_eq!(s.len() - rest.len(), consumed);
    }

    #[test]
    fn parse_float_test() {
        let cases: [(&str, (f64, i32, usize)); 4] = [
            ("1.2345", (1.2345, 0, 6)),
            ("12.345", (12.345, 0, 6)),
            ("12345.6789", (12345.6789, 0, 10)),
            ("1.2345e10", (1.2345, 10, 9)),
        ];
        for &(input, expected) in cases.iter() {
            check_parse_float(10, input, expected);
        }
    }

    /// Check `atof` against an expected `(value, bytes consumed)`.
    fn check_atof(radix: u32, s: &str, tup: (f32, usize)) {
        let (expected, consumed) = tup;
        let (actual, len) = atof(radix, s.as_bytes(), Sign::Positive);
        assert_eq!(actual, expected);
        assert_eq!(len, consumed);
    }

    #[test]
    fn atof_test() {
        let cases: [(&str, (f32, usize)); 4] = [
            ("1.2345", (1.2345, 6)),
            ("12.345", (12.345, 6)),
            ("12345.6789", (12345.6789, 10)),
            ("1.2345e10", (1.2345e10, 9)),
        ];
        for &(input, expected) in cases.iter() {
            check_atof(10, input, expected);
        }
    }

    /// Check `atod` against an expected `(value, bytes consumed)`.
    fn check_atod(radix: u32, s: &str, tup: (f64, usize)) {
        let (expected, consumed) = tup;
        let (actual, len) = atod(radix, s.as_bytes(), Sign::Positive);
        assert_eq!(actual, expected);
        assert_eq!(len, consumed);
    }

    #[test]
    fn atod_test() {
        let cases: [(&str, (f64, usize)); 4] = [
            ("1.2345", (1.2345, 6)),
            ("12.345", (12.345, 6)),
            ("12345.6789", (12345.6789, 10)),
            ("1.2345e10", (1.2345e10, 9)),
        ];
        for &(input, expected) in cases.iter() {
            check_atod(10, input, expected);
        }
    }

    // Lossy
    // The lossy entry points are just synonyms for the regular overloads,
    // since we're not using the correct feature, so the same cases are
    // reused. These use the approximate float comparison macros.

    /// Check `atof_lossy` against an expected `(value, bytes consumed)`.
    fn check_atof_lossy(radix: u32, s: &str, tup: (f32, usize)) {
        let (expected, consumed) = tup;
        let (actual, len) = atof_lossy(radix, s.as_bytes(), Sign::Positive);
        assert_f32_eq!(actual, expected);
        assert_eq!(len, consumed);
    }

    #[test]
    fn atof_lossy_test() {
        let cases: [(&str, (f32, usize)); 4] = [
            ("1.2345", (1.2345, 6)),
            ("12.345", (12.345, 6)),
            ("12345.6789", (12345.6789, 10)),
            ("1.2345e10", (1.2345e10, 9)),
        ];
        for &(input, expected) in cases.iter() {
            check_atof_lossy(10, input, expected);
        }
    }

    /// Check `atod_lossy` against an expected `(value, bytes consumed)`.
    fn check_atod_lossy(radix: u32, s: &str, tup: (f64, usize)) {
        let (expected, consumed) = tup;
        let (actual, len) = atod_lossy(radix, s.as_bytes(), Sign::Positive);
        assert_f64_eq!(actual, expected);
        assert_eq!(len, consumed);
    }

    #[test]
    fn atod_lossy_test() {
        let cases: [(&str, (f64, usize)); 4] = [
            ("1.2345", (1.2345, 6)),
            ("12.345", (12.345, 6)),
            ("12345.6789", (12345.6789, 10)),
            ("1.2345e10", (1.2345e10, 9)),
        ];
        for &(input, expected) in cases.iter() {
            check_atod_lossy(10, input, expected);
        }
    }
}
269