1 #![allow(clippy::identity_op)]
2 
3 use std::{
4     char::from_u32 as char_from_u32,
5     fmt::{Display, Formatter, Result as FmtResult},
6     str::{from_utf8, from_utf8_unchecked, FromStr},
7 };
8 
9 use crate::{
10     error::{Error, ErrorCode, Result},
11     extensions::Extensions,
12 };
13 
// Every byte is classified by a bit set made of the categories below.
const INT_CHAR: u8 = 0b0000_0001; // [0-9A-Fa-f_]
const FLOAT_CHAR: u8 = 0b0000_0010; // [0-9\.Ee+-]
const IDENT_FIRST_CHAR: u8 = 0b0000_0100; // [A-Za-z_]
const IDENT_OTHER_CHAR: u8 = 0b0000_1000; // [A-Za-z_0-9]
const IDENT_RAW_CHAR: u8 = 0b0001_0000; // [A-Za-z_0-9\.+-]
const WHITESPACE_CHAR: u8 = 0b0010_0000; // [\n\t\r ]

// Short, fixed-width names for the category combinations that actually occur.
// Each one is the full bit set of the characters listed next to it.
const _____: u8 = 0; // everything else
const WS___: u8 = WHITESPACE_CHAR; // [\t\n\r ]
const PUNCT: u8 = IDENT_RAW_CHAR | FLOAT_CHAR; // [\.+-]
const G2Z__: u8 = IDENT_RAW_CHAR | IDENT_OTHER_CHAR | IDENT_FIRST_CHAR; // [G-Zg-z]
const ABCDF: u8 = G2Z__ | INT_CHAR; // [ABCDFabcdf]: identifier chars that are also hex digits
const UNDER: u8 = G2Z__ | INT_CHAR; // [_]: identifier char that may separate digits
const E____: u8 = ABCDF | FLOAT_CHAR; // [Ee]: hex digit that is also the float exponent marker
const DIGIT: u8 = IDENT_RAW_CHAR | IDENT_OTHER_CHAR | FLOAT_CHAR | INT_CHAR; // [0-9]
31 
// Table of encodings, indexed by byte value, for fast predicates. Each row
// holds ten consecutive byte values; the row comment shows the characters it
// covers. (Non-ASCII and special chars are shown with '·' in the comment.)
#[rustfmt::skip]
const ENCODINGS: [u8; 256] = [
/*                     0      1      2      3      4      5      6      7      8      9    */
/*   0+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, WS___,
/*  10+: ·········· */ WS___, _____, _____, WS___, _____, _____, _____, _____, _____, _____,
/*  20+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/*  30+: ·· !"#$%&' */ _____, _____, WS___, _____, _____, _____, _____, _____, _____, _____,
/*  40+: ()*+,-./01 */ _____, _____, _____, PUNCT, _____, PUNCT, PUNCT, _____, DIGIT, DIGIT,
/*  50+: 23456789:; */ DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, _____, _____,
/*  60+: <=>?@ABCDE */ _____, _____, _____, _____, _____, ABCDF, ABCDF, ABCDF, ABCDF, E____,
/*  70+: FGHIJKLMNO */ ABCDF, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
/*  80+: PQRSTUVWXY */ G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
/*  90+: Z[\]^_`abc */ G2Z__, _____, _____, _____, _____, UNDER, _____, ABCDF, ABCDF, ABCDF,
/* 100+: defghijklm */ ABCDF, E____, ABCDF, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
/* 110+: nopqrstuvw */ G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
/* 120+: xyz{|}~··· */ G2Z__, G2Z__, G2Z__, _____, _____, _____, _____, _____, _____, _____,
/* 130+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 140+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 150+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 160+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 170+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 180+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 190+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 200+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 210+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 220+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 230+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 240+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 250+: ·········· */ _____, _____, _____, _____, _____, _____
];
64 
is_int_char(c: u8) -> bool65 const fn is_int_char(c: u8) -> bool {
66     ENCODINGS[c as usize] & INT_CHAR != 0
67 }
68 
is_float_char(c: u8) -> bool69 const fn is_float_char(c: u8) -> bool {
70     ENCODINGS[c as usize] & FLOAT_CHAR != 0
71 }
72 
is_ident_first_char(c: u8) -> bool73 pub const fn is_ident_first_char(c: u8) -> bool {
74     ENCODINGS[c as usize] & IDENT_FIRST_CHAR != 0
75 }
76 
is_ident_other_char(c: u8) -> bool77 pub const fn is_ident_other_char(c: u8) -> bool {
78     ENCODINGS[c as usize] & IDENT_OTHER_CHAR != 0
79 }
80 
is_ident_raw_char(c: u8) -> bool81 const fn is_ident_raw_char(c: u8) -> bool {
82     ENCODINGS[c as usize] & IDENT_RAW_CHAR != 0
83 }
84 
is_whitespace_char(c: u8) -> bool85 const fn is_whitespace_char(c: u8) -> bool {
86     ENCODINGS[c as usize] & WHITESPACE_CHAR != 0
87 }
88 
/// A parsed number together with the primitive type chosen to hold it.
///
/// Produced by `Bytes::any_num`, which selects the narrowest variant that
/// represents the value: unsigned variants for literals without a sign,
/// signed variants for literals with an explicit `+`/`-`, and `F32` for
/// floats that survive a round trip through `f32` (otherwise `F64`).
#[derive(Clone, Debug, PartialEq)]
pub enum AnyNum {
    F32(f32),
    F64(f64),
    I8(i8),
    U8(u8),
    I16(i16),
    U16(u16),
    I32(i32),
    U32(u32),
    I64(i64),
    U64(u64),
    I128(i128),
    U128(u128),
}
104 
/// A cursor over the raw input that also tracks the current line and column.
///
/// The type is `Copy`, so a parser state can be duplicated cheaply to look
/// ahead on a working copy.
#[derive(Clone, Copy, Debug)]
pub struct Bytes<'a> {
    /// Bits set according to `Extension` enum.
    pub exts: Extensions,
    /// The part of the input that has not been consumed yet.
    bytes: &'a [u8],
    /// Column of the next byte; starts at 1 and is reset to 1 after each `\n`.
    column: usize,
    /// Line of the next byte; starts at 1.
    line: usize,
}
113 
114 impl<'a> Bytes<'a> {
new(bytes: &'a [u8]) -> Result<Self>115     pub fn new(bytes: &'a [u8]) -> Result<Self> {
116         let mut b = Bytes {
117             bytes,
118             column: 1,
119             exts: Extensions::empty(),
120             line: 1,
121         };
122 
123         b.skip_ws()?;
124         // Loop over all extensions attributes
125         loop {
126             let attribute = b.extensions()?;
127 
128             if attribute.is_empty() {
129                 break;
130             }
131 
132             b.exts |= attribute;
133             b.skip_ws()?;
134         }
135 
136         Ok(b)
137     }
138 
advance(&mut self, bytes: usize) -> Result<()>139     pub fn advance(&mut self, bytes: usize) -> Result<()> {
140         for _ in 0..bytes {
141             self.advance_single()?;
142         }
143 
144         Ok(())
145     }
146 
advance_single(&mut self) -> Result<()>147     pub fn advance_single(&mut self) -> Result<()> {
148         if self.peek_or_eof()? == b'\n' {
149             self.line += 1;
150             self.column = 1;
151         } else {
152             self.column += 1;
153         }
154 
155         self.bytes = &self.bytes[1..];
156 
157         Ok(())
158     }
159 
    /// Parses an integer literal (without its sign) into `T`.
    ///
    /// A leading `0x`, `0b` or `0o` selects base 16, 2 or 8; anything else is
    /// read as base 10. Underscores between digits are ignored. When `sign`
    /// is positive the digits are accumulated by addition, otherwise by
    /// subtraction, so the most negative value of a signed type can be reached.
    ///
    /// # Errors
    ///
    /// * `ExpectedInteger` if no integer characters follow, or a digit is not
    ///   valid for the detected base.
    /// * `UnderscoreAtBeginning` if the digits start with `_`.
    /// * `IntegerOutOfBounds` if the value does not fit into `T`.
    ///
    /// Once the run of integer characters has been found, it is consumed even
    /// when converting it fails.
    fn any_integer<T: Num>(&mut self, sign: i8) -> Result<T> {
        let base = if self.peek() == Some(b'0') {
            match self.bytes.get(1).cloned() {
                Some(b'x') => 16,
                Some(b'b') => 2,
                Some(b'o') => 8,
                _ => 10,
            }
        } else {
            10
        };

        if base != 10 {
            // If we have `0x45A` for example,
            // cut it to `45A`.
            let _ = self.advance(2);
        }

        let num_bytes = self.next_bytes_contained_in(is_int_char);

        if num_bytes == 0 {
            return self.err(ErrorCode::ExpectedInteger);
        }

        // SAFETY: every byte accepted by `is_int_char` is ASCII (the lookup
        // table only marks ASCII digits, hex letters and `_`), so the slice
        // is valid UTF-8.
        let s = unsafe { from_utf8_unchecked(&self.bytes[0..num_bytes]) };

        if s.as_bytes()[0] == b'_' {
            return self.err(ErrorCode::UnderscoreAtBeginning);
        }

        // Folds the digits of `s` into a `T`. `f` applies one digit to the
        // accumulator (add or subtract) and returns `true` on overflow.
        fn calc_num<T: Num>(
            bytes: &Bytes,
            s: &str,
            base: u8,
            mut f: impl FnMut(&mut T, u8) -> bool,
        ) -> Result<T> {
            let mut num_acc = T::from_u8(0);

            for &byte in s.as_bytes() {
                if byte == b'_' {
                    continue;
                }

                // Shift the accumulator by one digit first; the overflow
                // check therefore takes precedence over the digit check.
                if num_acc.checked_mul_ext(base) {
                    return bytes.err(ErrorCode::IntegerOutOfBounds);
                }

                let digit = bytes.decode_hex(byte)?;

                // `is_int_char` admits hex letters for every base, so the
                // digit still has to be checked against the actual base.
                if digit >= base {
                    return bytes.err(ErrorCode::ExpectedInteger);
                }

                if f(&mut num_acc, digit) {
                    return bytes.err(ErrorCode::IntegerOutOfBounds);
                }
            }

            Ok(num_acc)
        }

        let res = if sign > 0 {
            calc_num(&*self, s, base, T::checked_add_ext)
        } else {
            calc_num(&*self, s, base, T::checked_sub_ext)
        };

        // Consume the digits regardless of whether the conversion succeeded.
        let _ = self.advance(num_bytes);

        res
    }
231 
any_num(&mut self) -> Result<AnyNum>232     pub fn any_num(&mut self) -> Result<AnyNum> {
233         // We are not doing float comparisons here in the traditional sense.
234         // Instead, this code checks if a f64 fits inside an f32.
235         #[allow(clippy::float_cmp)]
236         fn any_float(f: f64) -> Result<AnyNum> {
237             if f == f64::from(f as f32) {
238                 Ok(AnyNum::F32(f as f32))
239             } else {
240                 Ok(AnyNum::F64(f))
241             }
242         }
243 
244         let bytes_backup = self.bytes;
245 
246         let first_byte = self.peek_or_eof()?;
247         let is_signed = first_byte == b'-' || first_byte == b'+';
248         let is_float = self.next_bytes_is_float();
249 
250         if is_float {
251             let f = self.float::<f64>()?;
252 
253             any_float(f)
254         } else {
255             let max_u8 = u128::from(std::u8::MAX);
256             let max_u16 = u128::from(std::u16::MAX);
257             let max_u32 = u128::from(std::u32::MAX);
258             let max_u64 = u128::from(std::u64::MAX);
259 
260             let min_i8 = i128::from(std::i8::MIN);
261             let max_i8 = i128::from(std::i8::MAX);
262             let min_i16 = i128::from(std::i16::MIN);
263             let max_i16 = i128::from(std::i16::MAX);
264             let min_i32 = i128::from(std::i32::MIN);
265             let max_i32 = i128::from(std::i32::MAX);
266             let min_i64 = i128::from(std::i64::MIN);
267             let max_i64 = i128::from(std::i64::MAX);
268 
269             if is_signed {
270                 match self.signed_integer::<i128>() {
271                     Ok(x) => {
272                         if x >= min_i8 && x <= max_i8 {
273                             Ok(AnyNum::I8(x as i8))
274                         } else if x >= min_i16 && x <= max_i16 {
275                             Ok(AnyNum::I16(x as i16))
276                         } else if x >= min_i32 && x <= max_i32 {
277                             Ok(AnyNum::I32(x as i32))
278                         } else if x >= min_i64 && x <= max_i64 {
279                             Ok(AnyNum::I64(x as i64))
280                         } else {
281                             Ok(AnyNum::I128(x))
282                         }
283                     }
284                     Err(_) => {
285                         self.bytes = bytes_backup;
286 
287                         any_float(self.float::<f64>()?)
288                     }
289                 }
290             } else {
291                 match self.unsigned_integer::<u128>() {
292                     Ok(x) => {
293                         if x <= max_u8 {
294                             Ok(AnyNum::U8(x as u8))
295                         } else if x <= max_u16 {
296                             Ok(AnyNum::U16(x as u16))
297                         } else if x <= max_u32 {
298                             Ok(AnyNum::U32(x as u32))
299                         } else if x <= max_u64 {
300                             Ok(AnyNum::U64(x as u64))
301                         } else {
302                             Ok(AnyNum::U128(x))
303                         }
304                     }
305                     Err(_) => {
306                         self.bytes = bytes_backup;
307 
308                         any_float(self.float::<f64>()?)
309                     }
310                 }
311             }
312         }
313     }
314 
bool(&mut self) -> Result<bool>315     pub fn bool(&mut self) -> Result<bool> {
316         if self.consume("true") {
317             Ok(true)
318         } else if self.consume("false") {
319             Ok(false)
320         } else {
321             self.err(ErrorCode::ExpectedBoolean)
322         }
323     }
324 
bytes(&self) -> &[u8]325     pub fn bytes(&self) -> &[u8] {
326         &self.bytes
327     }
328 
char(&mut self) -> Result<char>329     pub fn char(&mut self) -> Result<char> {
330         if !self.consume("'") {
331             return self.err(ErrorCode::ExpectedChar);
332         }
333 
334         let c = self.peek_or_eof()?;
335 
336         let c = if c == b'\\' {
337             let _ = self.advance(1);
338 
339             self.parse_escape()?
340         } else {
341             // Check where the end of the char (') is and try to
342             // interpret the rest as UTF-8
343 
344             let max = self.bytes.len().min(5);
345             let pos: usize = self.bytes[..max]
346                 .iter()
347                 .position(|&x| x == b'\'')
348                 .ok_or_else(|| self.error(ErrorCode::ExpectedChar))?;
349             let s = from_utf8(&self.bytes[0..pos]).map_err(|e| self.error(e.into()))?;
350             let mut chars = s.chars();
351 
352             let first = chars
353                 .next()
354                 .ok_or_else(|| self.error(ErrorCode::ExpectedChar))?;
355             if chars.next().is_some() {
356                 return self.err(ErrorCode::ExpectedChar);
357             }
358 
359             let _ = self.advance(pos);
360 
361             first
362         };
363 
364         if !self.consume("'") {
365             return self.err(ErrorCode::ExpectedChar);
366         }
367 
368         Ok(c)
369     }
370 
comma(&mut self) -> Result<bool>371     pub fn comma(&mut self) -> Result<bool> {
372         self.skip_ws()?;
373 
374         if self.consume(",") {
375             self.skip_ws()?;
376 
377             Ok(true)
378         } else {
379             Ok(false)
380         }
381     }
382 
383     /// Only returns true if the char after `ident` cannot belong
384     /// to an identifier.
check_ident(&mut self, ident: &str) -> bool385     pub fn check_ident(&mut self, ident: &str) -> bool {
386         self.test_for(ident) && !self.check_ident_other_char(ident.len())
387     }
388 
check_ident_other_char(&self, index: usize) -> bool389     fn check_ident_other_char(&self, index: usize) -> bool {
390         self.bytes
391             .get(index)
392             .map_or(false, |&b| is_ident_other_char(b))
393     }
394 
395     /// Should only be used on a working copy
check_tuple_struct(mut self) -> Result<bool>396     pub fn check_tuple_struct(mut self) -> Result<bool> {
397         if self.identifier().is_err() {
398             // if there's no field ident, this is a tuple struct
399             return Ok(true);
400         }
401 
402         self.skip_ws()?;
403 
404         // if there is no colon after the ident, this can only be a unit struct
405         self.eat_byte().map(|c| c != b':')
406     }
407 
408     /// Only returns true if the char after `ident` cannot belong
409     /// to an identifier.
consume_ident(&mut self, ident: &str) -> bool410     pub fn consume_ident(&mut self, ident: &str) -> bool {
411         if self.check_ident(ident) {
412             let _ = self.advance(ident.len());
413 
414             true
415         } else {
416             false
417         }
418     }
419 
consume(&mut self, s: &str) -> bool420     pub fn consume(&mut self, s: &str) -> bool {
421         if self.test_for(s) {
422             let _ = self.advance(s.len());
423 
424             true
425         } else {
426             false
427         }
428     }
429 
consume_all(&mut self, all: &[&str]) -> Result<bool>430     fn consume_all(&mut self, all: &[&str]) -> Result<bool> {
431         all.iter()
432             .map(|elem| {
433                 if self.consume(elem) {
434                     self.skip_ws()?;
435 
436                     Ok(true)
437                 } else {
438                     Ok(false)
439                 }
440             })
441             .fold(Ok(true), |acc, x| acc.and_then(|val| x.map(|x| x && val)))
442     }
443 
eat_byte(&mut self) -> Result<u8>444     pub fn eat_byte(&mut self) -> Result<u8> {
445         let peek = self.peek_or_eof()?;
446         let _ = self.advance_single();
447 
448         Ok(peek)
449     }
450 
err<T>(&self, kind: ErrorCode) -> Result<T>451     pub fn err<T>(&self, kind: ErrorCode) -> Result<T> {
452         Err(self.error(kind))
453     }
454 
error(&self, kind: ErrorCode) -> Error455     pub fn error(&self, kind: ErrorCode) -> Error {
456         Error {
457             code: kind,
458             position: Position {
459                 line: self.line,
460                 col: self.column,
461             },
462         }
463     }
464 
expect_byte(&mut self, byte: u8, error: ErrorCode) -> Result<()>465     pub fn expect_byte(&mut self, byte: u8, error: ErrorCode) -> Result<()> {
466         self.eat_byte()
467             .and_then(|b| if b == byte { Ok(()) } else { self.err(error) })
468     }
469 
470     /// Returns the extensions bit mask.
extensions(&mut self) -> Result<Extensions>471     fn extensions(&mut self) -> Result<Extensions> {
472         if self.peek() != Some(b'#') {
473             return Ok(Extensions::empty());
474         }
475 
476         if !self.consume_all(&["#", "!", "[", "enable", "("])? {
477             return self.err(ErrorCode::ExpectedAttribute);
478         }
479 
480         self.skip_ws()?;
481         let mut extensions = Extensions::empty();
482 
483         loop {
484             let ident = self.identifier()?;
485             let extension = Extensions::from_ident(ident).ok_or_else(|| {
486                 self.error(ErrorCode::NoSuchExtension(
487                     from_utf8(ident).unwrap().to_owned(),
488                 ))
489             })?;
490 
491             extensions |= extension;
492 
493             let comma = self.comma()?;
494 
495             // If we have no comma but another item, return an error
496             if !comma && self.check_ident_other_char(0) {
497                 return self.err(ErrorCode::ExpectedComma);
498             }
499 
500             // If there's no comma, assume the list ended.
501             // If there is, it might be a trailing one, thus we only
502             // continue the loop if we get an ident char.
503             if !comma || !self.check_ident_other_char(0) {
504                 break;
505             }
506         }
507 
508         self.skip_ws()?;
509 
510         if self.consume_all(&[")", "]"])? {
511             Ok(extensions)
512         } else {
513             Err(self.error(ErrorCode::ExpectedAttributeEnd))
514         }
515     }
516 
float<T>(&mut self) -> Result<T> where T: FromStr,517     pub fn float<T>(&mut self) -> Result<T>
518     where
519         T: FromStr,
520     {
521         for literal in &["inf", "-inf", "NaN"] {
522             if self.consume_ident(literal) {
523                 return FromStr::from_str(literal).map_err(|_| unreachable!()); // must not fail
524             }
525         }
526 
527         let num_bytes = self.next_bytes_contained_in(is_float_char);
528 
529         let s = unsafe { from_utf8_unchecked(&self.bytes[0..num_bytes]) };
530         let res = FromStr::from_str(s).map_err(|_| self.error(ErrorCode::ExpectedFloat));
531 
532         let _ = self.advance(num_bytes);
533 
534         res
535     }
536 
identifier(&mut self) -> Result<&'a [u8]>537     pub fn identifier(&mut self) -> Result<&'a [u8]> {
538         let next = self.peek_or_eof()?;
539         if !is_ident_first_char(next) {
540             return self.err(ErrorCode::ExpectedIdentifier);
541         }
542 
543         // If the next two bytes signify the start of a raw string literal,
544         // return an error.
545         let length = if next == b'r' {
546             match self
547                 .bytes
548                 .get(1)
549                 .ok_or_else(|| self.error(ErrorCode::Eof))?
550             {
551                 b'"' => return self.err(ErrorCode::ExpectedIdentifier),
552                 b'#' => {
553                     let after_next = self.bytes.get(2).cloned().unwrap_or_default();
554                     //Note: it's important to check this before advancing forward, so that
555                     // the value-type deserializer can fall back to parsing it differently.
556                     if !is_ident_raw_char(after_next) {
557                         return self.err(ErrorCode::ExpectedIdentifier);
558                     }
559                     // skip "r#"
560                     let _ = self.advance(2);
561                     self.next_bytes_contained_in(is_ident_raw_char)
562                 }
563                 _ => self.next_bytes_contained_in(is_ident_other_char),
564             }
565         } else {
566             self.next_bytes_contained_in(is_ident_other_char)
567         };
568 
569         let ident = &self.bytes[..length];
570         let _ = self.advance(length);
571 
572         Ok(ident)
573     }
574 
next_bytes_contained_in(&self, allowed: fn(u8) -> bool) -> usize575     pub fn next_bytes_contained_in(&self, allowed: fn(u8) -> bool) -> usize {
576         self.bytes.iter().take_while(|&&b| allowed(b)).count()
577     }
578 
next_bytes_is_float(&self) -> bool579     pub fn next_bytes_is_float(&self) -> bool {
580         if let Some(byte) = self.peek() {
581             let skip = match byte {
582                 b'+' | b'-' => 1,
583                 _ => 0,
584             };
585             let flen = self
586                 .bytes
587                 .iter()
588                 .skip(skip)
589                 .take_while(|&&b| is_float_char(b))
590                 .count();
591             let ilen = self
592                 .bytes
593                 .iter()
594                 .skip(skip)
595                 .take_while(|&&b| is_int_char(b))
596                 .count();
597             flen > ilen
598         } else {
599             false
600         }
601     }
602 
skip_ws(&mut self) -> Result<()>603     pub fn skip_ws(&mut self) -> Result<()> {
604         while self.peek().map_or(false, is_whitespace_char) {
605             let _ = self.advance_single();
606         }
607 
608         if self.skip_comment()? {
609             self.skip_ws()?;
610         }
611 
612         Ok(())
613     }
614 
peek(&self) -> Option<u8>615     pub fn peek(&self) -> Option<u8> {
616         self.bytes.get(0).cloned()
617     }
618 
peek_or_eof(&self) -> Result<u8>619     pub fn peek_or_eof(&self) -> Result<u8> {
620         self.bytes
621             .get(0)
622             .cloned()
623             .ok_or_else(|| self.error(ErrorCode::Eof))
624     }
625 
signed_integer<T>(&mut self) -> Result<T> where T: Num,626     pub fn signed_integer<T>(&mut self) -> Result<T>
627     where
628         T: Num,
629     {
630         match self.peek_or_eof()? {
631             b'+' => {
632                 let _ = self.advance_single();
633 
634                 self.any_integer(1)
635             }
636             b'-' => {
637                 let _ = self.advance_single();
638 
639                 self.any_integer(-1)
640             }
641             _ => self.any_integer(1),
642         }
643     }
644 
string(&mut self) -> Result<ParsedStr<'a>>645     pub fn string(&mut self) -> Result<ParsedStr<'a>> {
646         if self.consume("\"") {
647             self.escaped_string()
648         } else if self.consume("r") {
649             self.raw_string()
650         } else {
651             self.err(ErrorCode::ExpectedString)
652         }
653     }
654 
escaped_string(&mut self) -> Result<ParsedStr<'a>>655     fn escaped_string(&mut self) -> Result<ParsedStr<'a>> {
656         use std::iter::repeat;
657 
658         let (i, end_or_escape) = self
659             .bytes
660             .iter()
661             .enumerate()
662             .find(|&(_, &b)| b == b'\\' || b == b'"')
663             .ok_or_else(|| self.error(ErrorCode::ExpectedStringEnd))?;
664 
665         if *end_or_escape == b'"' {
666             let s = from_utf8(&self.bytes[..i]).map_err(|e| self.error(e.into()))?;
667 
668             // Advance by the number of bytes of the string
669             // + 1 for the `"`.
670             let _ = self.advance(i + 1);
671 
672             Ok(ParsedStr::Slice(s))
673         } else {
674             let mut i = i;
675             let mut s: Vec<_> = self.bytes[..i].to_vec();
676 
677             loop {
678                 let _ = self.advance(i + 1);
679                 let character = self.parse_escape()?;
680                 match character.len_utf8() {
681                     1 => s.push(character as u8),
682                     len => {
683                         let start = s.len();
684                         s.extend(repeat(0).take(len));
685                         character.encode_utf8(&mut s[start..]);
686                     }
687                 }
688 
689                 let (new_i, end_or_escape) = self
690                     .bytes
691                     .iter()
692                     .enumerate()
693                     .find(|&(_, &b)| b == b'\\' || b == b'"')
694                     .ok_or(ErrorCode::Eof)
695                     .map_err(|e| self.error(e))?;
696 
697                 i = new_i;
698                 s.extend_from_slice(&self.bytes[..i]);
699 
700                 if *end_or_escape == b'"' {
701                     let _ = self.advance(i + 1);
702 
703                     let s = String::from_utf8(s).map_err(|e| self.error(e.into()))?;
704                     break Ok(ParsedStr::Allocated(s));
705                 }
706             }
707         }
708     }
709 
raw_string(&mut self) -> Result<ParsedStr<'a>>710     fn raw_string(&mut self) -> Result<ParsedStr<'a>> {
711         let num_hashes = self.bytes.iter().take_while(|&&b| b == b'#').count();
712         let hashes = &self.bytes[..num_hashes];
713         let _ = self.advance(num_hashes);
714 
715         if !self.consume("\"") {
716             return self.err(ErrorCode::ExpectedString);
717         }
718 
719         let ending = [&[b'"'], hashes].concat();
720         let i = self
721             .bytes
722             .windows(num_hashes + 1)
723             .position(|window| window == ending.as_slice())
724             .ok_or_else(|| self.error(ErrorCode::ExpectedStringEnd))?;
725 
726         let s = from_utf8(&self.bytes[..i]).map_err(|e| self.error(e.into()))?;
727 
728         // Advance by the number of bytes of the string
729         // + `num_hashes` + 1 for the `"`.
730         let _ = self.advance(i + num_hashes + 1);
731 
732         Ok(ParsedStr::Slice(s))
733     }
734 
test_for(&self, s: &str) -> bool735     fn test_for(&self, s: &str) -> bool {
736         s.bytes()
737             .enumerate()
738             .all(|(i, b)| self.bytes.get(i).map_or(false, |t| *t == b))
739     }
740 
unsigned_integer<T: Num>(&mut self) -> Result<T>741     pub fn unsigned_integer<T: Num>(&mut self) -> Result<T> {
742         self.any_integer(1)
743     }
744 
decode_ascii_escape(&mut self) -> Result<u8>745     fn decode_ascii_escape(&mut self) -> Result<u8> {
746         let mut n = 0;
747         for _ in 0..2 {
748             n <<= 4;
749             let byte = self.eat_byte()?;
750             let decoded = self.decode_hex(byte)?;
751             n |= decoded;
752         }
753 
754         Ok(n)
755     }
756 
757     #[inline]
decode_hex(&self, c: u8) -> Result<u8>758     fn decode_hex(&self, c: u8) -> Result<u8> {
759         match c {
760             c @ b'0'..=b'9' => Ok(c - b'0'),
761             c @ b'a'..=b'f' => Ok(10 + c - b'a'),
762             c @ b'A'..=b'F' => Ok(10 + c - b'A'),
763             _ => self.err(ErrorCode::InvalidEscape("Non-hex digit found")),
764         }
765     }
766 
parse_escape(&mut self) -> Result<char>767     fn parse_escape(&mut self) -> Result<char> {
768         let c = match self.eat_byte()? {
769             b'\'' => '\'',
770             b'"' => '"',
771             b'\\' => '\\',
772             b'n' => '\n',
773             b'r' => '\r',
774             b't' => '\t',
775             b'x' => self.decode_ascii_escape()? as char,
776             b'u' => {
777                 self.expect_byte(b'{', ErrorCode::InvalidEscape("Missing {"))?;
778 
779                 let mut bytes: u32 = 0;
780                 let mut num_digits = 0;
781 
782                 while num_digits < 6 {
783                     let byte = self.peek_or_eof()?;
784 
785                     if byte == b'}' {
786                         break;
787                     } else {
788                         self.advance_single()?;
789                     }
790 
791                     let byte = self.decode_hex(byte)?;
792                     bytes <<= 4;
793                     bytes |= u32::from(byte);
794 
795                     num_digits += 1;
796                 }
797 
798                 if num_digits == 0 {
799                     return self.err(ErrorCode::InvalidEscape(
800                         "Expected 1-6 digits, got 0 digits",
801                     ));
802                 }
803 
804                 self.expect_byte(b'}', ErrorCode::InvalidEscape("No } at the end"))?;
805                 char_from_u32(bytes)
806                     .ok_or_else(|| self.error(ErrorCode::InvalidEscape("Not a valid char")))?
807             }
808             _ => {
809                 return self.err(ErrorCode::InvalidEscape("Unknown escape character"));
810             }
811         };
812 
813         Ok(c)
814     }
815 
skip_comment(&mut self) -> Result<bool>816     fn skip_comment(&mut self) -> Result<bool> {
817         if self.consume("/") {
818             match self.eat_byte()? {
819                 b'/' => {
820                     let bytes = self.bytes.iter().take_while(|&&b| b != b'\n').count();
821 
822                     let _ = self.advance(bytes);
823                 }
824                 b'*' => {
825                     let mut level = 1;
826 
827                     while level > 0 {
828                         let bytes = self
829                             .bytes
830                             .iter()
831                             .take_while(|&&b| b != b'/' && b != b'*')
832                             .count();
833 
834                         if self.bytes.is_empty() {
835                             return self.err(ErrorCode::UnclosedBlockComment);
836                         }
837 
838                         let _ = self.advance(bytes);
839 
840                         // check whether / or * and take action
841                         if self.consume("/*") {
842                             level += 1;
843                         } else if self.consume("*/") {
844                             level -= 1;
845                         } else {
846                             self.eat_byte()
847                                 .map_err(|_| self.error(ErrorCode::UnclosedBlockComment))?;
848                         }
849                     }
850                 }
851                 b => return self.err(ErrorCode::UnexpectedByte(b as char)),
852             }
853 
854             Ok(true)
855         } else {
856             Ok(false)
857         }
858     }
859 }
860 
/// The integer operations required by the generic integer parser.
///
/// Implemented for all primitive integer types from 8 to 128 bits.
pub trait Num {
    /// Converts `x` into `Self` with an `as` cast.
    fn from_u8(x: u8) -> Self;

    /// Multiplies `self` by `x` in place.
    ///
    /// Returns `true` on overflow, in which case `self` is left unchanged.
    fn checked_mul_ext(&mut self, x: u8) -> bool;

    /// Adds `x` to `self` in place.
    ///
    /// Returns `true` on overflow, in which case `self` is left unchanged.
    fn checked_add_ext(&mut self, x: u8) -> bool;

    /// Subtracts `x` from `self` in place.
    ///
    /// Returns `true` on overflow, in which case `self` is left unchanged.
    fn checked_sub_ext(&mut self, x: u8) -> bool;
}

// Implements `Num` for every listed integer type by delegating to the
// type's own `checked_*` methods.
macro_rules! impl_num {
    ($($ty:ident)*) => {
        $(
            impl Num for $ty {
                fn from_u8(x: u8) -> Self {
                    x as $ty
                }

                fn checked_mul_ext(&mut self, x: u8) -> bool {
                    let product = self.checked_mul(Self::from_u8(x));
                    if let Some(value) = product {
                        *self = value;
                    }
                    product.is_none()
                }

                fn checked_add_ext(&mut self, x: u8) -> bool {
                    let sum = self.checked_add(Self::from_u8(x));
                    if let Some(value) = sum {
                        *self = value;
                    }
                    sum.is_none()
                }

                fn checked_sub_ext(&mut self, x: u8) -> bool {
                    let difference = self.checked_sub(Self::from_u8(x));
                    if let Some(value) = difference {
                        *self = value;
                    }
                    difference.is_none()
                }
            }
        )*
    };
}

impl_num!(u8 u16 u32 u64 u128 i8 i16 i32 i64 i128);
918 
/// The result of parsing a string literal.
#[derive(Clone, Debug)]
pub enum ParsedStr<'a> {
    /// The string contained escape sequences, so its unescaped content was
    /// assembled in a newly allocated `String`.
    Allocated(String),
    /// The string is borrowed from the input as is (raw strings and strings
    /// without escape sequences).
    Slice(&'a str),
}
924 
/// A location in the input, given as a line and a column.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Position {
    pub line: usize,
    pub col: usize,
}

/// Formats the position as `line:col`.
impl Display for Position {
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        let Position { line, col } = *self;
        write!(f, "{}:{}", line, col)
    }
}
936 
#[cfg(test)]
mod tests {
    use super::*;

    /// The two hex digits `10` decode to the byte `0x10`.
    #[test]
    fn decode_x10() {
        let mut input = Bytes::new(b"10").unwrap();
        let decoded = input.decode_ascii_escape();

        assert_eq!(decoded, Ok(0x10));
    }
}
947