1 use std::{
2     char::from_u32 as char_from_u32,
3     fmt::{Display, Formatter, Result as FmtResult},
4     str::{from_utf8, from_utf8_unchecked, FromStr},
5 };
6 
7 use crate::{
8     error::{Error, ErrorCode, Result},
9     extensions::Extensions,
10 };
11 
// We have the following char categories. Each category occupies its own bit,
// so one `u8` can record membership in any combination of them.
const INT_CHAR: u8 = 1 << 0; // [0-9A-Fa-f_]
const FLOAT_CHAR: u8 = 1 << 1; // [0-9\.Ee+-]
const IDENT_FIRST_CHAR: u8 = 1 << 2; // [A-Za-z_]
const IDENT_OTHER_CHAR: u8 = 1 << 3; // [A-Za-z_0-9]
const WHITESPACE_CHAR: u8 = 1 << 4; // [\n\t\r ]

// We encode each char as belonging to some number of these categories.
// NOTE: `ABCDF` and `UNDER` currently carry exactly the same bits.
const DIGIT: u8 = INT_CHAR | FLOAT_CHAR | IDENT_OTHER_CHAR; // [0-9]
const ABCDF: u8 = INT_CHAR | IDENT_FIRST_CHAR | IDENT_OTHER_CHAR; // [ABCDFabcdf]
const UNDER: u8 = INT_CHAR | IDENT_FIRST_CHAR | IDENT_OTHER_CHAR; // [_]
const E____: u8 = INT_CHAR | FLOAT_CHAR | IDENT_FIRST_CHAR | IDENT_OTHER_CHAR; // [Ee]
const G2Z__: u8 = IDENT_FIRST_CHAR | IDENT_OTHER_CHAR; // [G-Zg-z]
const PUNCT: u8 = FLOAT_CHAR; // [\.+-]
const WS___: u8 = WHITESPACE_CHAR; // [\t\n\r ]
const _____: u8 = 0; // everything else
28 
// Table of encodings, for fast predicates. The table is indexed by the byte
// value; each entry is the set of category bits for that byte. (Non-ASCII and
// special chars are shown with '·' in the comment.)
#[rustfmt::skip]
const ENCODINGS: [u8; 256] = [
/*                     0      1      2      3      4      5      6      7      8      9    */
/*   0+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, WS___,
/*  10+: ·········· */ WS___, _____, _____, WS___, _____, _____, _____, _____, _____, _____,
/*  20+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/*  30+: ·· !"#$%&' */ _____, _____, WS___, _____, _____, _____, _____, _____, _____, _____,
/*  40+: ()*+,-./01 */ _____, _____, _____, PUNCT, _____, PUNCT, PUNCT, _____, DIGIT, DIGIT,
/*  50+: 23456789:; */ DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, _____, _____,
/*  60+: <=>?@ABCDE */ _____, _____, _____, _____, _____, ABCDF, ABCDF, ABCDF, ABCDF, E____,
/*  70+: FGHIJKLMNO */ ABCDF, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
/*  80+: PQRSTUVWXY */ G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
/*  90+: Z[\]^_`abc */ G2Z__, _____, _____, _____, _____, UNDER, _____, ABCDF, ABCDF, ABCDF,
/* 100+: defghijklm */ ABCDF, E____, ABCDF, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
/* 110+: nopqrstuvw */ G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
/* 120+: xyz{|}~··· */ G2Z__, G2Z__, G2Z__, _____, _____, _____, _____, _____, _____, _____,
/* 130+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 140+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 150+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 160+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 170+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 180+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 190+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 200+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 210+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 220+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 230+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 240+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
/* 250+: ·········· */ _____, _____, _____, _____, _____, _____
];
61 
is_int_char(c: u8) -> bool62 const fn is_int_char(c: u8) -> bool {
63     ENCODINGS[c as usize] & INT_CHAR != 0
64 }
65 
is_float_char(c: u8) -> bool66 const fn is_float_char(c: u8) -> bool {
67     ENCODINGS[c as usize] & FLOAT_CHAR != 0
68 }
69 
is_ident_first_char(c: u8) -> bool70 const fn is_ident_first_char(c: u8) -> bool {
71     ENCODINGS[c as usize] & IDENT_FIRST_CHAR != 0
72 }
73 
is_ident_other_char(c: u8) -> bool74 const fn is_ident_other_char(c: u8) -> bool {
75     ENCODINGS[c as usize] & IDENT_OTHER_CHAR != 0
76 }
77 
is_whitespace_char(c: u8) -> bool78 const fn is_whitespace_char(c: u8) -> bool {
79     ENCODINGS[c as usize] & WHITESPACE_CHAR != 0
80 }
81 
/// A parsed number tagged with the primitive type it was stored as.
///
/// This is the return type of `Bytes::any_num`, which chooses the variant
/// while parsing.
#[derive(Clone, Debug, PartialEq)]
pub enum AnyNum {
    F32(f32),
    F64(f64),
    I8(i8),
    U8(u8),
    I16(i16),
    U16(u16),
    I32(i32),
    U32(u32),
    I64(i64),
    U64(u64),
    I128(i128),
    U128(u128),
}
97 
/// A cursor over the not-yet-consumed input bytes that also tracks the
/// current line and column for error reporting.
///
/// The type is `Copy`, so a working copy can be taken for look-ahead.
#[derive(Clone, Copy, Debug)]
pub struct Bytes<'a> {
    /// Bits set according to `Extension` enum.
    pub exts: Extensions,
    // Remaining input; consumed bytes are sliced off the front.
    bytes: &'a [u8],
    // Column of the next byte; starts at 1 and resets to 1 after `\n`.
    column: usize,
    // Line of the next byte; starts at 1.
    line: usize,
}
106 
107 impl<'a> Bytes<'a> {
new(bytes: &'a [u8]) -> Result<Self>108     pub fn new(bytes: &'a [u8]) -> Result<Self> {
109         let mut b = Bytes {
110             bytes,
111             column: 1,
112             exts: Extensions::empty(),
113             line: 1,
114         };
115 
116         b.skip_ws()?;
117         // Loop over all extensions attributes
118         loop {
119             let attribute = b.extensions()?;
120 
121             if attribute.is_empty() {
122                 break;
123             }
124 
125             b.exts |= attribute;
126             b.skip_ws()?;
127         }
128 
129         Ok(b)
130     }
131 
advance(&mut self, bytes: usize) -> Result<()>132     pub fn advance(&mut self, bytes: usize) -> Result<()> {
133         for _ in 0..bytes {
134             self.advance_single()?;
135         }
136 
137         Ok(())
138     }
139 
advance_single(&mut self) -> Result<()>140     pub fn advance_single(&mut self) -> Result<()> {
141         if self.peek_or_eof()? == b'\n' {
142             self.line += 1;
143             self.column = 1;
144         } else {
145             self.column += 1;
146         }
147 
148         self.bytes = &self.bytes[1..];
149 
150         Ok(())
151     }
152 
any_integer<T: Num>(&mut self, sign: i8) -> Result<T>153     fn any_integer<T: Num>(&mut self, sign: i8) -> Result<T> {
154         let base = if self.peek() == Some(b'0') {
155             match self.bytes.get(1).cloned() {
156                 Some(b'x') => 16,
157                 Some(b'b') => 2,
158                 Some(b'o') => 8,
159                 _ => 10,
160             }
161         } else {
162             10
163         };
164 
165         if base != 10 {
166             // If we have `0x45A` for example,
167             // cut it to `45A`.
168             let _ = self.advance(2);
169         }
170 
171         let num_bytes = self.next_bytes_contained_in(is_int_char);
172 
173         if num_bytes == 0 {
174             return self.err(ErrorCode::ExpectedInteger);
175         }
176 
177         let s = unsafe { from_utf8_unchecked(&self.bytes[0..num_bytes]) };
178 
179         if s.as_bytes()[0] == b'_' {
180             return self.err(ErrorCode::UnderscoreAtBeginning);
181         }
182 
183         fn calc_num<T: Num>(
184             bytes: &Bytes,
185             s: &str,
186             base: u8,
187             mut f: impl FnMut(&mut T, u8) -> bool,
188         ) -> Result<T> {
189             let mut num_acc = T::from_u8(0);
190 
191             for &byte in s.as_bytes() {
192                 if byte == b'_' {
193                     continue;
194                 }
195 
196                 if num_acc.checked_mul_ext(base) {
197                     return bytes.err(ErrorCode::IntegerOutOfBounds);
198                 }
199 
200                 let digit = bytes.decode_hex(byte)?;
201 
202                 if digit >= base {
203                     return bytes.err(ErrorCode::ExpectedInteger);
204                 }
205 
206                 if f(&mut num_acc, digit) {
207                     return bytes.err(ErrorCode::IntegerOutOfBounds);
208                 }
209             }
210 
211             Ok(num_acc)
212         };
213 
214         let res = if sign > 0 {
215             calc_num(&*self, s, base, T::checked_add_ext)
216         } else {
217             calc_num(&*self, s, base, T::checked_sub_ext)
218         };
219 
220         let _ = self.advance(num_bytes);
221 
222         res
223     }
224 
any_num(&mut self) -> Result<AnyNum>225     pub fn any_num(&mut self) -> Result<AnyNum> {
226         // We are not doing float comparisons here in the traditional sense.
227         // Instead, this code checks if a f64 fits inside an f32.
228         #[allow(clippy::float_cmp)]
229         fn any_float(f: f64) -> Result<AnyNum> {
230             if f == f as f32 as f64 {
231                 Ok(AnyNum::F32(f as f32))
232             } else {
233                 Ok(AnyNum::F64(f))
234             }
235         }
236 
237         let bytes_backup = self.bytes;
238 
239         let first_byte = self.peek_or_eof()?;
240         let is_signed = first_byte == b'-' || first_byte == b'+';
241         let is_float = self.next_bytes_is_float();
242 
243         if is_float {
244             let f = self.float::<f64>()?;
245 
246             any_float(f)
247         } else {
248             let max_u8 = std::u8::MAX as u128;
249             let max_u16 = std::u16::MAX as u128;
250             let max_u32 = std::u32::MAX as u128;
251             let max_u64 = std::u64::MAX as u128;
252 
253             let min_i8 = std::i8::MIN as i128;
254             let max_i8 = std::i8::MAX as i128;
255             let min_i16 = std::i16::MIN as i128;
256             let max_i16 = std::i16::MAX as i128;
257             let min_i32 = std::i32::MIN as i128;
258             let max_i32 = std::i32::MAX as i128;
259             let min_i64 = std::i64::MIN as i128;
260             let max_i64 = std::i64::MAX as i128;
261 
262             if is_signed {
263                 match self.signed_integer::<i128>() {
264                     Ok(x) => {
265                         if x >= min_i8 && x <= max_i8 {
266                             Ok(AnyNum::I8(x as i8))
267                         } else if x >= min_i16 && x <= max_i16 {
268                             Ok(AnyNum::I16(x as i16))
269                         } else if x >= min_i32 && x <= max_i32 {
270                             Ok(AnyNum::I32(x as i32))
271                         } else if x >= min_i64 && x <= max_i64 {
272                             Ok(AnyNum::I64(x as i64))
273                         } else {
274                             Ok(AnyNum::I128(x))
275                         }
276                     }
277                     Err(_) => {
278                         self.bytes = bytes_backup;
279 
280                         any_float(self.float::<f64>()?)
281                     }
282                 }
283             } else {
284                 match self.unsigned_integer::<u128>() {
285                     Ok(x) => {
286                         if x <= max_u8 {
287                             Ok(AnyNum::U8(x as u8))
288                         } else if x <= max_u16 {
289                             Ok(AnyNum::U16(x as u16))
290                         } else if x <= max_u32 {
291                             Ok(AnyNum::U32(x as u32))
292                         } else if x <= max_u64 {
293                             Ok(AnyNum::U64(x as u64))
294                         } else {
295                             Ok(AnyNum::U128(x))
296                         }
297                     }
298                     Err(_) => {
299                         self.bytes = bytes_backup;
300 
301                         any_float(self.float::<f64>()?)
302                     }
303                 }
304             }
305         }
306     }
307 
bool(&mut self) -> Result<bool>308     pub fn bool(&mut self) -> Result<bool> {
309         if self.consume("true") {
310             Ok(true)
311         } else if self.consume("false") {
312             Ok(false)
313         } else {
314             self.err(ErrorCode::ExpectedBoolean)
315         }
316     }
317 
bytes(&self) -> &[u8]318     pub fn bytes(&self) -> &[u8] {
319         &self.bytes
320     }
321 
char(&mut self) -> Result<char>322     pub fn char(&mut self) -> Result<char> {
323         if !self.consume("'") {
324             return self.err(ErrorCode::ExpectedChar);
325         }
326 
327         let c = self.peek_or_eof()?;
328 
329         let c = if c == b'\\' {
330             let _ = self.advance(1);
331 
332             self.parse_escape()?
333         } else {
334             // Check where the end of the char (') is and try to
335             // interpret the rest as UTF-8
336 
337             let max = self.bytes.len().min(5);
338             let pos: usize = self.bytes[..max]
339                 .iter()
340                 .position(|&x| x == b'\'')
341                 .ok_or_else(|| self.error(ErrorCode::ExpectedChar))?;
342             let s = from_utf8(&self.bytes[0..pos]).map_err(|e| self.error(e.into()))?;
343             let mut chars = s.chars();
344 
345             let first = chars
346                 .next()
347                 .ok_or_else(|| self.error(ErrorCode::ExpectedChar))?;
348             if chars.next().is_some() {
349                 return self.err(ErrorCode::ExpectedChar);
350             }
351 
352             let _ = self.advance(pos);
353 
354             first
355         };
356 
357         if !self.consume("'") {
358             return self.err(ErrorCode::ExpectedChar);
359         }
360 
361         Ok(c)
362     }
363 
comma(&mut self) -> Result<bool>364     pub fn comma(&mut self) -> Result<bool> {
365         self.skip_ws()?;
366 
367         if self.consume(",") {
368             self.skip_ws()?;
369 
370             Ok(true)
371         } else {
372             Ok(false)
373         }
374     }
375 
376     /// Only returns true if the char after `ident` cannot belong
377     /// to an identifier.
check_ident(&mut self, ident: &str) -> bool378     pub fn check_ident(&mut self, ident: &str) -> bool {
379         self.test_for(ident) && !self.check_ident_other_char(ident.len())
380     }
381 
check_ident_other_char(&self, index: usize) -> bool382     fn check_ident_other_char(&self, index: usize) -> bool {
383         self.bytes
384             .get(index)
385             .map_or(false, |&b| is_ident_other_char(b))
386     }
387 
388     /// Should only be used on a working copy
check_tuple_struct(mut self) -> Result<bool>389     pub fn check_tuple_struct(mut self) -> Result<bool> {
390         if self.identifier().is_err() {
391             // if there's no field ident, this is a tuple struct
392             return Ok(true);
393         }
394 
395         self.skip_ws()?;
396 
397         // if there is no colon after the ident, this can only be a unit struct
398         self.eat_byte().map(|c| c != b':')
399     }
400 
401     /// Only returns true if the char after `ident` cannot belong
402     /// to an identifier.
consume_ident(&mut self, ident: &str) -> bool403     pub fn consume_ident(&mut self, ident: &str) -> bool {
404         if self.check_ident(ident) {
405             let _ = self.advance(ident.len());
406 
407             true
408         } else {
409             false
410         }
411     }
412 
consume(&mut self, s: &str) -> bool413     pub fn consume(&mut self, s: &str) -> bool {
414         if self.test_for(s) {
415             let _ = self.advance(s.len());
416 
417             true
418         } else {
419             false
420         }
421     }
422 
consume_all(&mut self, all: &[&str]) -> Result<bool>423     fn consume_all(&mut self, all: &[&str]) -> Result<bool> {
424         all.iter()
425             .map(|elem| {
426                 if self.consume(elem) {
427                     self.skip_ws()?;
428 
429                     Ok(true)
430                 } else {
431                     Ok(false)
432                 }
433             })
434             .fold(Ok(true), |acc, x| acc.and_then(|val| x.map(|x| x && val)))
435     }
436 
eat_byte(&mut self) -> Result<u8>437     pub fn eat_byte(&mut self) -> Result<u8> {
438         let peek = self.peek_or_eof()?;
439         let _ = self.advance_single();
440 
441         Ok(peek)
442     }
443 
err<T>(&self, kind: ErrorCode) -> Result<T>444     pub fn err<T>(&self, kind: ErrorCode) -> Result<T> {
445         Err(self.error(kind))
446     }
447 
error(&self, kind: ErrorCode) -> Error448     pub fn error(&self, kind: ErrorCode) -> Error {
449         Error {
450             code: kind,
451             position: Position {
452                 line: self.line,
453                 col: self.column,
454             },
455         }
456     }
457 
expect_byte(&mut self, byte: u8, error: ErrorCode) -> Result<()>458     pub fn expect_byte(&mut self, byte: u8, error: ErrorCode) -> Result<()> {
459         self.eat_byte()
460             .and_then(|b| if b == byte { Ok(()) } else { self.err(error) })
461     }
462 
463     /// Returns the extensions bit mask.
extensions(&mut self) -> Result<Extensions>464     fn extensions(&mut self) -> Result<Extensions> {
465         if self.peek() != Some(b'#') {
466             return Ok(Extensions::empty());
467         }
468 
469         if !self.consume_all(&["#", "!", "[", "enable", "("])? {
470             return self.err(ErrorCode::ExpectedAttribute);
471         }
472 
473         self.skip_ws()?;
474         let mut extensions = Extensions::empty();
475 
476         loop {
477             let ident = self.identifier()?;
478             let extension = Extensions::from_ident(ident).ok_or_else(|| {
479                 self.error(ErrorCode::NoSuchExtension(
480                     from_utf8(ident).unwrap().to_owned(),
481                 ))
482             })?;
483 
484             extensions |= extension;
485 
486             let comma = self.comma()?;
487 
488             // If we have no comma but another item, return an error
489             if !comma && self.check_ident_other_char(0) {
490                 return self.err(ErrorCode::ExpectedComma);
491             }
492 
493             // If there's no comma, assume the list ended.
494             // If there is, it might be a trailing one, thus we only
495             // continue the loop if we get an ident char.
496             if !comma || !self.check_ident_other_char(0) {
497                 break;
498             }
499         }
500 
501         self.skip_ws()?;
502 
503         if self.consume_all(&[")", "]"])? {
504             Ok(extensions)
505         } else {
506             Err(self.error(ErrorCode::ExpectedAttributeEnd))
507         }
508     }
509 
float<T>(&mut self) -> Result<T> where T: FromStr,510     pub fn float<T>(&mut self) -> Result<T>
511     where
512         T: FromStr,
513     {
514         for literal in &["inf", "-inf", "NaN"] {
515             if self.consume_ident(literal) {
516                 return FromStr::from_str(literal).map_err(|_| unreachable!()); // must not fail
517             }
518         }
519 
520         let num_bytes = self.next_bytes_contained_in(is_float_char);
521 
522         let s = unsafe { from_utf8_unchecked(&self.bytes[0..num_bytes]) };
523         let res = FromStr::from_str(s).map_err(|_| self.error(ErrorCode::ExpectedFloat));
524 
525         let _ = self.advance(num_bytes);
526 
527         res
528     }
529 
identifier(&mut self) -> Result<&'a [u8]>530     pub fn identifier(&mut self) -> Result<&'a [u8]> {
531         let bytes = self.identifier_len()?;
532         let ident = &self.bytes[..bytes];
533         let _ = self.advance(bytes);
534 
535         Ok(ident)
536     }
537 
identifier_len(&self) -> Result<usize>538     pub fn identifier_len(&self) -> Result<usize> {
539         let next = self.peek_or_eof()?;
540         if is_ident_first_char(next) {
541             // If the next two bytes signify the start of a raw string literal,
542             // return an error.
543             if next == b'r' {
544                 let second = self
545                     .bytes
546                     .get(1)
547                     .ok_or_else(|| self.error(ErrorCode::Eof))?;
548                 if *second == b'"' || *second == b'#' {
549                     return self.err(ErrorCode::ExpectedIdentifier);
550                 }
551             }
552 
553             let bytes = self.next_bytes_contained_in(is_ident_other_char);
554 
555             Ok(bytes)
556         } else {
557             self.err(ErrorCode::ExpectedIdentifier)
558         }
559     }
560 
next_bytes_contained_in(&self, allowed: fn(u8) -> bool) -> usize561     pub fn next_bytes_contained_in(&self, allowed: fn(u8) -> bool) -> usize {
562         self.bytes.iter().take_while(|&&b| allowed(b)).count()
563     }
564 
next_bytes_is_float(&self) -> bool565     pub fn next_bytes_is_float(&self) -> bool {
566         if let Some(byte) = self.peek() {
567             let skip = match byte {
568                 b'+' | b'-' => 1,
569                 _ => 0,
570             };
571             let flen = self
572                 .bytes
573                 .iter()
574                 .skip(skip)
575                 .take_while(|&&b| is_float_char(b))
576                 .count();
577             let ilen = self
578                 .bytes
579                 .iter()
580                 .skip(skip)
581                 .take_while(|&&b| is_int_char(b))
582                 .count();
583             flen > ilen
584         } else {
585             false
586         }
587     }
588 
skip_ws(&mut self) -> Result<()>589     pub fn skip_ws(&mut self) -> Result<()> {
590         while self.peek().map_or(false, |c| is_whitespace_char(c)) {
591             let _ = self.advance_single();
592         }
593 
594         if self.skip_comment()? {
595             self.skip_ws()?;
596         }
597 
598         Ok(())
599     }
600 
peek(&self) -> Option<u8>601     pub fn peek(&self) -> Option<u8> {
602         self.bytes.get(0).cloned()
603     }
604 
peek_or_eof(&self) -> Result<u8>605     pub fn peek_or_eof(&self) -> Result<u8> {
606         self.bytes
607             .get(0)
608             .cloned()
609             .ok_or_else(|| self.error(ErrorCode::Eof))
610     }
611 
signed_integer<T>(&mut self) -> Result<T> where T: Num,612     pub fn signed_integer<T>(&mut self) -> Result<T>
613     where
614         T: Num,
615     {
616         match self.peek_or_eof()? {
617             b'+' => {
618                 let _ = self.advance_single();
619 
620                 self.any_integer(1)
621             }
622             b'-' => {
623                 let _ = self.advance_single();
624 
625                 self.any_integer(-1)
626             }
627             _ => self.any_integer(1),
628         }
629     }
630 
string(&mut self) -> Result<ParsedStr<'a>>631     pub fn string(&mut self) -> Result<ParsedStr<'a>> {
632         if self.consume("\"") {
633             self.escaped_string()
634         } else if self.consume("r") {
635             self.raw_string()
636         } else {
637             self.err(ErrorCode::ExpectedString)
638         }
639     }
640 
escaped_string(&mut self) -> Result<ParsedStr<'a>>641     fn escaped_string(&mut self) -> Result<ParsedStr<'a>> {
642         use std::iter::repeat;
643 
644         let (i, end_or_escape) = self
645             .bytes
646             .iter()
647             .enumerate()
648             .find(|&(_, &b)| b == b'\\' || b == b'"')
649             .ok_or_else(|| self.error(ErrorCode::ExpectedStringEnd))?;
650 
651         if *end_or_escape == b'"' {
652             let s = from_utf8(&self.bytes[..i]).map_err(|e| self.error(e.into()))?;
653 
654             // Advance by the number of bytes of the string
655             // + 1 for the `"`.
656             let _ = self.advance(i + 1);
657 
658             Ok(ParsedStr::Slice(s))
659         } else {
660             let mut i = i;
661             let mut s: Vec<_> = self.bytes[..i].to_vec();
662 
663             loop {
664                 let _ = self.advance(i + 1);
665                 let character = self.parse_escape()?;
666                 match character.len_utf8() {
667                     1 => s.push(character as u8),
668                     len => {
669                         let start = s.len();
670                         s.extend(repeat(0).take(len));
671                         character.encode_utf8(&mut s[start..]);
672                     }
673                 }
674 
675                 let (new_i, end_or_escape) = self
676                     .bytes
677                     .iter()
678                     .enumerate()
679                     .find(|&(_, &b)| b == b'\\' || b == b'"')
680                     .ok_or(ErrorCode::Eof)
681                     .map_err(|e| self.error(e))?;
682 
683                 i = new_i;
684                 s.extend_from_slice(&self.bytes[..i]);
685 
686                 if *end_or_escape == b'"' {
687                     let _ = self.advance(i + 1);
688 
689                     let s = String::from_utf8(s).map_err(|e| self.error(e.into()))?;
690                     break Ok(ParsedStr::Allocated(s));
691                 }
692             }
693         }
694     }
695 
raw_string(&mut self) -> Result<ParsedStr<'a>>696     fn raw_string(&mut self) -> Result<ParsedStr<'a>> {
697         let num_hashes = self.bytes.iter().take_while(|&&b| b == b'#').count();
698         let hashes = &self.bytes[..num_hashes];
699         let _ = self.advance(num_hashes);
700 
701         if !self.consume("\"") {
702             return self.err(ErrorCode::ExpectedString);
703         }
704 
705         let ending = [&[b'"'], hashes].concat();
706         let i = self
707             .bytes
708             .windows(num_hashes + 1)
709             .position(|window| window == ending.as_slice())
710             .ok_or_else(|| self.error(ErrorCode::ExpectedStringEnd))?;
711 
712         let s = from_utf8(&self.bytes[..i]).map_err(|e| self.error(e.into()))?;
713 
714         // Advance by the number of bytes of the string
715         // + `num_hashes` + 1 for the `"`.
716         let _ = self.advance(i + num_hashes + 1);
717 
718         Ok(ParsedStr::Slice(s))
719     }
720 
test_for(&self, s: &str) -> bool721     fn test_for(&self, s: &str) -> bool {
722         s.bytes()
723             .enumerate()
724             .all(|(i, b)| self.bytes.get(i).map_or(false, |t| *t == b))
725     }
726 
unsigned_integer<T: Num>(&mut self) -> Result<T>727     pub fn unsigned_integer<T: Num>(&mut self) -> Result<T> {
728         self.any_integer(1)
729     }
730 
decode_ascii_escape(&mut self) -> Result<u8>731     fn decode_ascii_escape(&mut self) -> Result<u8> {
732         let mut n = 0;
733         for _ in 0..2 {
734             n <<= 4;
735             let byte = self.eat_byte()?;
736             let decoded = self.decode_hex(byte)?;
737             n |= decoded;
738         }
739 
740         Ok(n)
741     }
742 
743     #[inline]
decode_hex(&self, c: u8) -> Result<u8>744     fn decode_hex(&self, c: u8) -> Result<u8> {
745         match c {
746             c @ b'0'..=b'9' => Ok(c - b'0'),
747             c @ b'a'..=b'f' => Ok(10 + c - b'a'),
748             c @ b'A'..=b'F' => Ok(10 + c - b'A'),
749             _ => self.err(ErrorCode::InvalidEscape("Non-hex digit found")),
750         }
751     }
752 
parse_escape(&mut self) -> Result<char>753     fn parse_escape(&mut self) -> Result<char> {
754         let c = match self.eat_byte()? {
755             b'\'' => '\'',
756             b'"' => '"',
757             b'\\' => '\\',
758             b'n' => '\n',
759             b'r' => '\r',
760             b't' => '\t',
761             b'x' => self.decode_ascii_escape()? as char,
762             b'u' => {
763                 self.expect_byte(b'{', ErrorCode::InvalidEscape("Missing {"))?;
764 
765                 let mut bytes: u32 = 0;
766                 let mut num_digits = 0;
767 
768                 while num_digits < 6 {
769                     let byte = self.peek_or_eof()?;
770 
771                     if byte == b'}' {
772                         break;
773                     } else {
774                         self.advance_single()?;
775                     }
776 
777                     let byte = self.decode_hex(byte)?;
778                     bytes <<= 4;
779                     bytes |= byte as u32;
780 
781                     num_digits += 1;
782                 }
783 
784                 if num_digits == 0 {
785                     return self.err(ErrorCode::InvalidEscape(
786                         "Expected 1-6 digits, got 0 digits",
787                     ));
788                 }
789 
790                 self.expect_byte(b'}', ErrorCode::InvalidEscape("No } at the end"))?;
791                 char_from_u32(bytes)
792                     .ok_or_else(|| self.error(ErrorCode::InvalidEscape("Not a valid char")))?
793             }
794             _ => {
795                 return self.err(ErrorCode::InvalidEscape("Unknown escape character"));
796             }
797         };
798 
799         Ok(c)
800     }
801 
skip_comment(&mut self) -> Result<bool>802     fn skip_comment(&mut self) -> Result<bool> {
803         if self.consume("/") {
804             match self.eat_byte()? {
805                 b'/' => {
806                     let bytes = self.bytes.iter().take_while(|&&b| b != b'\n').count();
807 
808                     let _ = self.advance(bytes);
809                 }
810                 b'*' => {
811                     let mut level = 1;
812 
813                     while level > 0 {
814                         let bytes = self
815                             .bytes
816                             .iter()
817                             .take_while(|&&b| b != b'/' && b != b'*')
818                             .count();
819 
820                         if self.bytes.is_empty() {
821                             return self.err(ErrorCode::UnclosedBlockComment);
822                         }
823 
824                         let _ = self.advance(bytes);
825 
826                         // check whether / or * and take action
827                         if self.consume("/*") {
828                             level += 1;
829                         } else if self.consume("*/") {
830                             level -= 1;
831                         } else {
832                             self.eat_byte()
833                                 .map_err(|_| self.error(ErrorCode::UnclosedBlockComment))?;
834                         }
835                     }
836                 }
837                 b => return self.err(ErrorCode::UnexpectedByte(b as char)),
838             }
839 
840             Ok(true)
841         } else {
842             Ok(false)
843         }
844     }
845 }
846 
/// Integer operations needed by the number parser, implemented for all
/// primitive integer types.
///
/// The `*_ext` methods update `self` in place on success and leave it
/// untouched on overflow.
pub trait Num {
    /// Converts `x` with an `as` cast.
    fn from_u8(x: u8) -> Self;

    /// Multiplies `self` by `x`. Returns `true` on overflow
    fn checked_mul_ext(&mut self, x: u8) -> bool;

    /// Adds `x` to `self`. Returns `true` on overflow
    fn checked_add_ext(&mut self, x: u8) -> bool;

    /// Subtracts `x` from `self`. Returns `true` on overflow
    fn checked_sub_ext(&mut self, x: u8) -> bool;
}

// Implements `Num` for every listed primitive integer type by delegating to
// the inherent `checked_*` methods.
macro_rules! impl_num {
    ($($ty:ident)*) => {
        $(
            impl Num for $ty {
                fn from_u8(x: u8) -> Self {
                    x as $ty
                }

                fn checked_mul_ext(&mut self, x: u8) -> bool {
                    if let Some(product) = self.checked_mul(Self::from_u8(x)) {
                        *self = product;
                        false
                    } else {
                        true
                    }
                }

                fn checked_add_ext(&mut self, x: u8) -> bool {
                    if let Some(sum) = self.checked_add(Self::from_u8(x)) {
                        *self = sum;
                        false
                    } else {
                        true
                    }
                }

                fn checked_sub_ext(&mut self, x: u8) -> bool {
                    if let Some(difference) = self.checked_sub(Self::from_u8(x)) {
                        *self = difference;
                        false
                    } else {
                        true
                    }
                }
            }
        )*
    };
}

impl_num!(u8 u16 u32 u64 u128 i8 i16 i32 i64 i128);
904 
/// The result of parsing a string literal.
#[derive(Clone, Debug)]
pub enum ParsedStr<'a> {
    /// An owned string, used when the literal contained escape sequences
    /// and therefore could not be borrowed verbatim.
    Allocated(String),
    /// A slice borrowed directly from the input.
    Slice(&'a str),
}
910 
/// A location in the input, as attached to parse errors.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Position {
    /// Line number.
    pub line: usize,
    /// Column number.
    pub col: usize,
}

impl Display for Position {
    /// Writes the position as `line:col`.
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        let Position { line, col } = *self;

        write!(f, "{}:{}", line, col)
    }
}
922 
#[cfg(test)]
mod tests {
    use super::*;

    // `decode_ascii_escape` reads two hex digits, so the input "10" must
    // decode to the byte 0x10.
    #[test]
    fn decode_x10() {
        let mut bytes = Bytes::new(b"10").unwrap();
        assert_eq!(bytes.decode_ascii_escape(), Ok(0x10));
    }
}
933