1 #![allow(clippy::identity_op)]
2
3 use std::{
4 char::from_u32 as char_from_u32,
5 fmt::{Display, Formatter, Result as FmtResult},
6 str::{from_utf8, from_utf8_unchecked, FromStr},
7 };
8
9 use crate::{
10 error::{Error, ErrorCode, Result},
11 extensions::Extensions,
12 };
13
// Each byte belongs to zero or more of the categories below. Every category
// owns exactly one bit so that memberships can be OR-ed together.
const INT_CHAR: u8 = 0b00_0001; // [0-9A-Fa-f_]
const FLOAT_CHAR: u8 = 0b00_0010; // [0-9\.Ee+-]
const IDENT_FIRST_CHAR: u8 = 0b00_0100; // [A-Za-z_]
const IDENT_OTHER_CHAR: u8 = 0b00_1000; // [A-Za-z_0-9]
const IDENT_RAW_CHAR: u8 = 0b01_0000; // [A-Za-z_0-9\.+-]
const WHITESPACE_CHAR: u8 = 0b10_0000; // [\n\t\r ]

// Per-character encodings: the set of categories a given character is in.
// The short names are all five characters wide so the lookup table lines up.
const G2Z__: u8 = IDENT_FIRST_CHAR | IDENT_OTHER_CHAR | IDENT_RAW_CHAR; // [G-Zg-z]
const ABCDF: u8 = INT_CHAR | G2Z__; // [ABCDFabcdf]: hex digits that are also letters
const UNDER: u8 = INT_CHAR | G2Z__; // [_]
const E____: u8 = FLOAT_CHAR | ABCDF; // [Ee]: hex digit and float exponent marker
const DIGIT: u8 = INT_CHAR | FLOAT_CHAR | IDENT_OTHER_CHAR | IDENT_RAW_CHAR; // [0-9]
const PUNCT: u8 = FLOAT_CHAR | IDENT_RAW_CHAR; // [\.+-]
const WS___: u8 = WHITESPACE_CHAR; // [\t\n\r ]
const _____: u8 = 0; // everything else
31
32 // Table of encodings, for fast predicates. (Non-ASCII and special chars are
33 // shown with '·' in the comment.)
34 #[rustfmt::skip]
35 const ENCODINGS: [u8; 256] = [
36 /* 0 1 2 3 4 5 6 7 8 9 */
37 /* 0+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, WS___,
38 /* 10+: ·········· */ WS___, _____, _____, WS___, _____, _____, _____, _____, _____, _____,
39 /* 20+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
40 /* 30+: ·· !"#$%&' */ _____, _____, WS___, _____, _____, _____, _____, _____, _____, _____,
41 /* 40+: ()*+,-./01 */ _____, _____, _____, PUNCT, _____, PUNCT, PUNCT, _____, DIGIT, DIGIT,
42 /* 50+: 23456789:; */ DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, _____, _____,
43 /* 60+: <=>?@ABCDE */ _____, _____, _____, _____, _____, ABCDF, ABCDF, ABCDF, ABCDF, E____,
44 /* 70+: FGHIJKLMNO */ ABCDF, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
45 /* 80+: PQRSTUVWZY */ G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
46 /* 90+: Z[\]^_`abc */ G2Z__, _____, _____, _____, _____, UNDER, _____, ABCDF, ABCDF, ABCDF,
47 /* 100+: defghijklm */ ABCDF, E____, ABCDF, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
48 /* 110+: nopqrstuvw */ G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
49 /* 120+: xyz{|}~··· */ G2Z__, G2Z__, G2Z__, _____, _____, _____, _____, _____, _____, _____,
50 /* 130+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
51 /* 140+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
52 /* 150+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
53 /* 160+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
54 /* 170+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
55 /* 180+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
56 /* 190+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
57 /* 200+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
58 /* 210+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
59 /* 220+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
60 /* 230+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
61 /* 240+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
62 /* 250+: ·········· */ _____, _____, _____, _____, _____, _____
63 ];
64
is_int_char(c: u8) -> bool65 const fn is_int_char(c: u8) -> bool {
66 ENCODINGS[c as usize] & INT_CHAR != 0
67 }
68
is_float_char(c: u8) -> bool69 const fn is_float_char(c: u8) -> bool {
70 ENCODINGS[c as usize] & FLOAT_CHAR != 0
71 }
72
is_ident_first_char(c: u8) -> bool73 pub const fn is_ident_first_char(c: u8) -> bool {
74 ENCODINGS[c as usize] & IDENT_FIRST_CHAR != 0
75 }
76
is_ident_other_char(c: u8) -> bool77 pub const fn is_ident_other_char(c: u8) -> bool {
78 ENCODINGS[c as usize] & IDENT_OTHER_CHAR != 0
79 }
80
is_ident_raw_char(c: u8) -> bool81 const fn is_ident_raw_char(c: u8) -> bool {
82 ENCODINGS[c as usize] & IDENT_RAW_CHAR != 0
83 }
84
is_whitespace_char(c: u8) -> bool85 const fn is_whitespace_char(c: u8) -> bool {
86 ENCODINGS[c as usize] & WHITESPACE_CHAR != 0
87 }
88
/// A number of any of the primitive numeric types.
///
/// This is the result type of `Bytes::any_num`, which picks the variant from
/// the parsed literal.
#[derive(Clone, Debug, PartialEq)]
pub enum AnyNum {
    /// A 32-bit float.
    F32(f32),
    /// A 64-bit float.
    F64(f64),
    /// A signed 8-bit integer.
    I8(i8),
    /// An unsigned 8-bit integer.
    U8(u8),
    /// A signed 16-bit integer.
    I16(i16),
    /// An unsigned 16-bit integer.
    U16(u16),
    /// A signed 32-bit integer.
    I32(i32),
    /// An unsigned 32-bit integer.
    U32(u32),
    /// A signed 64-bit integer.
    I64(i64),
    /// An unsigned 64-bit integer.
    U64(u64),
    /// A signed 128-bit integer.
    I128(i128),
    /// An unsigned 128-bit integer.
    U128(u128),
}
104
105 #[derive(Clone, Copy, Debug)]
106 pub struct Bytes<'a> {
107 /// Bits set according to `Extension` enum.
108 pub exts: Extensions,
109 bytes: &'a [u8],
110 column: usize,
111 line: usize,
112 }
113
114 impl<'a> Bytes<'a> {
new(bytes: &'a [u8]) -> Result<Self>115 pub fn new(bytes: &'a [u8]) -> Result<Self> {
116 let mut b = Bytes {
117 bytes,
118 column: 1,
119 exts: Extensions::empty(),
120 line: 1,
121 };
122
123 b.skip_ws()?;
124 // Loop over all extensions attributes
125 loop {
126 let attribute = b.extensions()?;
127
128 if attribute.is_empty() {
129 break;
130 }
131
132 b.exts |= attribute;
133 b.skip_ws()?;
134 }
135
136 Ok(b)
137 }
138
advance(&mut self, bytes: usize) -> Result<()>139 pub fn advance(&mut self, bytes: usize) -> Result<()> {
140 for _ in 0..bytes {
141 self.advance_single()?;
142 }
143
144 Ok(())
145 }
146
advance_single(&mut self) -> Result<()>147 pub fn advance_single(&mut self) -> Result<()> {
148 if self.peek_or_eof()? == b'\n' {
149 self.line += 1;
150 self.column = 1;
151 } else {
152 self.column += 1;
153 }
154
155 self.bytes = &self.bytes[1..];
156
157 Ok(())
158 }
159
any_integer<T: Num>(&mut self, sign: i8) -> Result<T>160 fn any_integer<T: Num>(&mut self, sign: i8) -> Result<T> {
161 let base = if self.peek() == Some(b'0') {
162 match self.bytes.get(1).cloned() {
163 Some(b'x') => 16,
164 Some(b'b') => 2,
165 Some(b'o') => 8,
166 _ => 10,
167 }
168 } else {
169 10
170 };
171
172 if base != 10 {
173 // If we have `0x45A` for example,
174 // cut it to `45A`.
175 let _ = self.advance(2);
176 }
177
178 let num_bytes = self.next_bytes_contained_in(is_int_char);
179
180 if num_bytes == 0 {
181 return self.err(ErrorCode::ExpectedInteger);
182 }
183
184 let s = unsafe { from_utf8_unchecked(&self.bytes[0..num_bytes]) };
185
186 if s.as_bytes()[0] == b'_' {
187 return self.err(ErrorCode::UnderscoreAtBeginning);
188 }
189
190 fn calc_num<T: Num>(
191 bytes: &Bytes,
192 s: &str,
193 base: u8,
194 mut f: impl FnMut(&mut T, u8) -> bool,
195 ) -> Result<T> {
196 let mut num_acc = T::from_u8(0);
197
198 for &byte in s.as_bytes() {
199 if byte == b'_' {
200 continue;
201 }
202
203 if num_acc.checked_mul_ext(base) {
204 return bytes.err(ErrorCode::IntegerOutOfBounds);
205 }
206
207 let digit = bytes.decode_hex(byte)?;
208
209 if digit >= base {
210 return bytes.err(ErrorCode::ExpectedInteger);
211 }
212
213 if f(&mut num_acc, digit) {
214 return bytes.err(ErrorCode::IntegerOutOfBounds);
215 }
216 }
217
218 Ok(num_acc)
219 }
220
221 let res = if sign > 0 {
222 calc_num(&*self, s, base, T::checked_add_ext)
223 } else {
224 calc_num(&*self, s, base, T::checked_sub_ext)
225 };
226
227 let _ = self.advance(num_bytes);
228
229 res
230 }
231
any_num(&mut self) -> Result<AnyNum>232 pub fn any_num(&mut self) -> Result<AnyNum> {
233 // We are not doing float comparisons here in the traditional sense.
234 // Instead, this code checks if a f64 fits inside an f32.
235 #[allow(clippy::float_cmp)]
236 fn any_float(f: f64) -> Result<AnyNum> {
237 if f == f64::from(f as f32) {
238 Ok(AnyNum::F32(f as f32))
239 } else {
240 Ok(AnyNum::F64(f))
241 }
242 }
243
244 let bytes_backup = self.bytes;
245
246 let first_byte = self.peek_or_eof()?;
247 let is_signed = first_byte == b'-' || first_byte == b'+';
248 let is_float = self.next_bytes_is_float();
249
250 if is_float {
251 let f = self.float::<f64>()?;
252
253 any_float(f)
254 } else {
255 let max_u8 = u128::from(std::u8::MAX);
256 let max_u16 = u128::from(std::u16::MAX);
257 let max_u32 = u128::from(std::u32::MAX);
258 let max_u64 = u128::from(std::u64::MAX);
259
260 let min_i8 = i128::from(std::i8::MIN);
261 let max_i8 = i128::from(std::i8::MAX);
262 let min_i16 = i128::from(std::i16::MIN);
263 let max_i16 = i128::from(std::i16::MAX);
264 let min_i32 = i128::from(std::i32::MIN);
265 let max_i32 = i128::from(std::i32::MAX);
266 let min_i64 = i128::from(std::i64::MIN);
267 let max_i64 = i128::from(std::i64::MAX);
268
269 if is_signed {
270 match self.signed_integer::<i128>() {
271 Ok(x) => {
272 if x >= min_i8 && x <= max_i8 {
273 Ok(AnyNum::I8(x as i8))
274 } else if x >= min_i16 && x <= max_i16 {
275 Ok(AnyNum::I16(x as i16))
276 } else if x >= min_i32 && x <= max_i32 {
277 Ok(AnyNum::I32(x as i32))
278 } else if x >= min_i64 && x <= max_i64 {
279 Ok(AnyNum::I64(x as i64))
280 } else {
281 Ok(AnyNum::I128(x))
282 }
283 }
284 Err(_) => {
285 self.bytes = bytes_backup;
286
287 any_float(self.float::<f64>()?)
288 }
289 }
290 } else {
291 match self.unsigned_integer::<u128>() {
292 Ok(x) => {
293 if x <= max_u8 {
294 Ok(AnyNum::U8(x as u8))
295 } else if x <= max_u16 {
296 Ok(AnyNum::U16(x as u16))
297 } else if x <= max_u32 {
298 Ok(AnyNum::U32(x as u32))
299 } else if x <= max_u64 {
300 Ok(AnyNum::U64(x as u64))
301 } else {
302 Ok(AnyNum::U128(x))
303 }
304 }
305 Err(_) => {
306 self.bytes = bytes_backup;
307
308 any_float(self.float::<f64>()?)
309 }
310 }
311 }
312 }
313 }
314
bool(&mut self) -> Result<bool>315 pub fn bool(&mut self) -> Result<bool> {
316 if self.consume("true") {
317 Ok(true)
318 } else if self.consume("false") {
319 Ok(false)
320 } else {
321 self.err(ErrorCode::ExpectedBoolean)
322 }
323 }
324
bytes(&self) -> &[u8]325 pub fn bytes(&self) -> &[u8] {
326 &self.bytes
327 }
328
char(&mut self) -> Result<char>329 pub fn char(&mut self) -> Result<char> {
330 if !self.consume("'") {
331 return self.err(ErrorCode::ExpectedChar);
332 }
333
334 let c = self.peek_or_eof()?;
335
336 let c = if c == b'\\' {
337 let _ = self.advance(1);
338
339 self.parse_escape()?
340 } else {
341 // Check where the end of the char (') is and try to
342 // interpret the rest as UTF-8
343
344 let max = self.bytes.len().min(5);
345 let pos: usize = self.bytes[..max]
346 .iter()
347 .position(|&x| x == b'\'')
348 .ok_or_else(|| self.error(ErrorCode::ExpectedChar))?;
349 let s = from_utf8(&self.bytes[0..pos]).map_err(|e| self.error(e.into()))?;
350 let mut chars = s.chars();
351
352 let first = chars
353 .next()
354 .ok_or_else(|| self.error(ErrorCode::ExpectedChar))?;
355 if chars.next().is_some() {
356 return self.err(ErrorCode::ExpectedChar);
357 }
358
359 let _ = self.advance(pos);
360
361 first
362 };
363
364 if !self.consume("'") {
365 return self.err(ErrorCode::ExpectedChar);
366 }
367
368 Ok(c)
369 }
370
comma(&mut self) -> Result<bool>371 pub fn comma(&mut self) -> Result<bool> {
372 self.skip_ws()?;
373
374 if self.consume(",") {
375 self.skip_ws()?;
376
377 Ok(true)
378 } else {
379 Ok(false)
380 }
381 }
382
383 /// Only returns true if the char after `ident` cannot belong
384 /// to an identifier.
check_ident(&mut self, ident: &str) -> bool385 pub fn check_ident(&mut self, ident: &str) -> bool {
386 self.test_for(ident) && !self.check_ident_other_char(ident.len())
387 }
388
check_ident_other_char(&self, index: usize) -> bool389 fn check_ident_other_char(&self, index: usize) -> bool {
390 self.bytes
391 .get(index)
392 .map_or(false, |&b| is_ident_other_char(b))
393 }
394
395 /// Should only be used on a working copy
check_tuple_struct(mut self) -> Result<bool>396 pub fn check_tuple_struct(mut self) -> Result<bool> {
397 if self.identifier().is_err() {
398 // if there's no field ident, this is a tuple struct
399 return Ok(true);
400 }
401
402 self.skip_ws()?;
403
404 // if there is no colon after the ident, this can only be a unit struct
405 self.eat_byte().map(|c| c != b':')
406 }
407
408 /// Only returns true if the char after `ident` cannot belong
409 /// to an identifier.
consume_ident(&mut self, ident: &str) -> bool410 pub fn consume_ident(&mut self, ident: &str) -> bool {
411 if self.check_ident(ident) {
412 let _ = self.advance(ident.len());
413
414 true
415 } else {
416 false
417 }
418 }
419
consume(&mut self, s: &str) -> bool420 pub fn consume(&mut self, s: &str) -> bool {
421 if self.test_for(s) {
422 let _ = self.advance(s.len());
423
424 true
425 } else {
426 false
427 }
428 }
429
consume_all(&mut self, all: &[&str]) -> Result<bool>430 fn consume_all(&mut self, all: &[&str]) -> Result<bool> {
431 all.iter()
432 .map(|elem| {
433 if self.consume(elem) {
434 self.skip_ws()?;
435
436 Ok(true)
437 } else {
438 Ok(false)
439 }
440 })
441 .fold(Ok(true), |acc, x| acc.and_then(|val| x.map(|x| x && val)))
442 }
443
eat_byte(&mut self) -> Result<u8>444 pub fn eat_byte(&mut self) -> Result<u8> {
445 let peek = self.peek_or_eof()?;
446 let _ = self.advance_single();
447
448 Ok(peek)
449 }
450
err<T>(&self, kind: ErrorCode) -> Result<T>451 pub fn err<T>(&self, kind: ErrorCode) -> Result<T> {
452 Err(self.error(kind))
453 }
454
error(&self, kind: ErrorCode) -> Error455 pub fn error(&self, kind: ErrorCode) -> Error {
456 Error {
457 code: kind,
458 position: Position {
459 line: self.line,
460 col: self.column,
461 },
462 }
463 }
464
expect_byte(&mut self, byte: u8, error: ErrorCode) -> Result<()>465 pub fn expect_byte(&mut self, byte: u8, error: ErrorCode) -> Result<()> {
466 self.eat_byte()
467 .and_then(|b| if b == byte { Ok(()) } else { self.err(error) })
468 }
469
470 /// Returns the extensions bit mask.
extensions(&mut self) -> Result<Extensions>471 fn extensions(&mut self) -> Result<Extensions> {
472 if self.peek() != Some(b'#') {
473 return Ok(Extensions::empty());
474 }
475
476 if !self.consume_all(&["#", "!", "[", "enable", "("])? {
477 return self.err(ErrorCode::ExpectedAttribute);
478 }
479
480 self.skip_ws()?;
481 let mut extensions = Extensions::empty();
482
483 loop {
484 let ident = self.identifier()?;
485 let extension = Extensions::from_ident(ident).ok_or_else(|| {
486 self.error(ErrorCode::NoSuchExtension(
487 from_utf8(ident).unwrap().to_owned(),
488 ))
489 })?;
490
491 extensions |= extension;
492
493 let comma = self.comma()?;
494
495 // If we have no comma but another item, return an error
496 if !comma && self.check_ident_other_char(0) {
497 return self.err(ErrorCode::ExpectedComma);
498 }
499
500 // If there's no comma, assume the list ended.
501 // If there is, it might be a trailing one, thus we only
502 // continue the loop if we get an ident char.
503 if !comma || !self.check_ident_other_char(0) {
504 break;
505 }
506 }
507
508 self.skip_ws()?;
509
510 if self.consume_all(&[")", "]"])? {
511 Ok(extensions)
512 } else {
513 Err(self.error(ErrorCode::ExpectedAttributeEnd))
514 }
515 }
516
float<T>(&mut self) -> Result<T> where T: FromStr,517 pub fn float<T>(&mut self) -> Result<T>
518 where
519 T: FromStr,
520 {
521 for literal in &["inf", "-inf", "NaN"] {
522 if self.consume_ident(literal) {
523 return FromStr::from_str(literal).map_err(|_| unreachable!()); // must not fail
524 }
525 }
526
527 let num_bytes = self.next_bytes_contained_in(is_float_char);
528
529 let s = unsafe { from_utf8_unchecked(&self.bytes[0..num_bytes]) };
530 let res = FromStr::from_str(s).map_err(|_| self.error(ErrorCode::ExpectedFloat));
531
532 let _ = self.advance(num_bytes);
533
534 res
535 }
536
identifier(&mut self) -> Result<&'a [u8]>537 pub fn identifier(&mut self) -> Result<&'a [u8]> {
538 let next = self.peek_or_eof()?;
539 if !is_ident_first_char(next) {
540 return self.err(ErrorCode::ExpectedIdentifier);
541 }
542
543 // If the next two bytes signify the start of a raw string literal,
544 // return an error.
545 let length = if next == b'r' {
546 match self
547 .bytes
548 .get(1)
549 .ok_or_else(|| self.error(ErrorCode::Eof))?
550 {
551 b'"' => return self.err(ErrorCode::ExpectedIdentifier),
552 b'#' => {
553 let after_next = self.bytes.get(2).cloned().unwrap_or_default();
554 //Note: it's important to check this before advancing forward, so that
555 // the value-type deserializer can fall back to parsing it differently.
556 if !is_ident_raw_char(after_next) {
557 return self.err(ErrorCode::ExpectedIdentifier);
558 }
559 // skip "r#"
560 let _ = self.advance(2);
561 self.next_bytes_contained_in(is_ident_raw_char)
562 }
563 _ => self.next_bytes_contained_in(is_ident_other_char),
564 }
565 } else {
566 self.next_bytes_contained_in(is_ident_other_char)
567 };
568
569 let ident = &self.bytes[..length];
570 let _ = self.advance(length);
571
572 Ok(ident)
573 }
574
next_bytes_contained_in(&self, allowed: fn(u8) -> bool) -> usize575 pub fn next_bytes_contained_in(&self, allowed: fn(u8) -> bool) -> usize {
576 self.bytes.iter().take_while(|&&b| allowed(b)).count()
577 }
578
next_bytes_is_float(&self) -> bool579 pub fn next_bytes_is_float(&self) -> bool {
580 if let Some(byte) = self.peek() {
581 let skip = match byte {
582 b'+' | b'-' => 1,
583 _ => 0,
584 };
585 let flen = self
586 .bytes
587 .iter()
588 .skip(skip)
589 .take_while(|&&b| is_float_char(b))
590 .count();
591 let ilen = self
592 .bytes
593 .iter()
594 .skip(skip)
595 .take_while(|&&b| is_int_char(b))
596 .count();
597 flen > ilen
598 } else {
599 false
600 }
601 }
602
skip_ws(&mut self) -> Result<()>603 pub fn skip_ws(&mut self) -> Result<()> {
604 while self.peek().map_or(false, is_whitespace_char) {
605 let _ = self.advance_single();
606 }
607
608 if self.skip_comment()? {
609 self.skip_ws()?;
610 }
611
612 Ok(())
613 }
614
peek(&self) -> Option<u8>615 pub fn peek(&self) -> Option<u8> {
616 self.bytes.get(0).cloned()
617 }
618
peek_or_eof(&self) -> Result<u8>619 pub fn peek_or_eof(&self) -> Result<u8> {
620 self.bytes
621 .get(0)
622 .cloned()
623 .ok_or_else(|| self.error(ErrorCode::Eof))
624 }
625
signed_integer<T>(&mut self) -> Result<T> where T: Num,626 pub fn signed_integer<T>(&mut self) -> Result<T>
627 where
628 T: Num,
629 {
630 match self.peek_or_eof()? {
631 b'+' => {
632 let _ = self.advance_single();
633
634 self.any_integer(1)
635 }
636 b'-' => {
637 let _ = self.advance_single();
638
639 self.any_integer(-1)
640 }
641 _ => self.any_integer(1),
642 }
643 }
644
string(&mut self) -> Result<ParsedStr<'a>>645 pub fn string(&mut self) -> Result<ParsedStr<'a>> {
646 if self.consume("\"") {
647 self.escaped_string()
648 } else if self.consume("r") {
649 self.raw_string()
650 } else {
651 self.err(ErrorCode::ExpectedString)
652 }
653 }
654
escaped_string(&mut self) -> Result<ParsedStr<'a>>655 fn escaped_string(&mut self) -> Result<ParsedStr<'a>> {
656 use std::iter::repeat;
657
658 let (i, end_or_escape) = self
659 .bytes
660 .iter()
661 .enumerate()
662 .find(|&(_, &b)| b == b'\\' || b == b'"')
663 .ok_or_else(|| self.error(ErrorCode::ExpectedStringEnd))?;
664
665 if *end_or_escape == b'"' {
666 let s = from_utf8(&self.bytes[..i]).map_err(|e| self.error(e.into()))?;
667
668 // Advance by the number of bytes of the string
669 // + 1 for the `"`.
670 let _ = self.advance(i + 1);
671
672 Ok(ParsedStr::Slice(s))
673 } else {
674 let mut i = i;
675 let mut s: Vec<_> = self.bytes[..i].to_vec();
676
677 loop {
678 let _ = self.advance(i + 1);
679 let character = self.parse_escape()?;
680 match character.len_utf8() {
681 1 => s.push(character as u8),
682 len => {
683 let start = s.len();
684 s.extend(repeat(0).take(len));
685 character.encode_utf8(&mut s[start..]);
686 }
687 }
688
689 let (new_i, end_or_escape) = self
690 .bytes
691 .iter()
692 .enumerate()
693 .find(|&(_, &b)| b == b'\\' || b == b'"')
694 .ok_or(ErrorCode::Eof)
695 .map_err(|e| self.error(e))?;
696
697 i = new_i;
698 s.extend_from_slice(&self.bytes[..i]);
699
700 if *end_or_escape == b'"' {
701 let _ = self.advance(i + 1);
702
703 let s = String::from_utf8(s).map_err(|e| self.error(e.into()))?;
704 break Ok(ParsedStr::Allocated(s));
705 }
706 }
707 }
708 }
709
raw_string(&mut self) -> Result<ParsedStr<'a>>710 fn raw_string(&mut self) -> Result<ParsedStr<'a>> {
711 let num_hashes = self.bytes.iter().take_while(|&&b| b == b'#').count();
712 let hashes = &self.bytes[..num_hashes];
713 let _ = self.advance(num_hashes);
714
715 if !self.consume("\"") {
716 return self.err(ErrorCode::ExpectedString);
717 }
718
719 let ending = [&[b'"'], hashes].concat();
720 let i = self
721 .bytes
722 .windows(num_hashes + 1)
723 .position(|window| window == ending.as_slice())
724 .ok_or_else(|| self.error(ErrorCode::ExpectedStringEnd))?;
725
726 let s = from_utf8(&self.bytes[..i]).map_err(|e| self.error(e.into()))?;
727
728 // Advance by the number of bytes of the string
729 // + `num_hashes` + 1 for the `"`.
730 let _ = self.advance(i + num_hashes + 1);
731
732 Ok(ParsedStr::Slice(s))
733 }
734
test_for(&self, s: &str) -> bool735 fn test_for(&self, s: &str) -> bool {
736 s.bytes()
737 .enumerate()
738 .all(|(i, b)| self.bytes.get(i).map_or(false, |t| *t == b))
739 }
740
unsigned_integer<T: Num>(&mut self) -> Result<T>741 pub fn unsigned_integer<T: Num>(&mut self) -> Result<T> {
742 self.any_integer(1)
743 }
744
decode_ascii_escape(&mut self) -> Result<u8>745 fn decode_ascii_escape(&mut self) -> Result<u8> {
746 let mut n = 0;
747 for _ in 0..2 {
748 n <<= 4;
749 let byte = self.eat_byte()?;
750 let decoded = self.decode_hex(byte)?;
751 n |= decoded;
752 }
753
754 Ok(n)
755 }
756
757 #[inline]
decode_hex(&self, c: u8) -> Result<u8>758 fn decode_hex(&self, c: u8) -> Result<u8> {
759 match c {
760 c @ b'0'..=b'9' => Ok(c - b'0'),
761 c @ b'a'..=b'f' => Ok(10 + c - b'a'),
762 c @ b'A'..=b'F' => Ok(10 + c - b'A'),
763 _ => self.err(ErrorCode::InvalidEscape("Non-hex digit found")),
764 }
765 }
766
parse_escape(&mut self) -> Result<char>767 fn parse_escape(&mut self) -> Result<char> {
768 let c = match self.eat_byte()? {
769 b'\'' => '\'',
770 b'"' => '"',
771 b'\\' => '\\',
772 b'n' => '\n',
773 b'r' => '\r',
774 b't' => '\t',
775 b'x' => self.decode_ascii_escape()? as char,
776 b'u' => {
777 self.expect_byte(b'{', ErrorCode::InvalidEscape("Missing {"))?;
778
779 let mut bytes: u32 = 0;
780 let mut num_digits = 0;
781
782 while num_digits < 6 {
783 let byte = self.peek_or_eof()?;
784
785 if byte == b'}' {
786 break;
787 } else {
788 self.advance_single()?;
789 }
790
791 let byte = self.decode_hex(byte)?;
792 bytes <<= 4;
793 bytes |= u32::from(byte);
794
795 num_digits += 1;
796 }
797
798 if num_digits == 0 {
799 return self.err(ErrorCode::InvalidEscape(
800 "Expected 1-6 digits, got 0 digits",
801 ));
802 }
803
804 self.expect_byte(b'}', ErrorCode::InvalidEscape("No } at the end"))?;
805 char_from_u32(bytes)
806 .ok_or_else(|| self.error(ErrorCode::InvalidEscape("Not a valid char")))?
807 }
808 _ => {
809 return self.err(ErrorCode::InvalidEscape("Unknown escape character"));
810 }
811 };
812
813 Ok(c)
814 }
815
skip_comment(&mut self) -> Result<bool>816 fn skip_comment(&mut self) -> Result<bool> {
817 if self.consume("/") {
818 match self.eat_byte()? {
819 b'/' => {
820 let bytes = self.bytes.iter().take_while(|&&b| b != b'\n').count();
821
822 let _ = self.advance(bytes);
823 }
824 b'*' => {
825 let mut level = 1;
826
827 while level > 0 {
828 let bytes = self
829 .bytes
830 .iter()
831 .take_while(|&&b| b != b'/' && b != b'*')
832 .count();
833
834 if self.bytes.is_empty() {
835 return self.err(ErrorCode::UnclosedBlockComment);
836 }
837
838 let _ = self.advance(bytes);
839
840 // check whether / or * and take action
841 if self.consume("/*") {
842 level += 1;
843 } else if self.consume("*/") {
844 level -= 1;
845 } else {
846 self.eat_byte()
847 .map_err(|_| self.error(ErrorCode::UnclosedBlockComment))?;
848 }
849 }
850 }
851 b => return self.err(ErrorCode::UnexpectedByte(b as char)),
852 }
853
854 Ok(true)
855 } else {
856 Ok(false)
857 }
858 }
859 }
860
/// Integer operations needed by the parser to build a number digit by digit.
///
/// The `checked_*_ext` methods update `self` in place on success and leave
/// it untouched on overflow.
pub trait Num {
    /// Converts a small value (a digit or a base) into `Self`.
    fn from_u8(x: u8) -> Self;

    /// Multiplies `self` by `x` in place. Returns `true` on overflow.
    fn checked_mul_ext(&mut self, x: u8) -> bool;

    /// Adds `x` to `self` in place. Returns `true` on overflow.
    fn checked_add_ext(&mut self, x: u8) -> bool;

    /// Subtracts `x` from `self` in place. Returns `true` on overflow.
    fn checked_sub_ext(&mut self, x: u8) -> bool;
}

// Implements `Num` for one primitive integer type, or for a
// whitespace-separated list of them.
macro_rules! impl_num {
    ($ty:ident) => {
        impl Num for $ty {
            fn from_u8(x: u8) -> Self {
                x as $ty
            }

            fn checked_mul_ext(&mut self, x: u8) -> bool {
                self.checked_mul(Self::from_u8(x))
                    .map(|product| *self = product)
                    .is_none()
            }

            fn checked_add_ext(&mut self, x: u8) -> bool {
                self.checked_add(Self::from_u8(x))
                    .map(|sum| *self = sum)
                    .is_none()
            }

            fn checked_sub_ext(&mut self, x: u8) -> bool {
                self.checked_sub(Self::from_u8(x))
                    .map(|difference| *self = difference)
                    .is_none()
            }
        }
    };
    ($($tys:ident)*) => {
        $( impl_num!($tys); )*
    };
}

impl_num!(u8 u16 u32 u64 u128 i8 i16 i32 i64 i128);
918
/// The result of parsing a string literal.
#[derive(Clone, Debug)]
pub enum ParsedStr<'a> {
    /// An owned string, used when the contents had to be rebuilt instead of
    /// being borrowed (the parser does this for strings containing escapes).
    Allocated(String),
    /// A string borrowed directly from the input.
    Slice(&'a str),
}
924
/// A line/column location, attached to errors to say where they occurred.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Position {
    /// Line number.
    pub line: usize,
    /// Column number within the line.
    pub col: usize,
}

/// Formats the position as `line:col`.
impl Display for Position {
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        let Position { line, col } = *self;

        f.write_fmt(format_args!("{}:{}", line, col))
    }
}
936
#[cfg(test)]
mod tests {
    use super::*;

    /// The two hex digits `10` decode to the single byte `0x10`.
    #[test]
    fn decode_x10() {
        let mut parser = Bytes::new(b"10").unwrap();
        let decoded = parser.decode_ascii_escape();

        assert_eq!(decoded, Ok(0x10));
    }
}
947