1 use std::{
2 char::from_u32 as char_from_u32,
3 fmt::{Display, Formatter, Result as FmtResult},
4 str::{from_utf8, from_utf8_unchecked, FromStr},
5 };
6
7 use crate::{
8 error::{Error, ErrorCode, Result},
9 extensions::Extensions,
10 };
11
// Character categories. Each category owns exactly one bit so that a single
// byte can be a member of several categories at once.
const INT_CHAR: u8 = 0b0000_0001; // [0-9A-Fa-f_]
const FLOAT_CHAR: u8 = 0b0000_0010; // [0-9\.Ee+-]
const IDENT_FIRST_CHAR: u8 = 0b0000_0100; // [A-Za-z_]
const IDENT_OTHER_CHAR: u8 = 0b0000_1000; // [A-Za-z_0-9]
const WHITESPACE_CHAR: u8 = 0b0001_0000; // [\n\t\r ]

// Combined encodings: the set of categories shared by a group of bytes.
// The names are all five characters wide so the lookup table lines up.
const DIGIT: u8 = IDENT_OTHER_CHAR | FLOAT_CHAR | INT_CHAR; // [0-9]
const ABCDF: u8 = IDENT_OTHER_CHAR | IDENT_FIRST_CHAR | INT_CHAR; // [ABCDFabcdf]
const UNDER: u8 = IDENT_OTHER_CHAR | IDENT_FIRST_CHAR | INT_CHAR; // [_]
const E____: u8 = IDENT_OTHER_CHAR | IDENT_FIRST_CHAR | FLOAT_CHAR | INT_CHAR; // [Ee]
const G2Z__: u8 = IDENT_OTHER_CHAR | IDENT_FIRST_CHAR; // [G-Zg-z]
const PUNCT: u8 = FLOAT_CHAR; // [\.+-]
const WS___: u8 = WHITESPACE_CHAR; // [\t\n\r ]
const _____: u8 = 0; // everything else
28
// Table of encodings, for fast predicates. The table is indexed by the byte
// value; each entry is the set of category bits that byte belongs to. The
// leading comment on every row shows the ten characters the row covers.
// (Non-ASCII and special chars are shown with '·' in the comment.)
#[rustfmt::skip]
const ENCODINGS: [u8; 256] = [
    /* 0 1 2 3 4 5 6 7 8 9 */
    /* 0+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, WS___,
    /* 10+: ·········· */ WS___, _____, _____, WS___, _____, _____, _____, _____, _____, _____,
    /* 20+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
    /* 30+: ·· !"#$%&' */ _____, _____, WS___, _____, _____, _____, _____, _____, _____, _____,
    /* 40+: ()*+,-./01 */ _____, _____, _____, PUNCT, _____, PUNCT, PUNCT, _____, DIGIT, DIGIT,
    /* 50+: 23456789:; */ DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, _____, _____,
    /* 60+: <=>?@ABCDE */ _____, _____, _____, _____, _____, ABCDF, ABCDF, ABCDF, ABCDF, E____,
    /* 70+: FGHIJKLMNO */ ABCDF, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
    /* 80+: PQRSTUVWXY */ G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
    /* 90+: Z[\]^_`abc */ G2Z__, _____, _____, _____, _____, UNDER, _____, ABCDF, ABCDF, ABCDF,
    /* 100+: defghijklm */ ABCDF, E____, ABCDF, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
    /* 110+: nopqrstuvw */ G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__, G2Z__,
    /* 120+: xyz{|}~··· */ G2Z__, G2Z__, G2Z__, _____, _____, _____, _____, _____, _____, _____,
    /* 130+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
    /* 140+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
    /* 150+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
    /* 160+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
    /* 170+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
    /* 180+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
    /* 190+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
    /* 200+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
    /* 210+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
    /* 220+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
    /* 230+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
    /* 240+: ·········· */ _____, _____, _____, _____, _____, _____, _____, _____, _____, _____,
    /* 250+: ·········· */ _____, _____, _____, _____, _____, _____
];
61
is_int_char(c: u8) -> bool62 const fn is_int_char(c: u8) -> bool {
63 ENCODINGS[c as usize] & INT_CHAR != 0
64 }
65
is_float_char(c: u8) -> bool66 const fn is_float_char(c: u8) -> bool {
67 ENCODINGS[c as usize] & FLOAT_CHAR != 0
68 }
69
is_ident_first_char(c: u8) -> bool70 const fn is_ident_first_char(c: u8) -> bool {
71 ENCODINGS[c as usize] & IDENT_FIRST_CHAR != 0
72 }
73
is_ident_other_char(c: u8) -> bool74 const fn is_ident_other_char(c: u8) -> bool {
75 ENCODINGS[c as usize] & IDENT_OTHER_CHAR != 0
76 }
77
is_whitespace_char(c: u8) -> bool78 const fn is_whitespace_char(c: u8) -> bool {
79 ENCODINGS[c as usize] & WHITESPACE_CHAR != 0
80 }
81
/// A number of any of the supported primitive numeric types.
///
/// This is the return type of `Bytes::any_num`, which picks the variant
/// based on the parsed value: floats that survive a round trip through
/// `f32` become `F32`, signed/unsigned integers use the narrowest integer
/// variant that holds them.
#[derive(Clone, Debug, PartialEq)]
pub enum AnyNum {
    F32(f32),
    F64(f64),
    I8(i8),
    U8(u8),
    I16(i16),
    U16(u16),
    I32(i32),
    U32(u32),
    I64(i64),
    U64(u64),
    I128(i128),
    U128(u128),
}
97
/// A cursor over the raw input bytes.
///
/// Besides the remaining input it tracks the current line/column (used to
/// build error positions) and the extensions enabled for the document.
/// The type is `Copy`, so a working copy can be made for look-ahead.
#[derive(Clone, Copy, Debug)]
pub struct Bytes<'a> {
    /// Bits set according to `Extension` enum.
    pub exts: Extensions,
    /// The input that has not been consumed yet; consumed bytes are sliced
    /// off the front.
    bytes: &'a [u8],
    /// Current column; starts at 1 and is reset to 1 after every `\n`.
    column: usize,
    /// Current line; starts at 1 and is incremented on every `\n`.
    line: usize,
}
106
107 impl<'a> Bytes<'a> {
new(bytes: &'a [u8]) -> Result<Self>108 pub fn new(bytes: &'a [u8]) -> Result<Self> {
109 let mut b = Bytes {
110 bytes,
111 column: 1,
112 exts: Extensions::empty(),
113 line: 1,
114 };
115
116 b.skip_ws()?;
117 // Loop over all extensions attributes
118 loop {
119 let attribute = b.extensions()?;
120
121 if attribute.is_empty() {
122 break;
123 }
124
125 b.exts |= attribute;
126 b.skip_ws()?;
127 }
128
129 Ok(b)
130 }
131
advance(&mut self, bytes: usize) -> Result<()>132 pub fn advance(&mut self, bytes: usize) -> Result<()> {
133 for _ in 0..bytes {
134 self.advance_single()?;
135 }
136
137 Ok(())
138 }
139
advance_single(&mut self) -> Result<()>140 pub fn advance_single(&mut self) -> Result<()> {
141 if self.peek_or_eof()? == b'\n' {
142 self.line += 1;
143 self.column = 1;
144 } else {
145 self.column += 1;
146 }
147
148 self.bytes = &self.bytes[1..];
149
150 Ok(())
151 }
152
any_integer<T: Num>(&mut self, sign: i8) -> Result<T>153 fn any_integer<T: Num>(&mut self, sign: i8) -> Result<T> {
154 let base = if self.peek() == Some(b'0') {
155 match self.bytes.get(1).cloned() {
156 Some(b'x') => 16,
157 Some(b'b') => 2,
158 Some(b'o') => 8,
159 _ => 10,
160 }
161 } else {
162 10
163 };
164
165 if base != 10 {
166 // If we have `0x45A` for example,
167 // cut it to `45A`.
168 let _ = self.advance(2);
169 }
170
171 let num_bytes = self.next_bytes_contained_in(is_int_char);
172
173 if num_bytes == 0 {
174 return self.err(ErrorCode::ExpectedInteger);
175 }
176
177 let s = unsafe { from_utf8_unchecked(&self.bytes[0..num_bytes]) };
178
179 if s.as_bytes()[0] == b'_' {
180 return self.err(ErrorCode::UnderscoreAtBeginning);
181 }
182
183 fn calc_num<T: Num>(
184 bytes: &Bytes,
185 s: &str,
186 base: u8,
187 mut f: impl FnMut(&mut T, u8) -> bool,
188 ) -> Result<T> {
189 let mut num_acc = T::from_u8(0);
190
191 for &byte in s.as_bytes() {
192 if byte == b'_' {
193 continue;
194 }
195
196 if num_acc.checked_mul_ext(base) {
197 return bytes.err(ErrorCode::IntegerOutOfBounds);
198 }
199
200 let digit = bytes.decode_hex(byte)?;
201
202 if digit >= base {
203 return bytes.err(ErrorCode::ExpectedInteger);
204 }
205
206 if f(&mut num_acc, digit) {
207 return bytes.err(ErrorCode::IntegerOutOfBounds);
208 }
209 }
210
211 Ok(num_acc)
212 };
213
214 let res = if sign > 0 {
215 calc_num(&*self, s, base, T::checked_add_ext)
216 } else {
217 calc_num(&*self, s, base, T::checked_sub_ext)
218 };
219
220 let _ = self.advance(num_bytes);
221
222 res
223 }
224
any_num(&mut self) -> Result<AnyNum>225 pub fn any_num(&mut self) -> Result<AnyNum> {
226 // We are not doing float comparisons here in the traditional sense.
227 // Instead, this code checks if a f64 fits inside an f32.
228 #[allow(clippy::float_cmp)]
229 fn any_float(f: f64) -> Result<AnyNum> {
230 if f == f as f32 as f64 {
231 Ok(AnyNum::F32(f as f32))
232 } else {
233 Ok(AnyNum::F64(f))
234 }
235 }
236
237 let bytes_backup = self.bytes;
238
239 let first_byte = self.peek_or_eof()?;
240 let is_signed = first_byte == b'-' || first_byte == b'+';
241 let is_float = self.next_bytes_is_float();
242
243 if is_float {
244 let f = self.float::<f64>()?;
245
246 any_float(f)
247 } else {
248 let max_u8 = std::u8::MAX as u128;
249 let max_u16 = std::u16::MAX as u128;
250 let max_u32 = std::u32::MAX as u128;
251 let max_u64 = std::u64::MAX as u128;
252
253 let min_i8 = std::i8::MIN as i128;
254 let max_i8 = std::i8::MAX as i128;
255 let min_i16 = std::i16::MIN as i128;
256 let max_i16 = std::i16::MAX as i128;
257 let min_i32 = std::i32::MIN as i128;
258 let max_i32 = std::i32::MAX as i128;
259 let min_i64 = std::i64::MIN as i128;
260 let max_i64 = std::i64::MAX as i128;
261
262 if is_signed {
263 match self.signed_integer::<i128>() {
264 Ok(x) => {
265 if x >= min_i8 && x <= max_i8 {
266 Ok(AnyNum::I8(x as i8))
267 } else if x >= min_i16 && x <= max_i16 {
268 Ok(AnyNum::I16(x as i16))
269 } else if x >= min_i32 && x <= max_i32 {
270 Ok(AnyNum::I32(x as i32))
271 } else if x >= min_i64 && x <= max_i64 {
272 Ok(AnyNum::I64(x as i64))
273 } else {
274 Ok(AnyNum::I128(x))
275 }
276 }
277 Err(_) => {
278 self.bytes = bytes_backup;
279
280 any_float(self.float::<f64>()?)
281 }
282 }
283 } else {
284 match self.unsigned_integer::<u128>() {
285 Ok(x) => {
286 if x <= max_u8 {
287 Ok(AnyNum::U8(x as u8))
288 } else if x <= max_u16 {
289 Ok(AnyNum::U16(x as u16))
290 } else if x <= max_u32 {
291 Ok(AnyNum::U32(x as u32))
292 } else if x <= max_u64 {
293 Ok(AnyNum::U64(x as u64))
294 } else {
295 Ok(AnyNum::U128(x))
296 }
297 }
298 Err(_) => {
299 self.bytes = bytes_backup;
300
301 any_float(self.float::<f64>()?)
302 }
303 }
304 }
305 }
306 }
307
bool(&mut self) -> Result<bool>308 pub fn bool(&mut self) -> Result<bool> {
309 if self.consume("true") {
310 Ok(true)
311 } else if self.consume("false") {
312 Ok(false)
313 } else {
314 self.err(ErrorCode::ExpectedBoolean)
315 }
316 }
317
bytes(&self) -> &[u8]318 pub fn bytes(&self) -> &[u8] {
319 &self.bytes
320 }
321
char(&mut self) -> Result<char>322 pub fn char(&mut self) -> Result<char> {
323 if !self.consume("'") {
324 return self.err(ErrorCode::ExpectedChar);
325 }
326
327 let c = self.peek_or_eof()?;
328
329 let c = if c == b'\\' {
330 let _ = self.advance(1);
331
332 self.parse_escape()?
333 } else {
334 // Check where the end of the char (') is and try to
335 // interpret the rest as UTF-8
336
337 let max = self.bytes.len().min(5);
338 let pos: usize = self.bytes[..max]
339 .iter()
340 .position(|&x| x == b'\'')
341 .ok_or_else(|| self.error(ErrorCode::ExpectedChar))?;
342 let s = from_utf8(&self.bytes[0..pos]).map_err(|e| self.error(e.into()))?;
343 let mut chars = s.chars();
344
345 let first = chars
346 .next()
347 .ok_or_else(|| self.error(ErrorCode::ExpectedChar))?;
348 if chars.next().is_some() {
349 return self.err(ErrorCode::ExpectedChar);
350 }
351
352 let _ = self.advance(pos);
353
354 first
355 };
356
357 if !self.consume("'") {
358 return self.err(ErrorCode::ExpectedChar);
359 }
360
361 Ok(c)
362 }
363
comma(&mut self) -> Result<bool>364 pub fn comma(&mut self) -> Result<bool> {
365 self.skip_ws()?;
366
367 if self.consume(",") {
368 self.skip_ws()?;
369
370 Ok(true)
371 } else {
372 Ok(false)
373 }
374 }
375
376 /// Only returns true if the char after `ident` cannot belong
377 /// to an identifier.
check_ident(&mut self, ident: &str) -> bool378 pub fn check_ident(&mut self, ident: &str) -> bool {
379 self.test_for(ident) && !self.check_ident_other_char(ident.len())
380 }
381
check_ident_other_char(&self, index: usize) -> bool382 fn check_ident_other_char(&self, index: usize) -> bool {
383 self.bytes
384 .get(index)
385 .map_or(false, |&b| is_ident_other_char(b))
386 }
387
388 /// Should only be used on a working copy
check_tuple_struct(mut self) -> Result<bool>389 pub fn check_tuple_struct(mut self) -> Result<bool> {
390 if self.identifier().is_err() {
391 // if there's no field ident, this is a tuple struct
392 return Ok(true);
393 }
394
395 self.skip_ws()?;
396
397 // if there is no colon after the ident, this can only be a unit struct
398 self.eat_byte().map(|c| c != b':')
399 }
400
401 /// Only returns true if the char after `ident` cannot belong
402 /// to an identifier.
consume_ident(&mut self, ident: &str) -> bool403 pub fn consume_ident(&mut self, ident: &str) -> bool {
404 if self.check_ident(ident) {
405 let _ = self.advance(ident.len());
406
407 true
408 } else {
409 false
410 }
411 }
412
consume(&mut self, s: &str) -> bool413 pub fn consume(&mut self, s: &str) -> bool {
414 if self.test_for(s) {
415 let _ = self.advance(s.len());
416
417 true
418 } else {
419 false
420 }
421 }
422
consume_all(&mut self, all: &[&str]) -> Result<bool>423 fn consume_all(&mut self, all: &[&str]) -> Result<bool> {
424 all.iter()
425 .map(|elem| {
426 if self.consume(elem) {
427 self.skip_ws()?;
428
429 Ok(true)
430 } else {
431 Ok(false)
432 }
433 })
434 .fold(Ok(true), |acc, x| acc.and_then(|val| x.map(|x| x && val)))
435 }
436
eat_byte(&mut self) -> Result<u8>437 pub fn eat_byte(&mut self) -> Result<u8> {
438 let peek = self.peek_or_eof()?;
439 let _ = self.advance_single();
440
441 Ok(peek)
442 }
443
err<T>(&self, kind: ErrorCode) -> Result<T>444 pub fn err<T>(&self, kind: ErrorCode) -> Result<T> {
445 Err(self.error(kind))
446 }
447
error(&self, kind: ErrorCode) -> Error448 pub fn error(&self, kind: ErrorCode) -> Error {
449 Error {
450 code: kind,
451 position: Position {
452 line: self.line,
453 col: self.column,
454 },
455 }
456 }
457
expect_byte(&mut self, byte: u8, error: ErrorCode) -> Result<()>458 pub fn expect_byte(&mut self, byte: u8, error: ErrorCode) -> Result<()> {
459 self.eat_byte()
460 .and_then(|b| if b == byte { Ok(()) } else { self.err(error) })
461 }
462
463 /// Returns the extensions bit mask.
extensions(&mut self) -> Result<Extensions>464 fn extensions(&mut self) -> Result<Extensions> {
465 if self.peek() != Some(b'#') {
466 return Ok(Extensions::empty());
467 }
468
469 if !self.consume_all(&["#", "!", "[", "enable", "("])? {
470 return self.err(ErrorCode::ExpectedAttribute);
471 }
472
473 self.skip_ws()?;
474 let mut extensions = Extensions::empty();
475
476 loop {
477 let ident = self.identifier()?;
478 let extension = Extensions::from_ident(ident).ok_or_else(|| {
479 self.error(ErrorCode::NoSuchExtension(
480 from_utf8(ident).unwrap().to_owned(),
481 ))
482 })?;
483
484 extensions |= extension;
485
486 let comma = self.comma()?;
487
488 // If we have no comma but another item, return an error
489 if !comma && self.check_ident_other_char(0) {
490 return self.err(ErrorCode::ExpectedComma);
491 }
492
493 // If there's no comma, assume the list ended.
494 // If there is, it might be a trailing one, thus we only
495 // continue the loop if we get an ident char.
496 if !comma || !self.check_ident_other_char(0) {
497 break;
498 }
499 }
500
501 self.skip_ws()?;
502
503 if self.consume_all(&[")", "]"])? {
504 Ok(extensions)
505 } else {
506 Err(self.error(ErrorCode::ExpectedAttributeEnd))
507 }
508 }
509
float<T>(&mut self) -> Result<T> where T: FromStr,510 pub fn float<T>(&mut self) -> Result<T>
511 where
512 T: FromStr,
513 {
514 for literal in &["inf", "-inf", "NaN"] {
515 if self.consume_ident(literal) {
516 return FromStr::from_str(literal).map_err(|_| unreachable!()); // must not fail
517 }
518 }
519
520 let num_bytes = self.next_bytes_contained_in(is_float_char);
521
522 let s = unsafe { from_utf8_unchecked(&self.bytes[0..num_bytes]) };
523 let res = FromStr::from_str(s).map_err(|_| self.error(ErrorCode::ExpectedFloat));
524
525 let _ = self.advance(num_bytes);
526
527 res
528 }
529
identifier(&mut self) -> Result<&'a [u8]>530 pub fn identifier(&mut self) -> Result<&'a [u8]> {
531 let bytes = self.identifier_len()?;
532 let ident = &self.bytes[..bytes];
533 let _ = self.advance(bytes);
534
535 Ok(ident)
536 }
537
identifier_len(&self) -> Result<usize>538 pub fn identifier_len(&self) -> Result<usize> {
539 let next = self.peek_or_eof()?;
540 if is_ident_first_char(next) {
541 // If the next two bytes signify the start of a raw string literal,
542 // return an error.
543 if next == b'r' {
544 let second = self
545 .bytes
546 .get(1)
547 .ok_or_else(|| self.error(ErrorCode::Eof))?;
548 if *second == b'"' || *second == b'#' {
549 return self.err(ErrorCode::ExpectedIdentifier);
550 }
551 }
552
553 let bytes = self.next_bytes_contained_in(is_ident_other_char);
554
555 Ok(bytes)
556 } else {
557 self.err(ErrorCode::ExpectedIdentifier)
558 }
559 }
560
next_bytes_contained_in(&self, allowed: fn(u8) -> bool) -> usize561 pub fn next_bytes_contained_in(&self, allowed: fn(u8) -> bool) -> usize {
562 self.bytes.iter().take_while(|&&b| allowed(b)).count()
563 }
564
next_bytes_is_float(&self) -> bool565 pub fn next_bytes_is_float(&self) -> bool {
566 if let Some(byte) = self.peek() {
567 let skip = match byte {
568 b'+' | b'-' => 1,
569 _ => 0,
570 };
571 let flen = self
572 .bytes
573 .iter()
574 .skip(skip)
575 .take_while(|&&b| is_float_char(b))
576 .count();
577 let ilen = self
578 .bytes
579 .iter()
580 .skip(skip)
581 .take_while(|&&b| is_int_char(b))
582 .count();
583 flen > ilen
584 } else {
585 false
586 }
587 }
588
skip_ws(&mut self) -> Result<()>589 pub fn skip_ws(&mut self) -> Result<()> {
590 while self.peek().map_or(false, |c| is_whitespace_char(c)) {
591 let _ = self.advance_single();
592 }
593
594 if self.skip_comment()? {
595 self.skip_ws()?;
596 }
597
598 Ok(())
599 }
600
peek(&self) -> Option<u8>601 pub fn peek(&self) -> Option<u8> {
602 self.bytes.get(0).cloned()
603 }
604
peek_or_eof(&self) -> Result<u8>605 pub fn peek_or_eof(&self) -> Result<u8> {
606 self.bytes
607 .get(0)
608 .cloned()
609 .ok_or_else(|| self.error(ErrorCode::Eof))
610 }
611
signed_integer<T>(&mut self) -> Result<T> where T: Num,612 pub fn signed_integer<T>(&mut self) -> Result<T>
613 where
614 T: Num,
615 {
616 match self.peek_or_eof()? {
617 b'+' => {
618 let _ = self.advance_single();
619
620 self.any_integer(1)
621 }
622 b'-' => {
623 let _ = self.advance_single();
624
625 self.any_integer(-1)
626 }
627 _ => self.any_integer(1),
628 }
629 }
630
string(&mut self) -> Result<ParsedStr<'a>>631 pub fn string(&mut self) -> Result<ParsedStr<'a>> {
632 if self.consume("\"") {
633 self.escaped_string()
634 } else if self.consume("r") {
635 self.raw_string()
636 } else {
637 self.err(ErrorCode::ExpectedString)
638 }
639 }
640
escaped_string(&mut self) -> Result<ParsedStr<'a>>641 fn escaped_string(&mut self) -> Result<ParsedStr<'a>> {
642 use std::iter::repeat;
643
644 let (i, end_or_escape) = self
645 .bytes
646 .iter()
647 .enumerate()
648 .find(|&(_, &b)| b == b'\\' || b == b'"')
649 .ok_or_else(|| self.error(ErrorCode::ExpectedStringEnd))?;
650
651 if *end_or_escape == b'"' {
652 let s = from_utf8(&self.bytes[..i]).map_err(|e| self.error(e.into()))?;
653
654 // Advance by the number of bytes of the string
655 // + 1 for the `"`.
656 let _ = self.advance(i + 1);
657
658 Ok(ParsedStr::Slice(s))
659 } else {
660 let mut i = i;
661 let mut s: Vec<_> = self.bytes[..i].to_vec();
662
663 loop {
664 let _ = self.advance(i + 1);
665 let character = self.parse_escape()?;
666 match character.len_utf8() {
667 1 => s.push(character as u8),
668 len => {
669 let start = s.len();
670 s.extend(repeat(0).take(len));
671 character.encode_utf8(&mut s[start..]);
672 }
673 }
674
675 let (new_i, end_or_escape) = self
676 .bytes
677 .iter()
678 .enumerate()
679 .find(|&(_, &b)| b == b'\\' || b == b'"')
680 .ok_or(ErrorCode::Eof)
681 .map_err(|e| self.error(e))?;
682
683 i = new_i;
684 s.extend_from_slice(&self.bytes[..i]);
685
686 if *end_or_escape == b'"' {
687 let _ = self.advance(i + 1);
688
689 let s = String::from_utf8(s).map_err(|e| self.error(e.into()))?;
690 break Ok(ParsedStr::Allocated(s));
691 }
692 }
693 }
694 }
695
raw_string(&mut self) -> Result<ParsedStr<'a>>696 fn raw_string(&mut self) -> Result<ParsedStr<'a>> {
697 let num_hashes = self.bytes.iter().take_while(|&&b| b == b'#').count();
698 let hashes = &self.bytes[..num_hashes];
699 let _ = self.advance(num_hashes);
700
701 if !self.consume("\"") {
702 return self.err(ErrorCode::ExpectedString);
703 }
704
705 let ending = [&[b'"'], hashes].concat();
706 let i = self
707 .bytes
708 .windows(num_hashes + 1)
709 .position(|window| window == ending.as_slice())
710 .ok_or_else(|| self.error(ErrorCode::ExpectedStringEnd))?;
711
712 let s = from_utf8(&self.bytes[..i]).map_err(|e| self.error(e.into()))?;
713
714 // Advance by the number of bytes of the string
715 // + `num_hashes` + 1 for the `"`.
716 let _ = self.advance(i + num_hashes + 1);
717
718 Ok(ParsedStr::Slice(s))
719 }
720
test_for(&self, s: &str) -> bool721 fn test_for(&self, s: &str) -> bool {
722 s.bytes()
723 .enumerate()
724 .all(|(i, b)| self.bytes.get(i).map_or(false, |t| *t == b))
725 }
726
unsigned_integer<T: Num>(&mut self) -> Result<T>727 pub fn unsigned_integer<T: Num>(&mut self) -> Result<T> {
728 self.any_integer(1)
729 }
730
decode_ascii_escape(&mut self) -> Result<u8>731 fn decode_ascii_escape(&mut self) -> Result<u8> {
732 let mut n = 0;
733 for _ in 0..2 {
734 n <<= 4;
735 let byte = self.eat_byte()?;
736 let decoded = self.decode_hex(byte)?;
737 n |= decoded;
738 }
739
740 Ok(n)
741 }
742
743 #[inline]
decode_hex(&self, c: u8) -> Result<u8>744 fn decode_hex(&self, c: u8) -> Result<u8> {
745 match c {
746 c @ b'0'..=b'9' => Ok(c - b'0'),
747 c @ b'a'..=b'f' => Ok(10 + c - b'a'),
748 c @ b'A'..=b'F' => Ok(10 + c - b'A'),
749 _ => self.err(ErrorCode::InvalidEscape("Non-hex digit found")),
750 }
751 }
752
parse_escape(&mut self) -> Result<char>753 fn parse_escape(&mut self) -> Result<char> {
754 let c = match self.eat_byte()? {
755 b'\'' => '\'',
756 b'"' => '"',
757 b'\\' => '\\',
758 b'n' => '\n',
759 b'r' => '\r',
760 b't' => '\t',
761 b'x' => self.decode_ascii_escape()? as char,
762 b'u' => {
763 self.expect_byte(b'{', ErrorCode::InvalidEscape("Missing {"))?;
764
765 let mut bytes: u32 = 0;
766 let mut num_digits = 0;
767
768 while num_digits < 6 {
769 let byte = self.peek_or_eof()?;
770
771 if byte == b'}' {
772 break;
773 } else {
774 self.advance_single()?;
775 }
776
777 let byte = self.decode_hex(byte)?;
778 bytes <<= 4;
779 bytes |= byte as u32;
780
781 num_digits += 1;
782 }
783
784 if num_digits == 0 {
785 return self.err(ErrorCode::InvalidEscape(
786 "Expected 1-6 digits, got 0 digits",
787 ));
788 }
789
790 self.expect_byte(b'}', ErrorCode::InvalidEscape("No } at the end"))?;
791 char_from_u32(bytes)
792 .ok_or_else(|| self.error(ErrorCode::InvalidEscape("Not a valid char")))?
793 }
794 _ => {
795 return self.err(ErrorCode::InvalidEscape("Unknown escape character"));
796 }
797 };
798
799 Ok(c)
800 }
801
skip_comment(&mut self) -> Result<bool>802 fn skip_comment(&mut self) -> Result<bool> {
803 if self.consume("/") {
804 match self.eat_byte()? {
805 b'/' => {
806 let bytes = self.bytes.iter().take_while(|&&b| b != b'\n').count();
807
808 let _ = self.advance(bytes);
809 }
810 b'*' => {
811 let mut level = 1;
812
813 while level > 0 {
814 let bytes = self
815 .bytes
816 .iter()
817 .take_while(|&&b| b != b'/' && b != b'*')
818 .count();
819
820 if self.bytes.is_empty() {
821 return self.err(ErrorCode::UnclosedBlockComment);
822 }
823
824 let _ = self.advance(bytes);
825
826 // check whether / or * and take action
827 if self.consume("/*") {
828 level += 1;
829 } else if self.consume("*/") {
830 level -= 1;
831 } else {
832 self.eat_byte()
833 .map_err(|_| self.error(ErrorCode::UnclosedBlockComment))?;
834 }
835 }
836 }
837 b => return self.err(ErrorCode::UnexpectedByte(b as char)),
838 }
839
840 Ok(true)
841 } else {
842 Ok(false)
843 }
844 }
845 }
846
/// The integer operations needed to parse a number digit by digit.
///
/// The `checked_*_ext` methods update `self` in place when the operation
/// succeeds and leave it untouched when it would overflow.
pub trait Num {
    /// Converts a `u8` into this integer type.
    fn from_u8(x: u8) -> Self;

    /// Returns `true` on overflow
    fn checked_mul_ext(&mut self, x: u8) -> bool;

    /// Returns `true` on overflow
    fn checked_add_ext(&mut self, x: u8) -> bool;

    /// Returns `true` on overflow
    fn checked_sub_ext(&mut self, x: u8) -> bool;
}
859
860 macro_rules! impl_num {
861 ($ty:ident) => {
862 impl Num for $ty {
863 fn from_u8(x: u8) -> Self {
864 x as $ty
865 }
866
867 fn checked_mul_ext(&mut self, x: u8) -> bool {
868 match self.checked_mul(Self::from_u8(x)) {
869 Some(n) => {
870 *self = n;
871 false
872 }
873 None => true,
874 }
875 }
876
877 fn checked_add_ext(&mut self, x: u8) -> bool {
878 match self.checked_add(Self::from_u8(x)) {
879 Some(n) => {
880 *self = n;
881 false
882 }
883 None => true,
884 }
885 }
886
887 fn checked_sub_ext(&mut self, x: u8) -> bool {
888 match self.checked_sub(Self::from_u8(x)) {
889 Some(n) => {
890 *self = n;
891 false
892 }
893 None => true,
894 }
895 }
896 }
897 };
898 ($($tys:ident)*) => {
899 $( impl_num!($tys); )*
900 };
901 }
902
903 impl_num!(u8 u16 u32 u64 u128 i8 i16 i32 i64 i128);
904
/// The result of parsing a string literal.
#[derive(Clone, Debug)]
pub enum ParsedStr<'a> {
    /// An owned string; produced when the literal contained escape
    /// sequences that had to be decoded.
    Allocated(String),
    /// A slice borrowed directly from the input; produced when the
    /// contents could be used verbatim.
    Slice(&'a str),
}
910
/// A line/column pair locating a spot in the input.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Position {
    pub line: usize,
    pub col: usize,
}

impl Display for Position {
    /// Formats the position as `line:col`.
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        let Position { line, col } = *self;

        write!(f, "{}:{}", line, col)
    }
}
922
#[cfg(test)]
mod tests {
    use super::*;

    /// The two hex digits `10` decode to the single byte `0x10`.
    #[test]
    fn decode_x10() {
        let mut parser = Bytes::new(b"10").unwrap();
        let decoded = parser.decode_ascii_escape();

        assert_eq!(decoded, Ok(0x10));
    }
}
933