1 use std::char;
2 use std::convert::TryFrom;
3 use std::f64;
4 use std::fmt;
5 use std::num::ParseFloatError;
6 use std::num::ParseIntError;
7 
8 use super::float;
9 use super::loc::Loc;
10 use super::loc::FIRST_COL;
11 use super::str_lit::StrLit;
12 use super::str_lit::StrLitDecodeError;
13 use super::token::Token;
14 use super::token::TokenWithLocation;
15 use super::ParserLanguage;
16 use crate::text_format::lexer::JsonNumberLit;
17 
/// Errors the lexer can report while scanning input.
#[derive(Debug)]
pub enum LexerError {
    /// Generic failure used when no more specific variant applies.
    IncorrectInput, // TODO: something better than this
    /// Input ended while another character was required.
    UnexpectedEof,
    /// The given character was required at the current position but not found.
    ExpectChar(char),
    /// Produced when a `std::num::ParseIntError` is converted into a lexer error.
    ParseIntError,
    /// Produced when a `std::num::ParseFloatError` is converted into a lexer error.
    ParseFloatError,
    /// Float literal is malformed; also produced from `float::ProtobufFloatParseError`.
    IncorrectFloatLit, // TODO: how it is different from ParseFloatError?
    /// Unknown character after a backslash in a JSON string.
    IncorrectJsonEscape,
    /// JSON number is missing a required digit.
    IncorrectJsonNumber,
    /// Numeric code is not a valid Unicode scalar value.
    IncorrectUnicodeChar,
    /// A hexadecimal digit was required.
    ExpectHexDigit,
    /// An octal digit was required.
    ExpectOctDigit,
    /// A decimal digit was required.
    ExpectDecDigit,
    /// Wrapper around a string literal decoding error.
    StrLitDecodeError(StrLitDecodeError),
    /// An identifier was required.
    ExpectedIdent,
}
35 
36 impl fmt::Display for LexerError {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result37     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
38         match self {
39             LexerError::IncorrectInput => write!(f, "Incorrect input"),
40             LexerError::UnexpectedEof => write!(f, "Unexpected EOF"),
41             LexerError::ExpectChar(c) => write!(f, "Expecting char: {}", c),
42             LexerError::ParseIntError => write!(f, "Parse int error"),
43             LexerError::ParseFloatError => write!(f, "Parse float error"),
44             LexerError::IncorrectFloatLit => write!(f, "Incorrect float literal"),
45             LexerError::IncorrectJsonEscape => write!(f, "Incorrect JSON escape"),
46             LexerError::IncorrectJsonNumber => write!(f, "Incorrect JSON number"),
47             LexerError::IncorrectUnicodeChar => write!(f, "Incorrect Unicode char"),
48             LexerError::ExpectHexDigit => write!(f, "Expecting hex digit"),
49             LexerError::ExpectOctDigit => write!(f, "Expecting oct digit"),
50             LexerError::ExpectDecDigit => write!(f, "Expecting dec digit"),
51             LexerError::StrLitDecodeError(e) => write!(f, "{}", e),
52             LexerError::ExpectedIdent => write!(f, "Expecting identifier"),
53         }
54     }
55 }
56 
// Empty impl: all `Error` methods keep their default implementations.
impl std::error::Error for LexerError {}
58 
/// `Result` alias whose error type is [`LexerError`].
pub type LexerResult<T> = Result<T, LexerError>;
60 
61 impl From<StrLitDecodeError> for LexerError {
from(e: StrLitDecodeError) -> Self62     fn from(e: StrLitDecodeError) -> Self {
63         LexerError::StrLitDecodeError(e)
64     }
65 }
66 
67 impl From<ParseIntError> for LexerError {
from(_: ParseIntError) -> Self68     fn from(_: ParseIntError) -> Self {
69         LexerError::ParseIntError
70     }
71 }
72 
73 impl From<ParseFloatError> for LexerError {
from(_: ParseFloatError) -> Self74     fn from(_: ParseFloatError) -> Self {
75         LexerError::ParseFloatError
76     }
77 }
78 
79 impl From<float::ProtobufFloatParseError> for LexerError {
from(_: float::ProtobufFloatParseError) -> Self80     fn from(_: float::ProtobufFloatParseError) -> Self {
81         LexerError::IncorrectFloatLit
82     }
83 }
84 
/// Cursor over a borrowed input string that produces tokens for one `ParserLanguage`.
///
/// The type is `Copy`: lookahead and backtracking are implemented by copying
/// the lexer, advancing the copy, and assigning it back only on success.
#[derive(Copy, Clone)]
pub struct Lexer<'a> {
    /// Syntax being lexed; selects which comment styles and literals are recognized.
    language: ParserLanguage,
    /// The whole input text.
    input: &'a str,
    /// Byte offset into `input` of the next unread character.
    pos: usize,
    /// Line and column of the next unread character.
    pub loc: Loc,
}
92 
/// Returns `true` if `c` may start an identifier: an ASCII letter or `_`.
///
/// The grammar defines `letter = "A" … "Z" | "a" … "z"` (underscore is accepted too,
/// see https://github.com/google/protobuf/issues/4565). Only ASCII letters are
/// accepted so that the first character of an identifier follows the same
/// ASCII-only rule as the remaining characters.
fn is_letter(c: char) -> bool {
    c.is_ascii_alphabetic() || c == '_'
}
96 
97 impl<'a> Lexer<'a> {
new(input: &'a str, language: ParserLanguage) -> Lexer<'a>98     pub fn new(input: &'a str, language: ParserLanguage) -> Lexer<'a> {
99         Lexer {
100             language,
101             input,
102             pos: 0,
103             loc: Loc::start(),
104         }
105     }
106 
107     /// No more chars
eof(&self) -> bool108     pub fn eof(&self) -> bool {
109         self.pos == self.input.len()
110     }
111 
112     /// Remaining chars
rem_chars(&self) -> &'a str113     fn rem_chars(&self) -> &'a str {
114         &self.input[self.pos..]
115     }
116 
lookahead_char_is<P: FnOnce(char) -> bool>(&self, p: P) -> bool117     pub fn lookahead_char_is<P: FnOnce(char) -> bool>(&self, p: P) -> bool {
118         self.lookahead_char().map_or(false, p)
119     }
120 
lookahead_char_is_in(&self, alphabet: &str) -> bool121     fn lookahead_char_is_in(&self, alphabet: &str) -> bool {
122         self.lookahead_char_is(|c| alphabet.contains(c))
123     }
124 
next_char_opt(&mut self) -> Option<char>125     fn next_char_opt(&mut self) -> Option<char> {
126         let rem = self.rem_chars();
127         if rem.is_empty() {
128             None
129         } else {
130             let mut char_indices = rem.char_indices();
131             let (_, c) = char_indices.next().unwrap();
132             let c_len = char_indices.next().map(|(len, _)| len).unwrap_or(rem.len());
133             self.pos += c_len;
134             if c == '\n' {
135                 self.loc.line += 1;
136                 self.loc.col = FIRST_COL;
137             } else {
138                 self.loc.col += 1;
139             }
140             Some(c)
141         }
142     }
143 
next_char(&mut self) -> LexerResult<char>144     fn next_char(&mut self) -> LexerResult<char> {
145         self.next_char_opt().ok_or(LexerError::UnexpectedEof)
146     }
147 
148     /// Skip whitespaces
skip_whitespaces(&mut self)149     fn skip_whitespaces(&mut self) {
150         self.take_while(|c| c.is_whitespace());
151     }
152 
skip_c_comment(&mut self) -> LexerResult<()>153     fn skip_c_comment(&mut self) -> LexerResult<()> {
154         if self.skip_if_lookahead_is_str("/*") {
155             let end = "*/";
156             match self.rem_chars().find(end) {
157                 None => Err(LexerError::UnexpectedEof),
158                 Some(len) => {
159                     let new_pos = self.pos + len + end.len();
160                     self.skip_to_pos(new_pos);
161                     Ok(())
162                 }
163             }
164         } else {
165             Ok(())
166         }
167     }
168 
skip_cpp_comment(&mut self)169     fn skip_cpp_comment(&mut self) {
170         if self.skip_if_lookahead_is_str("//") {
171             loop {
172                 match self.next_char_opt() {
173                     Some('\n') | None => break,
174                     _ => {}
175                 }
176             }
177         }
178     }
179 
skip_sh_comment(&mut self)180     fn skip_sh_comment(&mut self) {
181         if self.skip_if_lookahead_is_str("#") {
182             loop {
183                 match self.next_char_opt() {
184                     Some('\n') | None => break,
185                     _ => {}
186                 }
187             }
188         }
189     }
190 
skip_comment(&mut self) -> LexerResult<()>191     fn skip_comment(&mut self) -> LexerResult<()> {
192         match self.language {
193             ParserLanguage::Proto => {
194                 self.skip_c_comment()?;
195                 self.skip_cpp_comment();
196             }
197             ParserLanguage::TextFormat => {
198                 self.skip_sh_comment();
199             }
200             ParserLanguage::Json => {}
201         }
202         Ok(())
203     }
204 
skip_ws(&mut self) -> LexerResult<()>205     pub fn skip_ws(&mut self) -> LexerResult<()> {
206         loop {
207             let pos = self.pos;
208             self.skip_whitespaces();
209             self.skip_comment()?;
210             if pos == self.pos {
211                 // Did not advance
212                 return Ok(());
213             }
214         }
215     }
216 
take_while<F>(&mut self, f: F) -> &'a str where F: Fn(char) -> bool,217     pub fn take_while<F>(&mut self, f: F) -> &'a str
218     where
219         F: Fn(char) -> bool,
220     {
221         let start = self.pos;
222         while self.lookahead_char().map(&f) == Some(true) {
223             self.next_char_opt().unwrap();
224         }
225         let end = self.pos;
226         &self.input[start..end]
227     }
228 
lookahead_char(&self) -> Option<char>229     fn lookahead_char(&self) -> Option<char> {
230         self.clone().next_char_opt()
231     }
232 
lookahead_is_str(&self, s: &str) -> bool233     fn lookahead_is_str(&self, s: &str) -> bool {
234         self.rem_chars().starts_with(s)
235     }
236 
skip_if_lookahead_is_str(&mut self, s: &str) -> bool237     fn skip_if_lookahead_is_str(&mut self, s: &str) -> bool {
238         if self.lookahead_is_str(s) {
239             let new_pos = self.pos + s.len();
240             self.skip_to_pos(new_pos);
241             true
242         } else {
243             false
244         }
245     }
246 
next_char_if<P>(&mut self, p: P) -> Option<char> where P: FnOnce(char) -> bool,247     fn next_char_if<P>(&mut self, p: P) -> Option<char>
248     where
249         P: FnOnce(char) -> bool,
250     {
251         let mut clone = self.clone();
252         match clone.next_char_opt() {
253             Some(c) if p(c) => {
254                 *self = clone;
255                 Some(c)
256             }
257             _ => None,
258         }
259     }
260 
next_char_if_eq(&mut self, expect: char) -> bool261     pub fn next_char_if_eq(&mut self, expect: char) -> bool {
262         self.next_char_if(|c| c == expect) != None
263     }
264 
next_char_if_in(&mut self, alphabet: &str) -> Option<char>265     fn next_char_if_in(&mut self, alphabet: &str) -> Option<char> {
266         for c in alphabet.chars() {
267             if self.next_char_if_eq(c) {
268                 return Some(c);
269             }
270         }
271         None
272     }
273 
next_char_expect_eq(&mut self, expect: char) -> LexerResult<()>274     fn next_char_expect_eq(&mut self, expect: char) -> LexerResult<()> {
275         if self.next_char_if_eq(expect) {
276             Ok(())
277         } else {
278             Err(LexerError::ExpectChar(expect))
279         }
280     }
281 
next_char_expect<P>(&mut self, expect: P, err: LexerError) -> LexerResult<char> where P: FnOnce(char) -> bool,282     fn next_char_expect<P>(&mut self, expect: P, err: LexerError) -> LexerResult<char>
283     where
284         P: FnOnce(char) -> bool,
285     {
286         self.next_char_if(expect).ok_or(err)
287     }
288 
289     // str functions
290 
    /// Advances to byte offset `new_pos`, properly updating line and column.
    ///
    /// Characters are consumed one at a time so that `loc` stays in sync.
    /// Returns the skipped slice of the input.
    ///
    /// Panics if `new_pos` is before the current position or past the end of
    /// the input. `new_pos` is expected to be a character boundary: the loop
    /// only stops when `pos` is exactly equal to `new_pos`.
    fn skip_to_pos(&mut self, new_pos: usize) -> &'a str {
        assert!(new_pos >= self.pos);
        assert!(new_pos <= self.input.len());
        let pos = self.pos;
        while self.pos != new_pos {
            // `unwrap` cannot fail while `pos` is below `new_pos`, which is within the input
            self.next_char_opt().unwrap();
        }
        &self.input[pos..new_pos]
    }
301 
302     // Protobuf grammar
303 
304     // char functions
305 
306     // letter = "A" … "Z" | "a" … "z"
307     // https://github.com/google/protobuf/issues/4565
next_letter_opt(&mut self) -> Option<char>308     fn next_letter_opt(&mut self) -> Option<char> {
309         self.next_char_if(is_letter)
310     }
311 
312     // capitalLetter =  "A" … "Z"
_next_capital_letter_opt(&mut self) -> Option<char>313     fn _next_capital_letter_opt(&mut self) -> Option<char> {
314         self.next_char_if(|c| c >= 'A' && c <= 'Z')
315     }
316 
next_ident_part(&mut self) -> Option<char>317     fn next_ident_part(&mut self) -> Option<char> {
318         self.next_char_if(|c| c.is_ascii_alphanumeric() || c == '_')
319     }
320 
321     // Identifiers
322 
323     // ident = letter { letter | decimalDigit | "_" }
next_ident_opt(&mut self) -> LexerResult<Option<String>>324     fn next_ident_opt(&mut self) -> LexerResult<Option<String>> {
325         if let Some(c) = self.next_letter_opt() {
326             let mut ident = String::new();
327             ident.push(c);
328             while let Some(c) = self.next_ident_part() {
329                 ident.push(c);
330             }
331             Ok(Some(ident))
332         } else {
333             Ok(None)
334         }
335     }
336 
337     // Integer literals
338 
339     // hexLit     = "0" ( "x" | "X" ) hexDigit { hexDigit }
next_hex_lit_opt(&mut self) -> LexerResult<Option<u64>>340     fn next_hex_lit_opt(&mut self) -> LexerResult<Option<u64>> {
341         Ok(
342             if self.skip_if_lookahead_is_str("0x") || self.skip_if_lookahead_is_str("0X") {
343                 let s = self.take_while(|c| c.is_ascii_hexdigit());
344                 Some(u64::from_str_radix(s, 16)? as u64)
345             } else {
346                 None
347             },
348         )
349     }
350 
351     // decimalLit = ( "1" … "9" ) { decimalDigit }
352     // octalLit   = "0" { octalDigit }
next_decimal_octal_lit_opt(&mut self) -> LexerResult<Option<u64>>353     fn next_decimal_octal_lit_opt(&mut self) -> LexerResult<Option<u64>> {
354         // do not advance on number parse error
355         let mut clone = self.clone();
356 
357         let pos = clone.pos;
358 
359         Ok(if clone.next_char_if(|c| c.is_ascii_digit()) != None {
360             clone.take_while(|c| c.is_ascii_digit());
361             let value = clone.input[pos..clone.pos].parse()?;
362             *self = clone;
363             Some(value)
364         } else {
365             None
366         })
367     }
368 
369     // hexDigit     = "0" … "9" | "A" … "F" | "a" … "f"
next_hex_digit(&mut self) -> LexerResult<u32>370     fn next_hex_digit(&mut self) -> LexerResult<u32> {
371         let mut clone = self.clone();
372         let r = match clone.next_char()? {
373             c if c >= '0' && c <= '9' => c as u32 - b'0' as u32,
374             c if c >= 'A' && c <= 'F' => c as u32 - b'A' as u32 + 10,
375             c if c >= 'a' && c <= 'f' => c as u32 - b'a' as u32 + 10,
376             _ => return Err(LexerError::ExpectHexDigit),
377         };
378         *self = clone;
379         Ok(r)
380     }
381 
382     // octalDigit   = "0" … "7"
next_octal_digit(&mut self) -> LexerResult<u32>383     fn next_octal_digit(&mut self) -> LexerResult<u32> {
384         self.next_char_expect(|c| c >= '0' && c <= '9', LexerError::ExpectOctDigit)
385             .map(|c| c as u32 - '0' as u32)
386     }
387 
388     // decimalDigit = "0" … "9"
next_decimal_digit(&mut self) -> LexerResult<u32>389     fn next_decimal_digit(&mut self) -> LexerResult<u32> {
390         self.next_char_expect(|c| c >= '0' && c <= '9', LexerError::ExpectDecDigit)
391             .map(|c| c as u32 - '0' as u32)
392     }
393 
394     // decimals  = decimalDigit { decimalDigit }
next_decimal_digits(&mut self) -> LexerResult<()>395     fn next_decimal_digits(&mut self) -> LexerResult<()> {
396         self.next_decimal_digit()?;
397         self.take_while(|c| c >= '0' && c <= '9');
398         Ok(())
399     }
400 
401     // intLit     = decimalLit | octalLit | hexLit
next_int_lit_opt(&mut self) -> LexerResult<Option<u64>>402     pub fn next_int_lit_opt(&mut self) -> LexerResult<Option<u64>> {
403         assert_ne!(ParserLanguage::Json, self.language);
404 
405         self.skip_ws()?;
406         if let Some(i) = self.next_hex_lit_opt()? {
407             return Ok(Some(i));
408         }
409         if let Some(i) = self.next_decimal_octal_lit_opt()? {
410             return Ok(Some(i));
411         }
412         Ok(None)
413     }
414 
415     // Floating-point literals
416 
417     // exponent  = ( "e" | "E" ) [ "+" | "-" ] decimals
next_exponent_opt(&mut self) -> LexerResult<Option<()>>418     fn next_exponent_opt(&mut self) -> LexerResult<Option<()>> {
419         if self.next_char_if_in("eE") != None {
420             self.next_char_if_in("+-");
421             self.next_decimal_digits()?;
422             Ok(Some(()))
423         } else {
424             Ok(None)
425         }
426     }
427 
428     // floatLit = ( decimals "." [ decimals ] [ exponent ] | decimals exponent | "."decimals [ exponent ] ) | "inf" | "nan"
next_float_lit(&mut self) -> LexerResult<()>429     fn next_float_lit(&mut self) -> LexerResult<()> {
430         assert_ne!(ParserLanguage::Json, self.language);
431 
432         // "inf" and "nan" are handled as part of ident
433         if self.next_char_if_eq('.') {
434             self.next_decimal_digits()?;
435             self.next_exponent_opt()?;
436         } else {
437             self.next_decimal_digits()?;
438             if self.next_char_if_eq('.') {
439                 self.next_decimal_digits()?;
440                 self.next_exponent_opt()?;
441             } else {
442                 if self.next_exponent_opt()? == None {
443                     return Err(LexerError::IncorrectFloatLit);
444                 }
445             }
446         }
447         Ok(())
448     }
449 
450     // String literals
451 
452     // charValue = hexEscape | octEscape | charEscape | /[^\0\n\\]/
453     // hexEscape = '\' ( "x" | "X" ) hexDigit hexDigit
454     // https://github.com/google/protobuf/issues/4560
455     // octEscape = '\' octalDigit octalDigit octalDigit
456     // charEscape = '\' ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | '\' | "'" | '"' )
457     // quote = "'" | '"'
next_byte_value(&mut self) -> LexerResult<u8>458     pub fn next_byte_value(&mut self) -> LexerResult<u8> {
459         match self.next_char()? {
460             '\\' => {
461                 match self.next_char()? {
462                     '\'' => Ok(b'\''),
463                     '"' => Ok(b'"'),
464                     '\\' => Ok(b'\\'),
465                     'a' => Ok(b'\x07'),
466                     'b' => Ok(b'\x08'),
467                     'f' => Ok(b'\x0c'),
468                     'n' => Ok(b'\n'),
469                     'r' => Ok(b'\r'),
470                     't' => Ok(b'\t'),
471                     'v' => Ok(b'\x0b'),
472                     'x' => {
473                         let d1 = self.next_hex_digit()? as u8;
474                         let d2 = self.next_hex_digit()? as u8;
475                         Ok(((d1 << 4) | d2) as u8)
476                     }
477                     d if d >= '0' && d <= '7' => {
478                         let mut r = d as u8 - b'0';
479                         for _ in 0..2 {
480                             match self.next_octal_digit() {
481                                 Err(_) => break,
482                                 Ok(d) => r = (r << 3) + d as u8,
483                             }
484                         }
485                         Ok(r)
486                     }
487                     // https://github.com/google/protobuf/issues/4562
488                     // TODO: overflow
489                     c => Ok(c as u8),
490                 }
491             }
492             '\n' | '\0' => Err(LexerError::IncorrectInput),
493             // TODO: check overflow
494             c => Ok(c as u8),
495         }
496     }
497 
char_try_from(i: u32) -> LexerResult<char>498     fn char_try_from(i: u32) -> LexerResult<char> {
499         char::try_from(i).map_err(|_| LexerError::IncorrectUnicodeChar)
500     }
501 
next_json_char_value(&mut self) -> LexerResult<char>502     pub fn next_json_char_value(&mut self) -> LexerResult<char> {
503         match self.next_char()? {
504             '\\' => match self.next_char()? {
505                 '"' => Ok('"'),
506                 '\'' => Ok('\''),
507                 '\\' => Ok('\\'),
508                 '/' => Ok('/'),
509                 'b' => Ok('\x08'),
510                 'f' => Ok('\x0c'),
511                 'n' => Ok('\n'),
512                 'r' => Ok('\r'),
513                 't' => Ok('\t'),
514                 'u' => {
515                     let mut v = 0;
516                     for _ in 0..4 {
517                         let digit = self.next_hex_digit()?;
518                         v = v * 16 + digit;
519                     }
520                     Self::char_try_from(v)
521                 }
522                 _ => Err(LexerError::IncorrectJsonEscape),
523             },
524             c => Ok(c),
525         }
526     }
527 
528     // https://github.com/google/protobuf/issues/4564
529     // strLit = ( "'" { charValue } "'" ) | ( '"' { charValue } '"' )
next_str_lit_raw(&mut self) -> LexerResult<String>530     fn next_str_lit_raw(&mut self) -> LexerResult<String> {
531         let mut raw = String::new();
532 
533         let mut first = true;
534         loop {
535             if !first {
536                 self.skip_ws()?;
537             }
538 
539             let start = self.pos;
540 
541             let q = match self.next_char_if_in("'\"") {
542                 Some(q) => q,
543                 None if !first => break,
544                 None => return Err(LexerError::IncorrectInput),
545             };
546             first = false;
547             while self.lookahead_char() != Some(q) {
548                 self.next_byte_value()?;
549             }
550             self.next_char_expect_eq(q)?;
551 
552             raw.push_str(&self.input[start + 1..self.pos - 1]);
553         }
554         Ok(raw)
555     }
556 
next_str_lit_raw_opt(&mut self) -> LexerResult<Option<String>>557     fn next_str_lit_raw_opt(&mut self) -> LexerResult<Option<String>> {
558         if self.lookahead_char_is_in("'\"") {
559             Ok(Some(self.next_str_lit_raw()?))
560         } else {
561             Ok(None)
562         }
563     }
564 
565     /// Parse next token as JSON number
next_json_number_opt(&mut self) -> LexerResult<Option<JsonNumberLit>>566     fn next_json_number_opt(&mut self) -> LexerResult<Option<JsonNumberLit>> {
567         assert_eq!(ParserLanguage::Json, self.language);
568 
569         fn is_digit(c: char) -> bool {
570             c >= '0' && c <= '9'
571         }
572 
573         fn is_digit_1_9(c: char) -> bool {
574             c >= '1' && c <= '9'
575         }
576 
577         if !self.lookahead_char_is_in("-0123456789") {
578             return Ok(None);
579         }
580 
581         let mut s = String::new();
582         if self.next_char_if_eq('-') {
583             s.push('-');
584         }
585 
586         if self.next_char_if_eq('0') {
587             s.push('0');
588         } else {
589             s.push(self.next_char_expect(is_digit_1_9, LexerError::IncorrectJsonNumber)?);
590             while let Some(c) = self.next_char_if(is_digit) {
591                 s.push(c);
592             }
593         }
594 
595         if self.next_char_if_eq('.') {
596             s.push('.');
597             s.push(self.next_char_expect(is_digit, LexerError::IncorrectJsonNumber)?);
598             while let Some(c) = self.next_char_if(is_digit) {
599                 s.push(c);
600             }
601         }
602 
603         if let Some(c) = self.next_char_if_in("eE") {
604             s.push(c);
605             if let Some(c) = self.next_char_if_in("+-") {
606                 s.push(c);
607             }
608             s.push(self.next_char_expect(is_digit, LexerError::IncorrectJsonNumber)?);
609             while let Some(c) = self.next_char_if(is_digit) {
610                 s.push(c);
611             }
612         }
613 
614         Ok(Some(JsonNumberLit(s)))
615     }
616 
next_token_inner(&mut self) -> LexerResult<Token>617     fn next_token_inner(&mut self) -> LexerResult<Token> {
618         if self.language == ParserLanguage::Json {
619             if let Some(v) = self.next_json_number_opt()? {
620                 return Ok(Token::JsonNumber(v));
621             }
622         }
623 
624         if let Some(ident) = self.next_ident_opt()? {
625             let token = if self.language != ParserLanguage::Json && ident == float::PROTOBUF_NAN {
626                 Token::FloatLit(f64::NAN)
627             } else if self.language != ParserLanguage::Json && ident == float::PROTOBUF_INF {
628                 Token::FloatLit(f64::INFINITY)
629             } else {
630                 Token::Ident(ident.to_owned())
631             };
632             return Ok(token);
633         }
634 
635         if self.language != ParserLanguage::Json {
636             let mut clone = self.clone();
637             let pos = clone.pos;
638             if let Ok(_) = clone.next_float_lit() {
639                 let f = float::parse_protobuf_float(&self.input[pos..clone.pos])?;
640                 *self = clone;
641                 return Ok(Token::FloatLit(f));
642             }
643 
644             if let Some(lit) = self.next_int_lit_opt()? {
645                 return Ok(Token::IntLit(lit));
646             }
647         }
648 
649         if let Some(escaped) = self.next_str_lit_raw_opt()? {
650             return Ok(Token::StrLit(StrLit { escaped }));
651         }
652 
653         // This branch must be after str lit
654         if let Some(c) = self.next_char_if(|c| c.is_ascii_punctuation()) {
655             return Ok(Token::Symbol(c));
656         }
657 
658         if let Some(ident) = self.next_ident_opt()? {
659             return Ok(Token::Ident(ident));
660         }
661 
662         Err(LexerError::IncorrectInput)
663     }
664 
next_token(&mut self) -> LexerResult<Option<TokenWithLocation>>665     pub fn next_token(&mut self) -> LexerResult<Option<TokenWithLocation>> {
666         self.skip_ws()?;
667         let loc = self.loc;
668 
669         Ok(if self.eof() {
670             None
671         } else {
672             let token = self.next_token_inner()?;
673             // Skip whitespace here to update location
674             // to the beginning of the next token
675             self.skip_ws()?;
676             Some(TokenWithLocation { token, loc })
677         })
678     }
679 }
680 
#[cfg(test)]
mod test {
    use super::*;

    /// Runs `parse_what` on a proto lexer over `input` and returns its result.
    ///
    /// Panics if the parser fails or leaves any input unconsumed.
    fn lex<P, R>(input: &str, parse_what: P) -> R
    where
        P: FnOnce(&mut Lexer) -> LexerResult<R>,
    {
        let mut lexer = Lexer::new(input, ParserLanguage::Proto);
        let outcome = parse_what(&mut lexer);
        let value = outcome.expect(&format!("lexer failed at {}", lexer.loc));
        assert!(lexer.eof(), "check eof failed at {}", lexer.loc);
        value
    }

    /// Same as `lex` for parsers returning `Option`; also panics on `None`.
    fn lex_opt<P, R>(input: &str, parse_what: P) -> R
    where
        P: FnOnce(&mut Lexer) -> LexerResult<Option<R>>,
    {
        let mut lexer = Lexer::new(input, ParserLanguage::Proto);
        let outcome = parse_what(&mut lexer);
        let maybe = outcome.expect(&format!("lexer failed at {}", lexer.loc));
        let value = maybe.expect(&format!("lexer returned none at {}", lexer.loc));
        assert!(lexer.eof(), "check eof failed at {}", lexer.loc);
        value
    }

    #[test]
    fn test_lexer_int_lit() {
        let parsed = lex_opt("10", |lexer| lexer.next_int_lit_opt());
        assert_eq!(10, parsed);
    }

    #[test]
    fn test_lexer_float_lit() {
        let token = lex("12.3", |lexer| lexer.next_token_inner());
        assert_eq!(Token::FloatLit(12.3), token);
    }

    #[test]
    fn test_lexer_float_lit_leading_zeros_in_exp() {
        let token = lex("1e00009", |lexer| lexer.next_token_inner());
        assert_eq!(Token::FloatLit(1_000_000_000.0), token);
    }
}
727