1 use std::f64;
2 use std::fmt;
3 use std::num::ParseIntError;
4 use std::str;
5 
6 use model::*;
7 use protobuf_codegen::float;
8 use str_lit::*;
9 
/// Line number given to the first line of the input.
const FIRST_LINE: u32 = 1;
/// Column number given to the first character of every line.
const FIRST_COL: u32 = 1;

/// A position in the input, expressed as a line and a column.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Loc {
    /// 1-based
    pub line: u32,
    /// 1-based
    pub col: u32,
}

impl fmt::Display for Loc {
    /// Renders the location as `line:col`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Loc { line, col } = *self;
        write!(f, "{}:{}", line, col)
    }
}

impl Loc {
    /// Location of the very first character of an input.
    pub fn start() -> Loc {
        let (line, col) = (FIRST_LINE, FIRST_COL);
        Loc { line, col }
    }
}
36 
37 /// Basic information about parsing error.
38 #[derive(Debug)]
39 pub enum ParserError {
40     IncorrectInput,
41     IncorrectFloatLit,
42     NotUtf8,
43     ExpectChar(char),
44     ExpectConstant,
45     ExpectIdent,
46     ExpectHexDigit,
47     ExpectOctDigit,
48     ExpectDecDigit,
49     UnknownSyntax,
50     UnexpectedEof,
51     ParseIntError,
52     IntegerOverflow,
53     LabelNotAllowed,
54     LabelRequired,
55     InternalError,
56     StrLitDecodeError(StrLitDecodeError),
57     GroupNameShouldStartWithUpperCase,
58     MapFieldNotAllowed,
59 }
60 
61 #[derive(Debug)]
62 pub struct ParserErrorWithLocation {
63     pub error: ParserError,
64     /// 1-based
65     pub line: u32,
66     /// 1-based
67     pub col: u32,
68 }
69 
70 impl From<StrLitDecodeError> for ParserError {
71     fn from(e: StrLitDecodeError) -> Self {
72         ParserError::StrLitDecodeError(e)
73     }
74 }
75 
76 impl From<ParseIntError> for ParserError {
77     fn from(_: ParseIntError) -> Self {
78         ParserError::ParseIntError
79     }
80 }
81 
82 impl From<float::ProtobufFloatParseError> for ParserError {
83     fn from(_: float::ProtobufFloatParseError) -> Self {
84         ParserError::IncorrectFloatLit
85     }
86 }
87 
88 pub type ParserResult<T> = Result<T, ParserError>;
89 
90 trait ToU8 {
91     fn to_u8(&self) -> ParserResult<u8>;
92 }
93 
94 trait ToI32 {
95     fn to_i32(&self) -> ParserResult<i32>;
96 }
97 
98 trait ToI64 {
99     fn to_i64(&self) -> ParserResult<i64>;
100 }
101 
102 trait ToChar {
103     fn to_char(&self) -> ParserResult<char>;
104 }
105 
106 impl ToI32 for u64 {
107     fn to_i32(&self) -> ParserResult<i32> {
108         if *self <= i32::max_value() as u64 {
109             Ok(*self as i32)
110         } else {
111             Err(ParserError::IntegerOverflow)
112         }
113     }
114 }
115 
116 impl ToI32 for i64 {
117     fn to_i32(&self) -> ParserResult<i32> {
118         if *self <= i32::max_value() as i64 && *self >= i32::min_value() as i64 {
119             Ok(*self as i32)
120         } else {
121             Err(ParserError::IntegerOverflow)
122         }
123     }
124 }
125 
126 impl ToI64 for u64 {
127     fn to_i64(&self) -> Result<i64, ParserError> {
128         if *self <= i64::max_value() as u64 {
129             Ok(*self as i64)
130         } else {
131             Err(ParserError::IntegerOverflow)
132         }
133     }
134 }
135 
136 impl ToChar for u8 {
137     fn to_char(&self) -> Result<char, ParserError> {
138         if *self <= 0x7f {
139             Ok(*self as char)
140         } else {
141             Err(ParserError::NotUtf8)
142         }
143     }
144 }
145 
146 impl ToU8 for u32 {
147     fn to_u8(&self) -> Result<u8, ParserError> {
148         if *self as u8 as u32 == *self {
149             Ok(*self as u8)
150         } else {
151             Err(ParserError::IntegerOverflow)
152         }
153     }
154 }
155 
156 trait U64Extensions {
157     fn neg(&self) -> ParserResult<i64>;
158 }
159 
160 impl U64Extensions for u64 {
161     fn neg(&self) -> ParserResult<i64> {
162         if *self <= 0x7fff_ffff_ffff_ffff {
163             Ok(-(*self as i64))
164         } else if *self == 0x8000_0000_0000_0000 {
165             Ok(-0x8000_0000_0000_0000)
166         } else {
167             Err(ParserError::IntegerOverflow)
168         }
169     }
170 }
171 
172 #[derive(Clone, Debug, PartialEq)]
173 enum Token {
174     Ident(String),
175     Symbol(char),
176     IntLit(u64),
177     // including quotes
178     StrLit(StrLit),
179     FloatLit(f64),
180 }
181 
182 impl Token {
183     /// Back to original
184     fn format(&self) -> String {
185         match self {
186             &Token::Ident(ref s) => s.clone(),
187             &Token::Symbol(c) => c.to_string(),
188             &Token::IntLit(ref i) => i.to_string(),
189             &Token::StrLit(ref s) => s.quoted(),
190             &Token::FloatLit(ref f) => f.to_string(),
191         }
192     }
193 
194     fn to_num_lit(&self) -> ParserResult<NumLit> {
195         match self {
196             &Token::IntLit(i) => Ok(NumLit::U64(i)),
197             &Token::FloatLit(f) => Ok(NumLit::F64(f)),
198             _ => Err(ParserError::IncorrectInput),
199         }
200     }
201 }
202 
203 #[derive(Clone)]
204 struct TokenWithLocation {
205     token: Token,
206     loc: Loc,
207 }
208 
209 #[derive(Copy, Clone)]
210 pub struct Lexer<'a> {
211     pub input: &'a str,
212     pub pos: usize,
213     pub loc: Loc,
214 }
215 
/// Tells whether `c` may start an identifier.
///
/// The grammar defines `letter = "A" … "Z" | "a" … "z"`, and `_` is accepted
/// as well. Only ASCII letters qualify: identifier continuation characters
/// are ASCII-only too, so accepting arbitrary Unicode letters here would
/// produce identifiers the grammar does not allow.
fn is_letter(c: char) -> bool {
    (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'
}
219 
220 impl<'a> Lexer<'a> {
221     /// No more chars
222     pub fn eof(&self) -> bool {
223         self.pos == self.input.len()
224     }
225 
226     /// Remaining chars
227     fn rem_chars(&self) -> &'a str {
228         &self.input[self.pos..]
229     }
230 
231     fn lookahead_char_is_in(&self, alphabet: &str) -> bool {
232         self.lookahead_char()
233             .map_or(false, |c| alphabet.contains(c))
234     }
235 
236     fn next_char_opt(&mut self) -> Option<char> {
237         let rem = self.rem_chars();
238         if rem.is_empty() {
239             None
240         } else {
241             let mut char_indices = rem.char_indices();
242             let (_, c) = char_indices.next().unwrap();
243             let c_len = char_indices.next().map(|(len, _)| len).unwrap_or(rem.len());
244             self.pos += c_len;
245             if c == '\n' {
246                 self.loc.line += 1;
247                 self.loc.col = FIRST_COL;
248             } else {
249                 self.loc.col += 1;
250             }
251             Some(c)
252         }
253     }
254 
255     fn next_char(&mut self) -> ParserResult<char> {
256         self.next_char_opt().ok_or(ParserError::UnexpectedEof)
257     }
258 
259     /// Skip whitespaces
260     fn skip_whitespaces(&mut self) {
261         self.take_while(|c| c.is_whitespace());
262     }
263 
264     fn skip_comment(&mut self) -> ParserResult<()> {
265         if self.skip_if_lookahead_is_str("/*") {
266             let end = "*/";
267             match self.rem_chars().find(end) {
268                 None => Err(ParserError::UnexpectedEof),
269                 Some(len) => {
270                     let new_pos = self.pos + len + end.len();
271                     self.skip_to_pos(new_pos);
272                     Ok(())
273                 }
274             }
275         } else {
276             Ok(())
277         }
278     }
279 
280     fn skip_block_comment(&mut self) {
281         if self.skip_if_lookahead_is_str("//") {
282             loop {
283                 match self.next_char_opt() {
284                     Some('\n') | None => break,
285                     _ => {}
286                 }
287             }
288         }
289     }
290 
291     fn skip_ws(&mut self) -> ParserResult<()> {
292         loop {
293             let pos = self.pos;
294             self.skip_whitespaces();
295             self.skip_comment()?;
296             self.skip_block_comment();
297             if pos == self.pos {
298                 // Did not advance
299                 return Ok(());
300             }
301         }
302     }
303 
304     fn take_while<F>(&mut self, f: F) -> &'a str
305     where
306         F: Fn(char) -> bool,
307     {
308         let start = self.pos;
309         while self.lookahead_char().map(&f) == Some(true) {
310             self.next_char_opt().unwrap();
311         }
312         let end = self.pos;
313         &self.input[start..end]
314     }
315 
316     fn lookahead_char(&self) -> Option<char> {
317         self.clone().next_char_opt()
318     }
319 
320     fn lookahead_is_str(&self, s: &str) -> bool {
321         self.rem_chars().starts_with(s)
322     }
323 
324     fn skip_if_lookahead_is_str(&mut self, s: &str) -> bool {
325         if self.lookahead_is_str(s) {
326             let new_pos = self.pos + s.len();
327             self.skip_to_pos(new_pos);
328             true
329         } else {
330             false
331         }
332     }
333 
334     fn next_char_if<P>(&mut self, p: P) -> Option<char>
335     where
336         P: FnOnce(char) -> bool,
337     {
338         let mut clone = self.clone();
339         match clone.next_char_opt() {
340             Some(c) if p(c) => {
341                 *self = clone;
342                 Some(c)
343             }
344             _ => None,
345         }
346     }
347 
348     fn next_char_if_eq(&mut self, expect: char) -> bool {
349         self.next_char_if(|c| c == expect) != None
350     }
351 
352     fn next_char_if_in(&mut self, alphabet: &str) -> Option<char> {
353         for c in alphabet.chars() {
354             if self.next_char_if_eq(c) {
355                 return Some(c);
356             }
357         }
358         None
359     }
360 
361     fn next_char_expect_eq(&mut self, expect: char) -> ParserResult<()> {
362         if self.next_char_if_eq(expect) {
363             Ok(())
364         } else {
365             Err(ParserError::ExpectChar(expect))
366         }
367     }
368 
369     // str functions
370 
371     /// properly update line and column
372     fn skip_to_pos(&mut self, new_pos: usize) -> &'a str {
373         assert!(new_pos >= self.pos);
374         assert!(new_pos <= self.input.len());
375         let pos = self.pos;
376         while self.pos != new_pos {
377             self.next_char_opt().unwrap();
378         }
379         &self.input[pos..new_pos]
380     }
381 
382     // Protobuf grammar
383 
384     // char functions
385 
386     // letter = "A" … "Z" | "a" … "z"
387     // https://github.com/google/protobuf/issues/4565
388     fn next_letter_opt(&mut self) -> Option<char> {
389         self.next_char_if(is_letter)
390     }
391 
392     // capitalLetter =  "A" … "Z"
393     fn _next_capital_letter_opt(&mut self) -> Option<char> {
394         self.next_char_if(|c| c >= 'A' && c <= 'Z')
395     }
396 
397     fn is_ascii_alphanumeric(c: char) -> bool {
398         (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')
399     }
400 
401     fn next_ident_part(&mut self) -> Option<char> {
402         self.next_char_if(|c| Lexer::is_ascii_alphanumeric(c) || c == '_')
403     }
404 
405     // Identifiers
406 
407     // ident = letter { letter | decimalDigit | "_" }
408     fn next_ident_opt(&mut self) -> ParserResult<Option<String>> {
409         if let Some(c) = self.next_letter_opt() {
410             let mut ident = String::new();
411             ident.push(c);
412             while let Some(c) = self.next_ident_part() {
413                 ident.push(c);
414             }
415             Ok(Some(ident))
416         } else {
417             Ok(None)
418         }
419     }
420 
421     // Integer literals
422 
423     fn is_ascii_hexdigit(c: char) -> bool {
424         (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
425     }
426 
427     // hexLit     = "0" ( "x" | "X" ) hexDigit { hexDigit }
428     fn next_hex_lit(&mut self) -> ParserResult<Option<u64>> {
429         Ok(
430             if self.skip_if_lookahead_is_str("0x") || self.skip_if_lookahead_is_str("0X") {
431                 let s = self.take_while(Lexer::is_ascii_hexdigit);
432                 Some(u64::from_str_radix(s, 16)? as u64)
433             } else {
434                 None
435             },
436         )
437     }
438 
439     fn is_ascii_digit(c: char) -> bool {
440         c >= '0' && c <= '9'
441     }
442 
443     // decimalLit = ( "1" … "9" ) { decimalDigit }
444     // octalLit   = "0" { octalDigit }
445     fn next_decimal_octal_lit(&mut self) -> ParserResult<Option<u64>> {
446         // do not advance on number parse error
447         let mut clone = self.clone();
448 
449         let pos = clone.pos;
450 
451         Ok(if clone.next_char_if(Lexer::is_ascii_digit) != None {
452             clone.take_while(Lexer::is_ascii_digit);
453             let value = clone.input[pos..clone.pos].parse()?;
454             *self = clone;
455             Some(value)
456         } else {
457             None
458         })
459     }
460 
461     // hexDigit     = "0" … "9" | "A" … "F" | "a" … "f"
462     fn next_hex_digit(&mut self) -> ParserResult<u32> {
463         let mut clone = self.clone();
464         let r = match clone.next_char()? {
465             c if c >= '0' && c <= '9' => c as u32 - b'0' as u32,
466             c if c >= 'A' && c <= 'F' => c as u32 - b'A' as u32 + 10,
467             c if c >= 'a' && c <= 'f' => c as u32 - b'a' as u32 + 10,
468             _ => return Err(ParserError::ExpectHexDigit),
469         };
470         *self = clone;
471         Ok(r)
472     }
473 
474     // octalDigit   = "0" … "7"
475     fn next_octal_digit(&mut self) -> ParserResult<u32> {
476         let mut clone = self.clone();
477         let r = match clone.next_char()? {
478             c if c >= '0' && c <= '7' => c as u32 - b'0' as u32,
479             _ => return Err(ParserError::ExpectOctDigit),
480         };
481         *self = clone;
482         Ok(r)
483     }
484 
485     // decimalDigit = "0" … "9"
486     fn next_decimal_digit(&mut self) -> ParserResult<u32> {
487         let mut clone = self.clone();
488         let r = match clone.next_char()? {
489             c if c >= '0' && c <= '9' => c as u32 - '0' as u32,
490             _ => return Err(ParserError::ExpectDecDigit),
491         };
492         *self = clone;
493         Ok(r)
494     }
495 
496     // decimals  = decimalDigit { decimalDigit }
497     fn next_decimal_digits(&mut self) -> ParserResult<()> {
498         self.next_decimal_digit()?;
499         self.take_while(|c| c >= '0' && c <= '9');
500         Ok(())
501     }
502 
503     // intLit     = decimalLit | octalLit | hexLit
504     fn next_int_lit_opt(&mut self) -> ParserResult<Option<u64>> {
505         self.skip_ws()?;
506         if let Some(i) = self.next_hex_lit()? {
507             return Ok(Some(i));
508         }
509         if let Some(i) = self.next_decimal_octal_lit()? {
510             return Ok(Some(i));
511         }
512         Ok(None)
513     }
514 
515     // Floating-point literals
516 
517     // exponent  = ( "e" | "E" ) [ "+" | "-" ] decimals
518     fn next_exponent_opt(&mut self) -> ParserResult<Option<()>> {
519         if self.next_char_if_in("eE") != None {
520             self.next_char_if_in("+-");
521             self.next_decimal_digits()?;
522             Ok(Some(()))
523         } else {
524             Ok(None)
525         }
526     }
527 
528     // floatLit = ( decimals "." [ decimals ] [ exponent ] | decimals exponent | "."decimals [ exponent ] ) | "inf" | "nan"
529     fn next_float_lit(&mut self) -> ParserResult<()> {
530         // "inf" and "nan" are handled as part of ident
531         if self.next_char_if_eq('.') {
532             self.next_decimal_digits()?;
533             self.next_exponent_opt()?;
534         } else {
535             self.next_decimal_digits()?;
536             if self.next_char_if_eq('.') {
537                 self.next_decimal_digits()?;
538                 self.next_exponent_opt()?;
539             } else {
540                 if self.next_exponent_opt()? == None {
541                     return Err(ParserError::IncorrectFloatLit);
542                 }
543             }
544         }
545         Ok(())
546     }
547 
548     // String literals
549 
550     // charValue = hexEscape | octEscape | charEscape | /[^\0\n\\]/
551     // hexEscape = '\' ( "x" | "X" ) hexDigit hexDigit
552     // https://github.com/google/protobuf/issues/4560
553     // octEscape = '\' octalDigit octalDigit octalDigit
554     // charEscape = '\' ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | '\' | "'" | '"' )
555     // quote = "'" | '"'
556     pub fn next_char_value(&mut self) -> ParserResult<char> {
557         match self.next_char()? {
558             '\\' => {
559                 match self.next_char()? {
560                     '\'' => Ok('\''),
561                     '"' => Ok('"'),
562                     '\\' => Ok('\\'),
563                     'a' => Ok('\x07'),
564                     'b' => Ok('\x08'),
565                     'f' => Ok('\x0c'),
566                     'n' => Ok('\n'),
567                     'r' => Ok('\r'),
568                     't' => Ok('\t'),
569                     'v' => Ok('\x0b'),
570                     'x' => {
571                         let d1 = self.next_hex_digit()? as u8;
572                         let d2 = self.next_hex_digit()? as u8;
573                         // TODO: do not decode as char if > 0x80
574                         Ok(((d1 << 4) | d2) as char)
575                     }
576                     d if d >= '0' && d <= '7' => {
577                         let mut r = d as u8 - b'0';
578                         for _ in 0..2 {
579                             match self.next_octal_digit() {
580                                 Err(_) => break,
581                                 Ok(d) => r = (r << 3) + d as u8,
582                             }
583                         }
584                         // TODO: do not decode as char if > 0x80
585                         Ok(r as char)
586                     }
587                     // https://github.com/google/protobuf/issues/4562
588                     c => Ok(c),
589                 }
590             }
591             '\n' | '\0' => Err(ParserError::IncorrectInput),
592             c => Ok(c),
593         }
594     }
595 
596     // https://github.com/google/protobuf/issues/4564
597     // strLit = ( "'" { charValue } "'" ) | ( '"' { charValue } '"' )
598     fn next_str_lit_raw(&mut self) -> ParserResult<String> {
599         let mut raw = String::new();
600 
601         let mut first = true;
602         loop {
603             if !first {
604                 self.skip_ws()?;
605             }
606 
607             let start = self.pos;
608 
609             let q = match self.next_char_if_in("'\"") {
610                 Some(q) => q,
611                 None if !first => break,
612                 None => return Err(ParserError::IncorrectInput),
613             };
614             first = false;
615             while self.lookahead_char() != Some(q) {
616                 self.next_char_value()?;
617             }
618             self.next_char_expect_eq(q)?;
619 
620             raw.push_str(&self.input[start + 1..self.pos - 1]);
621         }
622         Ok(raw)
623     }
624 
625     fn next_str_lit_raw_opt(&mut self) -> ParserResult<Option<String>> {
626         if self.lookahead_char_is_in("'\"") {
627             Ok(Some(self.next_str_lit_raw()?))
628         } else {
629             Ok(None)
630         }
631     }
632 
633     fn is_ascii_punctuation(c: char) -> bool {
634         match c {
635             '.' | ',' | ':' | ';' | '/' | '\\' | '=' | '%' | '+' | '-' | '*' | '<' | '>' | '('
636             | ')' | '{' | '}' | '[' | ']' => true,
637             _ => false,
638         }
639     }
640 
641     fn next_token_inner(&mut self) -> ParserResult<Token> {
642         if let Some(ident) = self.next_ident_opt()? {
643             let token = if ident == float::PROTOBUF_NAN {
644                 Token::FloatLit(f64::NAN)
645             } else if ident == float::PROTOBUF_INF {
646                 Token::FloatLit(f64::INFINITY)
647             } else {
648                 Token::Ident(ident.to_owned())
649             };
650             return Ok(token);
651         }
652 
653         let mut clone = self.clone();
654         let pos = clone.pos;
655         if let Ok(_) = clone.next_float_lit() {
656             let f = float::parse_protobuf_float(&self.input[pos..clone.pos])?;
657             *self = clone;
658             return Ok(Token::FloatLit(f));
659         }
660 
661         if let Some(lit) = self.next_int_lit_opt()? {
662             return Ok(Token::IntLit(lit));
663         }
664 
665         if let Some(escaped) = self.next_str_lit_raw_opt()? {
666             return Ok(Token::StrLit(StrLit { escaped }));
667         }
668 
669         // This branch must be after str lit
670         if let Some(c) = self.next_char_if(Lexer::is_ascii_punctuation) {
671             return Ok(Token::Symbol(c));
672         }
673 
674         if let Some(ident) = self.next_ident_opt()? {
675             return Ok(Token::Ident(ident));
676         }
677 
678         Err(ParserError::IncorrectInput)
679     }
680 
681     fn next_token(&mut self) -> ParserResult<Option<TokenWithLocation>> {
682         self.skip_ws()?;
683         let loc = self.loc;
684 
685         Ok(if self.eof() {
686             None
687         } else {
688             let token = self.next_token_inner()?;
689             // Skip whitespace here to update location
690             // to the beginning of the next token
691             self.skip_ws()?;
692             Some(TokenWithLocation { token, loc })
693         })
694     }
695 }
696 
697 #[derive(Clone)]
698 pub struct Parser<'a> {
699     lexer: Lexer<'a>,
700     syntax: Syntax,
701     next_token: Option<TokenWithLocation>,
702 }
703 
/// Kind of body being parsed; it selects which labels and items the
/// predicates of this type allow.
#[derive(Clone, Copy)]
enum MessageBodyParseMode {
    MessageProto2,
    MessageProto3,
    Oneof,
    ExtendProto2,
    ExtendProto3,
}
712 
713 impl MessageBodyParseMode {
714     fn label_allowed(&self, label: Rule) -> bool {
715         match label {
716             Rule::Repeated => match *self {
717                 MessageBodyParseMode::MessageProto2
718                 | MessageBodyParseMode::MessageProto3
719                 | MessageBodyParseMode::ExtendProto2
720                 | MessageBodyParseMode::ExtendProto3 => true,
721                 MessageBodyParseMode::Oneof => false,
722             },
723             Rule::Optional | Rule::Required => match *self {
724                 MessageBodyParseMode::MessageProto2 | MessageBodyParseMode::ExtendProto2 => true,
725                 MessageBodyParseMode::MessageProto3
726                 | MessageBodyParseMode::ExtendProto3
727                 | MessageBodyParseMode::Oneof => false,
728             },
729         }
730     }
731 
732     fn some_label_required(&self) -> bool {
733         match *self {
734             MessageBodyParseMode::MessageProto2 | MessageBodyParseMode::ExtendProto2 => true,
735             MessageBodyParseMode::MessageProto3
736             | MessageBodyParseMode::ExtendProto3
737             | MessageBodyParseMode::Oneof => false,
738         }
739     }
740 
741     fn map_allowed(&self) -> bool {
742         match *self {
743             MessageBodyParseMode::MessageProto2
744             | MessageBodyParseMode::MessageProto3
745             | MessageBodyParseMode::ExtendProto2
746             | MessageBodyParseMode::ExtendProto3 => true,
747             MessageBodyParseMode::Oneof => false,
748         }
749     }
750 
751     fn is_most_non_fields_allowed(&self) -> bool {
752         match *self {
753             MessageBodyParseMode::MessageProto2 | MessageBodyParseMode::MessageProto3 => true,
754             MessageBodyParseMode::ExtendProto2
755             | MessageBodyParseMode::ExtendProto3
756             | MessageBodyParseMode::Oneof => false,
757         }
758     }
759 
760     fn is_option_allowed(&self) -> bool {
761         match *self {
762             MessageBodyParseMode::MessageProto2
763             | MessageBodyParseMode::MessageProto3
764             | MessageBodyParseMode::Oneof => true,
765             MessageBodyParseMode::ExtendProto2 | MessageBodyParseMode::ExtendProto3 => false,
766         }
767     }
768 }
769 
770 #[derive(Default)]
771 pub struct MessageBody {
772     pub fields: Vec<Field>,
773     pub oneofs: Vec<OneOf>,
774     pub reserved_nums: Vec<FieldNumberRange>,
775     pub reserved_names: Vec<String>,
776     pub messages: Vec<Message>,
777     pub enums: Vec<Enumeration>,
778     pub options: Vec<ProtobufOption>,
779 }
780 
/// Unsigned numeric literal as produced by the lexer, before a sign is applied.
#[derive(Clone, Copy)]
enum NumLit {
    /// Integer literal.
    U64(u64),
    /// Floating-point literal.
    F64(f64),
}
786 
787 impl NumLit {
788     fn to_option_value(&self, sign_is_plus: bool) -> ParserResult<ProtobufConstant> {
789         Ok(match (*self, sign_is_plus) {
790             (NumLit::U64(u), true) => ProtobufConstant::U64(u),
791             (NumLit::F64(f), true) => ProtobufConstant::F64(f),
792             (NumLit::U64(u), false) => ProtobufConstant::I64(u.neg()?),
793             (NumLit::F64(f), false) => ProtobufConstant::F64(-f),
794         })
795     }
796 }
797 
798 impl<'a> Parser<'a> {
799     pub fn new(input: &'a str) -> Parser<'a> {
800         Parser {
801             lexer: Lexer {
802                 input,
803                 pos: 0,
804                 loc: Loc::start(),
805             },
806             syntax: Syntax::Proto2,
807             next_token: None,
808         }
809     }
810 
811     pub fn loc(&self) -> Loc {
812         self.next_token.clone().map_or(self.lexer.loc, |n| n.loc)
813     }
814 
815     fn lookahead(&mut self) -> ParserResult<Option<&Token>> {
816         Ok(match self.next_token {
817             Some(ref token) => Some(&token.token),
818             None => {
819                 self.next_token = self.lexer.next_token()?;
820                 match self.next_token {
821                     Some(ref token) => Some(&token.token),
822                     None => None,
823                 }
824             }
825         })
826     }
827 
828     fn lookahead_some(&mut self) -> ParserResult<&Token> {
829         match self.lookahead()? {
830             Some(token) => Ok(token),
831             None => Err(ParserError::UnexpectedEof),
832         }
833     }
834 
835     fn next(&mut self) -> ParserResult<Option<Token>> {
836         self.lookahead()?;
837         Ok(self
838             .next_token
839             .take()
840             .map(|TokenWithLocation { token, .. }| token))
841     }
842 
843     fn next_some(&mut self) -> ParserResult<Token> {
844         match self.next()? {
845             Some(token) => Ok(token),
846             None => Err(ParserError::UnexpectedEof),
847         }
848     }
849 
850     /// Can be called only after lookahead, otherwise it's error
851     fn advance(&mut self) -> ParserResult<Token> {
852         self.next_token
853             .take()
854             .map(|TokenWithLocation { token, .. }| token)
855             .ok_or(ParserError::InternalError)
856     }
857 
858     /// No more tokens
859     fn syntax_eof(&mut self) -> ParserResult<bool> {
860         Ok(self.lookahead()?.is_none())
861     }
862 
863     fn next_token_if_map<P, R>(&mut self, p: P) -> ParserResult<Option<R>>
864     where
865         P: FnOnce(&Token) -> Option<R>,
866     {
867         self.lookahead()?;
868         let v = match self.next_token {
869             Some(ref token) => match p(&token.token) {
870                 Some(v) => v,
871                 None => return Ok(None),
872             },
873             _ => return Ok(None),
874         };
875         self.next_token = None;
876         Ok(Some(v))
877     }
878 
879     fn next_token_check_map<P, R>(&mut self, p: P) -> ParserResult<R>
880     where
881         P: FnOnce(&Token) -> ParserResult<R>,
882     {
883         self.lookahead()?;
884         let r = match self.next_token {
885             Some(ref token) => p(&token.token)?,
886             None => return Err(ParserError::UnexpectedEof),
887         };
888         self.next_token = None;
889         Ok(r)
890     }
891 
892     fn next_token_if<P>(&mut self, p: P) -> ParserResult<Option<Token>>
893     where
894         P: FnOnce(&Token) -> bool,
895     {
896         self.next_token_if_map(|token| if p(token) { Some(token.clone()) } else { None })
897     }
898 
899     fn next_ident_if_in(&mut self, idents: &[&str]) -> ParserResult<Option<String>> {
900         let v = match self.lookahead()? {
901             Some(&Token::Ident(ref next)) => {
902                 if idents.into_iter().find(|&i| i == next).is_some() {
903                     next.clone()
904                 } else {
905                     return Ok(None);
906                 }
907             }
908             _ => return Ok(None),
909         };
910         self.advance()?;
911         Ok(Some(v))
912     }
913 
914     fn next_ident_if_eq(&mut self, word: &str) -> ParserResult<bool> {
915         Ok(self.next_ident_if_in(&[word])? != None)
916     }
917 
918     fn next_ident_if_eq_error(&mut self, word: &str) -> ParserResult<()> {
919         if self.clone().next_ident_if_eq(word)? {
920             return Err(ParserError::IncorrectInput);
921         }
922         Ok(())
923     }
924 
925     fn next_symbol_if_eq(&mut self, symbol: char) -> ParserResult<bool> {
926         Ok(self.next_token_if(|token| match token {
927             &Token::Symbol(c) if c == symbol => true,
928             _ => false,
929         })? != None)
930     }
931 
932     fn next_symbol_expect_eq(&mut self, symbol: char) -> ParserResult<()> {
933         if self.lookahead_is_symbol(symbol)? {
934             self.advance()?;
935             Ok(())
936         } else {
937             Err(ParserError::ExpectChar(symbol))
938         }
939     }
940 
941     fn lookahead_if_symbol(&mut self) -> ParserResult<Option<char>> {
942         Ok(match self.lookahead()? {
943             Some(&Token::Symbol(c)) => Some(c),
944             _ => None,
945         })
946     }
947 
948     fn lookahead_is_symbol(&mut self, symbol: char) -> ParserResult<bool> {
949         Ok(self.lookahead_if_symbol()? == Some(symbol))
950     }
951 
952     // Protobuf grammar
953 
954     fn next_ident(&mut self) -> ParserResult<String> {
955         self.next_token_check_map(|token| match token {
956             &Token::Ident(ref ident) => Ok(ident.clone()),
957             _ => Err(ParserError::ExpectIdent),
958         })
959     }
960 
961     fn next_str_lit(&mut self) -> ParserResult<StrLit> {
962         self.next_token_check_map(|token| match token {
963             &Token::StrLit(ref str_lit) => Ok(str_lit.clone()),
964             _ => Err(ParserError::IncorrectInput),
965         })
966     }
967 
968     // fullIdent = ident { "." ident }
969     fn next_full_ident(&mut self) -> ParserResult<String> {
970         let mut full_ident = String::new();
971         // https://github.com/google/protobuf/issues/4563
972         if self.next_symbol_if_eq('.')? {
973             full_ident.push('.');
974         }
975         full_ident.push_str(&self.next_ident()?);
976         while self.next_symbol_if_eq('.')? {
977             full_ident.push('.');
978             full_ident.push_str(&self.next_ident()?);
979         }
980         Ok(full_ident)
981     }
982 
983     // messageName = ident
984     // enumName = ident
985     // messageType = [ "." ] { ident "." } messageName
986     // enumType = [ "." ] { ident "." } enumName
987     fn next_message_or_enum_type(&mut self) -> ParserResult<String> {
988         let mut full_name = String::new();
989         if self.next_symbol_if_eq('.')? {
990             full_name.push('.');
991         }
992         full_name.push_str(&self.next_ident()?);
993         while self.next_symbol_if_eq('.')? {
994             full_name.push('.');
995             full_name.push_str(&self.next_ident()?);
996         }
997         Ok(full_name)
998     }
999 
1000     fn is_ascii_uppercase(c: char) -> bool {
1001         c >= 'A' && c <= 'Z'
1002     }
1003 
1004     // groupName = capitalLetter { letter | decimalDigit | "_" }
1005     fn next_group_name(&mut self) -> ParserResult<String> {
1006         // lexer cannot distinguish between group name and other ident
1007         let mut clone = self.clone();
1008         let ident = clone.next_ident()?;
1009         if !Parser::is_ascii_uppercase(ident.chars().next().unwrap()) {
1010             return Err(ParserError::GroupNameShouldStartWithUpperCase);
1011         }
1012         *self = clone;
1013         Ok(ident)
1014     }
1015 
1016     // Boolean
1017 
1018     // boolLit = "true" | "false"
1019     fn next_bool_lit_opt(&mut self) -> ParserResult<Option<bool>> {
1020         Ok(if self.next_ident_if_eq("true")? {
1021             Some(true)
1022         } else if self.next_ident_if_eq("false")? {
1023             Some(false)
1024         } else {
1025             None
1026         })
1027     }
1028 
1029     // Constant
1030 
1031     fn next_num_lit(&mut self) -> ParserResult<NumLit> {
1032         self.next_token_check_map(|token| token.to_num_lit())
1033     }
1034 
1035     // constant = fullIdent | ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) |
1036     //            strLit | boolLit
1037     fn next_constant(&mut self) -> ParserResult<ProtobufConstant> {
1038         // https://github.com/google/protobuf/blob/a21f225824e994ebd35e8447382ea4e0cd165b3c/src/google/protobuf/unittest_custom_options.proto#L350
1039         if self.lookahead_is_symbol('{')? {
1040             return Ok(ProtobufConstant::BracedExpr(self.next_braces()?));
1041         }
1042 
1043         if let Some(b) = self.next_bool_lit_opt()? {
1044             return Ok(ProtobufConstant::Bool(b));
1045         }
1046 
1047         if let &Token::Symbol(c) = self.lookahead_some()? {
1048             if c == '+' || c == '-' {
1049                 self.advance()?;
1050                 let sign = c == '+';
1051                 return Ok(self.next_num_lit()?.to_option_value(sign)?);
1052             }
1053         }
1054 
1055         if let Some(r) = self.next_token_if_map(|token| match token {
1056             &Token::StrLit(ref s) => Some(ProtobufConstant::String(s.clone())),
1057             _ => None,
1058         })? {
1059             return Ok(r);
1060         }
1061 
1062         match self.lookahead_some()? {
1063             &Token::IntLit(..) | &Token::FloatLit(..) => {
1064                 return self.next_num_lit()?.to_option_value(true);
1065             }
1066             &Token::Ident(..) => {
1067                 return Ok(ProtobufConstant::Ident(self.next_full_ident()?));
1068             }
1069             _ => {}
1070         }
1071 
1072         Err(ParserError::ExpectConstant)
1073     }
1074 
1075     fn next_int_lit(&mut self) -> ParserResult<u64> {
1076         self.next_token_check_map(|token| match token {
1077             &Token::IntLit(i) => Ok(i),
1078             _ => Err(ParserError::IncorrectInput),
1079         })
1080     }
1081 
1082     // Syntax
1083 
1084     // syntax = "syntax" "=" quote "proto2" quote ";"
1085     // syntax = "syntax" "=" quote "proto3" quote ";"
1086     fn next_syntax(&mut self) -> ParserResult<Option<Syntax>> {
1087         if self.next_ident_if_eq("syntax")? {
1088             self.next_symbol_expect_eq('=')?;
1089             let syntax_str = self.next_str_lit()?.decode_utf8()?;
1090             let syntax = if syntax_str == "proto2" {
1091                 Syntax::Proto2
1092             } else if syntax_str == "proto3" {
1093                 Syntax::Proto3
1094             } else {
1095                 return Err(ParserError::UnknownSyntax);
1096             };
1097             self.next_symbol_expect_eq(';')?;
1098             Ok(Some(syntax))
1099         } else {
1100             Ok(None)
1101         }
1102     }
1103 
1104     // Import Statement
1105 
1106     // import = "import" [ "weak" | "public" ] strLit ";"
1107     fn next_import_opt(&mut self) -> ParserResult<Option<String>> {
1108         if self.next_ident_if_eq("import")? {
1109             self.next_ident_if_in(&["weak", "public"])?;
1110             let import_path = self.next_str_lit()?.decode_utf8()?;
1111             self.next_symbol_expect_eq(';')?;
1112             Ok(Some(import_path))
1113         } else {
1114             Ok(None)
1115         }
1116     }
1117 
1118     // Package
1119 
1120     // package = "package" fullIdent ";"
1121     fn next_package_opt(&mut self) -> ParserResult<Option<String>> {
1122         if self.next_ident_if_eq("package")? {
1123             let package = self.next_full_ident()?;
1124             self.next_symbol_expect_eq(';')?;
1125             Ok(Some(package))
1126         } else {
1127             Ok(None)
1128         }
1129     }
1130 
1131     // Option
1132 
1133     fn next_ident_or_braced(&mut self) -> ParserResult<String> {
1134         let mut ident_or_braced = String::new();
1135         if self.next_symbol_if_eq('(')? {
1136             ident_or_braced.push('(');
1137             ident_or_braced.push_str(&self.next_full_ident()?);
1138             self.next_symbol_expect_eq(')')?;
1139             ident_or_braced.push(')');
1140         } else {
1141             ident_or_braced.push_str(&self.next_ident()?);
1142         }
1143         Ok(ident_or_braced)
1144     }
1145 
1146     // https://github.com/google/protobuf/issues/4563
1147     // optionName = ( ident | "(" fullIdent ")" ) { "." ident }
1148     fn next_option_name(&mut self) -> ParserResult<String> {
1149         let mut option_name = String::new();
1150         option_name.push_str(&self.next_ident_or_braced()?);
1151         while self.next_symbol_if_eq('.')? {
1152             option_name.push('.');
1153             option_name.push_str(&self.next_ident_or_braced()?);
1154         }
1155         Ok(option_name)
1156     }
1157 
1158     // option = "option" optionName  "=" constant ";"
1159     fn next_option_opt(&mut self) -> ParserResult<Option<ProtobufOption>> {
1160         if self.next_ident_if_eq("option")? {
1161             let name = self.next_option_name()?;
1162             self.next_symbol_expect_eq('=')?;
1163             let value = self.next_constant()?;
1164             self.next_symbol_expect_eq(';')?;
1165             Ok(Some(ProtobufOption { name, value }))
1166         } else {
1167             Ok(None)
1168         }
1169     }
1170 
1171     // Fields
1172 
1173     // label = "required" | "optional" | "repeated"
1174     fn next_label(&mut self, mode: MessageBodyParseMode) -> ParserResult<Rule> {
1175         let map = &[
1176             ("optional", Rule::Optional),
1177             ("required", Rule::Required),
1178             ("repeated", Rule::Repeated),
1179         ];
1180         for &(name, value) in map {
1181             let mut clone = self.clone();
1182             if clone.next_ident_if_eq(name)? {
1183                 if !mode.label_allowed(value) {
1184                     return Err(ParserError::LabelNotAllowed);
1185                 }
1186 
1187                 *self = clone;
1188                 return Ok(value);
1189             }
1190         }
1191 
1192         if mode.some_label_required() {
1193             Err(ParserError::LabelRequired)
1194         } else {
1195             Ok(Rule::Optional)
1196         }
1197     }
1198 
1199     fn next_field_type(&mut self) -> ParserResult<FieldType> {
1200         let simple = &[
1201             ("int32", FieldType::Int32),
1202             ("int64", FieldType::Int64),
1203             ("uint32", FieldType::Uint32),
1204             ("uint64", FieldType::Uint64),
1205             ("sint32", FieldType::Sint32),
1206             ("sint64", FieldType::Sint64),
1207             ("fixed32", FieldType::Fixed32),
1208             ("sfixed32", FieldType::Sfixed32),
1209             ("fixed64", FieldType::Fixed64),
1210             ("sfixed64", FieldType::Sfixed64),
1211             ("bool", FieldType::Bool),
1212             ("string", FieldType::String),
1213             ("bytes", FieldType::Bytes),
1214             ("float", FieldType::Float),
1215             ("double", FieldType::Double),
1216         ];
1217         for &(ref n, ref t) in simple {
1218             if self.next_ident_if_eq(n)? {
1219                 return Ok(t.clone());
1220             }
1221         }
1222 
1223         if let Some(t) = self.next_map_field_type_opt()? {
1224             return Ok(t);
1225         }
1226 
1227         let message_or_enum = self.next_message_or_enum_type()?;
1228         Ok(FieldType::MessageOrEnum(message_or_enum))
1229     }
1230 
1231     fn next_field_number(&mut self) -> ParserResult<i32> {
1232         self.next_token_check_map(|token| match token {
1233             &Token::IntLit(i) => i.to_i32(),
1234             _ => Err(ParserError::IncorrectInput),
1235         })
1236     }
1237 
1238     // fieldOption = optionName "=" constant
1239     fn next_field_option(&mut self) -> ParserResult<ProtobufOption> {
1240         let name = self.next_option_name()?;
1241         self.next_symbol_expect_eq('=')?;
1242         let value = self.next_constant()?;
1243         Ok(ProtobufOption { name, value })
1244     }
1245 
1246     // fieldOptions = fieldOption { ","  fieldOption }
1247     fn next_field_options(&mut self) -> ParserResult<Vec<ProtobufOption>> {
1248         let mut options = Vec::new();
1249 
1250         options.push(self.next_field_option()?);
1251 
1252         while self.next_symbol_if_eq(',')? {
1253             options.push(self.next_field_option()?);
1254         }
1255 
1256         Ok(options)
1257     }
1258 
1259     // field = label type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
1260     // group = label "group" groupName "=" fieldNumber messageBody
1261     fn next_field(&mut self, mode: MessageBodyParseMode) -> ParserResult<Field> {
1262         let rule = if self.clone().next_ident_if_eq("map")? {
1263             if !mode.map_allowed() {
1264                 return Err(ParserError::MapFieldNotAllowed);
1265             }
1266             Rule::Optional
1267         } else {
1268             self.next_label(mode)?
1269         };
1270         if self.next_ident_if_eq("group")? {
1271             let name = self.next_group_name()?.to_owned();
1272             self.next_symbol_expect_eq('=')?;
1273             let number = self.next_field_number()?;
1274 
1275             let mode = match self.syntax {
1276                 Syntax::Proto2 => MessageBodyParseMode::MessageProto2,
1277                 Syntax::Proto3 => MessageBodyParseMode::MessageProto3,
1278             };
1279 
1280             let MessageBody { fields, .. } = self.next_message_body(mode)?;
1281 
1282             Ok(Field {
1283                 name,
1284                 rule,
1285                 typ: FieldType::Group(fields),
1286                 number,
1287                 options: Vec::new(),
1288             })
1289         } else {
1290             let typ = self.next_field_type()?;
1291             let name = self.next_ident()?.to_owned();
1292             self.next_symbol_expect_eq('=')?;
1293             let number = self.next_field_number()?;
1294 
1295             let mut options = Vec::new();
1296 
1297             if self.next_symbol_if_eq('[')? {
1298                 for o in self.next_field_options()? {
1299                     options.push(o);
1300                 }
1301                 self.next_symbol_expect_eq(']')?;
1302             }
1303             self.next_symbol_expect_eq(';')?;
1304             Ok(Field {
1305                 name,
1306                 rule,
1307                 typ,
1308                 number,
1309                 options,
1310             })
1311         }
1312     }
1313 
1314     // oneof = "oneof" oneofName "{" { oneofField | emptyStatement } "}"
1315     // oneofField = type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
1316     fn next_oneof_opt(&mut self) -> ParserResult<Option<OneOf>> {
1317         if self.next_ident_if_eq("oneof")? {
1318             let name = self.next_ident()?.to_owned();
1319             let MessageBody { fields, .. } = self.next_message_body(MessageBodyParseMode::Oneof)?;
1320             Ok(Some(OneOf { name, fields }))
1321         } else {
1322             Ok(None)
1323         }
1324     }
1325 
1326     // mapField = "map" "<" keyType "," type ">" mapName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
1327     // keyType = "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" |
1328     //           "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string"
1329     fn next_map_field_type_opt(&mut self) -> ParserResult<Option<FieldType>> {
1330         if self.next_ident_if_eq("map")? {
1331             self.next_symbol_expect_eq('<')?;
1332             // TODO: restrict key types
1333             let key = self.next_field_type()?;
1334             self.next_symbol_expect_eq(',')?;
1335             let value = self.next_field_type()?;
1336             self.next_symbol_expect_eq('>')?;
1337             Ok(Some(FieldType::Map(Box::new((key, value)))))
1338         } else {
1339             Ok(None)
1340         }
1341     }
1342 
1343     // Extensions and Reserved
1344 
1345     // Extensions
1346 
1347     // range =  intLit [ "to" ( intLit | "max" ) ]
1348     fn next_range(&mut self) -> ParserResult<FieldNumberRange> {
1349         let from = self.next_field_number()?;
1350         let to = if self.next_ident_if_eq("to")? {
1351             if self.next_ident_if_eq("max")? {
1352                 i32::max_value()
1353             } else {
1354                 self.next_field_number()?
1355             }
1356         } else {
1357             from
1358         };
1359         Ok(FieldNumberRange { from, to })
1360     }
1361 
1362     // ranges = range { "," range }
1363     fn next_ranges(&mut self) -> ParserResult<Vec<FieldNumberRange>> {
1364         let mut ranges = Vec::new();
1365         ranges.push(self.next_range()?);
1366         while self.next_symbol_if_eq(',')? {
1367             ranges.push(self.next_range()?);
1368         }
1369         Ok(ranges)
1370     }
1371 
1372     // extensions = "extensions" ranges ";"
1373     fn next_extensions_opt(&mut self) -> ParserResult<Option<Vec<FieldNumberRange>>> {
1374         if self.next_ident_if_eq("extensions")? {
1375             Ok(Some(self.next_ranges()?))
1376         } else {
1377             Ok(None)
1378         }
1379     }
1380 
1381     // Reserved
1382 
1383     // Grammar is incorrect: https://github.com/google/protobuf/issues/4558
1384     // reserved = "reserved" ( ranges | fieldNames ) ";"
1385     // fieldNames = fieldName { "," fieldName }
1386     fn next_reserved_opt(&mut self) -> ParserResult<Option<(Vec<FieldNumberRange>, Vec<String>)>> {
1387         if self.next_ident_if_eq("reserved")? {
1388             let (ranges, names) = if let &Token::StrLit(..) = self.lookahead_some()? {
1389                 let mut names = Vec::new();
1390                 names.push(self.next_str_lit()?.decode_utf8()?);
1391                 while self.next_symbol_if_eq(',')? {
1392                     names.push(self.next_str_lit()?.decode_utf8()?);
1393                 }
1394                 (Vec::new(), names)
1395             } else {
1396                 (self.next_ranges()?, Vec::new())
1397             };
1398 
1399             self.next_symbol_expect_eq(';')?;
1400 
1401             Ok(Some((ranges, names)))
1402         } else {
1403             Ok(None)
1404         }
1405     }
1406 
1407     // Top Level definitions
1408 
1409     // Enum definition
1410 
1411     // enumValueOption = optionName "=" constant
1412     fn next_enum_value_option(&mut self) -> ParserResult<()> {
1413         self.next_option_name()?;
1414         self.next_symbol_expect_eq('=')?;
1415         self.next_constant()?;
1416         Ok(())
1417     }
1418 
1419     // https://github.com/google/protobuf/issues/4561
1420     fn next_enum_value(&mut self) -> ParserResult<i32> {
1421         let minus = self.next_symbol_if_eq('-')?;
1422         let lit = self.next_int_lit()?;
1423         Ok(if minus {
1424             let unsigned = lit.to_i64()?;
1425             match unsigned.checked_neg() {
1426                 Some(neg) => neg.to_i32()?,
1427                 None => return Err(ParserError::IntegerOverflow),
1428             }
1429         } else {
1430             lit.to_i32()?
1431         })
1432     }
1433 
1434     // enumField = ident "=" intLit [ "[" enumValueOption { ","  enumValueOption } "]" ]";"
1435     fn next_enum_field(&mut self) -> ParserResult<EnumValue> {
1436         let name = self.next_ident()?.to_owned();
1437         self.next_symbol_expect_eq('=')?;
1438         let number = self.next_enum_value()?;
1439         if self.next_symbol_if_eq('[')? {
1440             self.next_enum_value_option()?;
1441             while self.next_symbol_if_eq(',')? {
1442                 self.next_enum_value_option()?;
1443             }
1444             self.next_symbol_expect_eq(']')?;
1445         }
1446 
1447         Ok(EnumValue { name, number })
1448     }
1449 
1450     // enum = "enum" enumName enumBody
1451     // enumBody = "{" { option | enumField | emptyStatement } "}"
1452     fn next_enum_opt(&mut self) -> ParserResult<Option<Enumeration>> {
1453         if self.next_ident_if_eq("enum")? {
1454             let name = self.next_ident()?.to_owned();
1455 
1456             let mut values = Vec::new();
1457             let mut options = Vec::new();
1458 
1459             self.next_symbol_expect_eq('{')?;
1460             while self.lookahead_if_symbol()? != Some('}') {
1461                 // emptyStatement
1462                 if self.next_symbol_if_eq(';')? {
1463                     continue;
1464                 }
1465 
1466                 if let Some(o) = self.next_option_opt()? {
1467                     options.push(o);
1468                     continue;
1469                 }
1470 
1471                 values.push(self.next_enum_field()?);
1472             }
1473             self.next_symbol_expect_eq('}')?;
1474             Ok(Some(Enumeration {
1475                 name,
1476                 values,
1477                 options,
1478             }))
1479         } else {
1480             Ok(None)
1481         }
1482     }
1483 
1484     // Message definition
1485 
1486     // messageBody = "{" { field | enum | message | extend | extensions | group |
1487     //               option | oneof | mapField | reserved | emptyStatement } "}"
    /// Parses a brace-delimited body and collects its parts into a `MessageBody`.
    ///
    /// `mode` decides what may appear in the body: whether the non-field
    /// statements (`reserved`, `oneof`, `extensions`, `extend`, `message`,
    /// `enum`) and `option` statements are parsed or rejected. `mode` is also
    /// passed on to `next_field` for everything else.
    ///
    /// The statement kinds are probed in a fixed order, and whatever none of
    /// the probes claims is parsed as a field.
    fn next_message_body(&mut self, mode: MessageBodyParseMode) -> ParserResult<MessageBody> {
        self.next_symbol_expect_eq('{')?;

        let mut r = MessageBody::default();

        // Keep parsing statements until the closing brace is the next token.
        while self.lookahead_if_symbol()? != Some('}') {
            // emptyStatement
            if self.next_symbol_if_eq(';')? {
                continue;
            }

            if mode.is_most_non_fields_allowed() {
                if let Some((field_nums, field_names)) = self.next_reserved_opt()? {
                    r.reserved_nums.extend(field_nums);
                    r.reserved_names.extend(field_names);
                    continue;
                }

                if let Some(oneof) = self.next_oneof_opt()? {
                    r.oneofs.push(oneof);
                    continue;
                }

                // Extension ranges are parsed but not stored in the result.
                if let Some(_extensions) = self.next_extensions_opt()? {
                    continue;
                }

                // Nested `extend` blocks are parsed but not stored in the result.
                if let Some(_extend) = self.next_extend_opt()? {
                    continue;
                }

                if let Some(nested_message) = self.next_message_opt()? {
                    r.messages.push(nested_message);
                    continue;
                }

                if let Some(nested_enum) = self.next_enum_opt()? {
                    r.enums.push(nested_enum);
                    continue;
                }
            } else {
                // NOTE(review): presumably each call fails when the next token is the
                // given keyword; confirm against `next_ident_if_eq_error`.
                self.next_ident_if_eq_error("reserved")?;
                self.next_ident_if_eq_error("oneof")?;
                self.next_ident_if_eq_error("extensions")?;
                self.next_ident_if_eq_error("extend")?;
                self.next_ident_if_eq_error("message")?;
                self.next_ident_if_eq_error("enum")?;
            }

            if mode.is_option_allowed() {
                if let Some(option) = self.next_option_opt()? {
                    r.options.push(option);
                    continue;
                }
            } else {
                self.next_ident_if_eq_error("option")?;
            }

            // Nothing above claimed the statement, so it has to be a field.
            r.fields.push(self.next_field(mode)?);
        }

        self.next_symbol_expect_eq('}')?;

        Ok(r)
    }
1553 
1554     // message = "message" messageName messageBody
1555     fn next_message_opt(&mut self) -> ParserResult<Option<Message>> {
1556         if self.next_ident_if_eq("message")? {
1557             let name = self.next_ident()?.to_owned();
1558 
1559             let mode = match self.syntax {
1560                 Syntax::Proto2 => MessageBodyParseMode::MessageProto2,
1561                 Syntax::Proto3 => MessageBodyParseMode::MessageProto3,
1562             };
1563 
1564             let MessageBody {
1565                 fields,
1566                 oneofs,
1567                 reserved_nums,
1568                 reserved_names,
1569                 messages,
1570                 enums,
1571                 options,
1572             } = self.next_message_body(mode)?;
1573 
1574             Ok(Some(Message {
1575                 name,
1576                 fields,
1577                 oneofs,
1578                 reserved_nums,
1579                 reserved_names,
1580                 messages,
1581                 enums,
1582                 options,
1583             }))
1584         } else {
1585             Ok(None)
1586         }
1587     }
1588 
1589     // Extend
1590 
1591     // extend = "extend" messageType "{" {field | group | emptyStatement} "}"
1592     fn next_extend_opt(&mut self) -> ParserResult<Option<Vec<Extension>>> {
1593         let mut clone = self.clone();
1594         if clone.next_ident_if_eq("extend")? {
1595             // According to spec `extend` is only for `proto2`, but it is used in `proto3`
1596             // https://github.com/google/protobuf/issues/4610
1597 
1598             *self = clone;
1599 
1600             let extendee = self.next_message_or_enum_type()?;
1601 
1602             let mode = match self.syntax {
1603                 Syntax::Proto2 => MessageBodyParseMode::ExtendProto2,
1604                 Syntax::Proto3 => MessageBodyParseMode::ExtendProto3,
1605             };
1606 
1607             let MessageBody { fields, .. } = self.next_message_body(mode)?;
1608 
1609             let extensions = fields
1610                 .into_iter()
1611                 .map(|field| {
1612                     let extendee = extendee.clone();
1613                     Extension { extendee, field }
1614                 })
1615                 .collect();
1616 
1617             Ok(Some(extensions))
1618         } else {
1619             Ok(None)
1620         }
1621     }
1622 
1623     // Service definition
1624 
1625     fn next_braces(&mut self) -> ParserResult<String> {
1626         let mut r = String::new();
1627         self.next_symbol_expect_eq('{')?;
1628         r.push('{');
1629         loop {
1630             if self.lookahead_if_symbol()? == Some('{') {
1631                 r.push_str(&self.next_braces()?);
1632                 continue;
1633             }
1634             let next = self.next_some()?;
1635             r.push_str(&next.format());
1636             if let Token::Symbol('}') = next {
1637                 break;
1638             }
1639         }
1640         Ok(r)
1641     }
1642 
1643     // service = "service" serviceName "{" { option | rpc | stream | emptyStatement } "}"
1644     // rpc = "rpc" rpcName "(" [ "stream" ] messageType ")" "returns" "(" [ "stream" ]
1645     //       messageType ")" (( "{" { option | emptyStatement } "}" ) | ";" )
1646     // stream = "stream" streamName "(" messageType "," messageType ")" (( "{"
1647     //        { option | emptyStatement } "}") | ";" )
1648     fn next_service_opt(&mut self) -> ParserResult<Option<()>> {
1649         if self.next_ident_if_eq("service")? {
1650             let _name = self.next_ident()?;
1651             self.next_braces()?;
1652             Ok(Some(()))
1653         } else {
1654             Ok(None)
1655         }
1656     }
1657 
1658     // Proto file
1659 
1660     // proto = syntax { import | package | option | topLevelDef | emptyStatement }
1661     // topLevelDef = message | enum | extend | service
1662     pub fn next_proto(&mut self) -> ParserResult<FileDescriptor> {
1663         let syntax = self.next_syntax()?.unwrap_or(Syntax::Proto2);
1664         self.syntax = syntax;
1665 
1666         let mut import_paths = Vec::new();
1667         let mut package = String::new();
1668         let mut messages = Vec::new();
1669         let mut enums = Vec::new();
1670         let mut extensions = Vec::new();
1671         let mut options = Vec::new();
1672 
1673         while !self.syntax_eof()? {
1674             if let Some(import_path) = self.next_import_opt()? {
1675                 import_paths.push(import_path);
1676                 continue;
1677             }
1678 
1679             if let Some(next_package) = self.next_package_opt()? {
1680                 package = next_package.to_owned();
1681                 continue;
1682             }
1683 
1684             if let Some(option) = self.next_option_opt()? {
1685                 options.push(option);
1686                 continue;
1687             }
1688 
1689             if let Some(message) = self.next_message_opt()? {
1690                 messages.push(message);
1691                 continue;
1692             }
1693 
1694             if let Some(enumeration) = self.next_enum_opt()? {
1695                 enums.push(enumeration);
1696                 continue;
1697             }
1698 
1699             if let Some(more_extensions) = self.next_extend_opt()? {
1700                 extensions.extend(more_extensions);
1701                 continue;
1702             }
1703 
1704             if let Some(_service) = self.next_service_opt()? {
1705                 continue;
1706             }
1707 
1708             if self.next_symbol_if_eq(';')? {
1709                 continue;
1710             }
1711 
1712             return Err(ParserError::IncorrectInput);
1713         }
1714 
1715         Ok(FileDescriptor {
1716             import_paths,
1717             package,
1718             syntax,
1719             messages,
1720             enums,
1721             extensions,
1722             options,
1723         })
1724     }
1725 }
1726 
1727 #[cfg(test)]
1728 mod test {
1729     use super::*;
1730 
1731     fn lex<P, R>(input: &str, parse_what: P) -> R
1732     where
1733         P: FnOnce(&mut Lexer) -> ParserResult<R>,
1734     {
1735         let mut lexer = Lexer {
1736             input,
1737             pos: 0,
1738             loc: Loc::start(),
1739         };
1740         let r = parse_what(&mut lexer).expect(&format!("lexer failed at {}", lexer.loc));
1741         assert!(lexer.eof(), "check eof failed at {}", lexer.loc);
1742         r
1743     }
1744 
1745     fn lex_opt<P, R>(input: &str, parse_what: P) -> R
1746     where
1747         P: FnOnce(&mut Lexer) -> ParserResult<Option<R>>,
1748     {
1749         let mut lexer = Lexer {
1750             input,
1751             pos: 0,
1752             loc: Loc::start(),
1753         };
1754         let o = parse_what(&mut lexer).expect(&format!("lexer failed at {}", lexer.loc));
1755         let r = o.expect(&format!("lexer returned none at {}", lexer.loc));
1756         assert!(lexer.eof(), "check eof failed at {}", lexer.loc);
1757         r
1758     }
1759 
1760     fn parse<P, R>(input: &str, parse_what: P) -> R
1761     where
1762         P: FnOnce(&mut Parser) -> ParserResult<R>,
1763     {
1764         let mut parser = Parser::new(input);
1765         let r = parse_what(&mut parser).expect(&format!("parse failed at {}", parser.loc()));
1766         let eof = parser
1767             .syntax_eof()
1768             .expect(&format!("check eof failed at {}", parser.loc()));
1769         assert!(eof, "{}", parser.loc());
1770         r
1771     }
1772 
1773     fn parse_opt<P, R>(input: &str, parse_what: P) -> R
1774     where
1775         P: FnOnce(&mut Parser) -> ParserResult<Option<R>>,
1776     {
1777         let mut parser = Parser::new(input);
1778         let o = parse_what(&mut parser).expect(&format!("parse failed at {}", parser.loc()));
1779         let r = o.expect(&format!("parser returned none at {}", parser.loc()));
1780         assert!(parser.syntax_eof().unwrap());
1781         r
1782     }
1783 
1784     #[test]
1785     fn test_lexer_int_lit() {
1786         let msg = r#"10"#;
1787         let mess = lex_opt(msg, |p| p.next_int_lit_opt());
1788         assert_eq!(10, mess);
1789     }
1790 
1791     #[test]
1792     fn test_lexer_float_lit() {
1793         let msg = r#"12.3"#;
1794         let mess = lex(msg, |p| p.next_token_inner());
1795         assert_eq!(Token::FloatLit(12.3), mess);
1796     }
1797 
1798     #[test]
1799     fn test_ident() {
1800         let msg = r#"  aabb_c  "#;
1801         let mess = parse(msg, |p| p.next_ident().map(|s| s.to_owned()));
1802         assert_eq!("aabb_c", mess);
1803     }
1804 
1805     #[test]
1806     fn test_str_lit() {
1807         let msg = r#"  "a\nb"  "#;
1808         let mess = parse(msg, |p| p.next_str_lit());
1809         assert_eq!(
1810             StrLit {
1811                 escaped: r#"a\nb"#.to_owned()
1812             },
1813             mess
1814         );
1815     }
1816 
1817     #[test]
1818     fn test_syntax() {
1819         let msg = r#"  syntax = "proto3";  "#;
1820         let mess = parse_opt(msg, |p| p.next_syntax());
1821         assert_eq!(Syntax::Proto3, mess);
1822     }
1823 
1824     #[test]
1825     fn test_field_default_value_int() {
1826         let msg = r#"  optional int64 f = 4 [default = 12];  "#;
1827         let mess = parse(msg, |p| p.next_field(MessageBodyParseMode::MessageProto2));
1828         assert_eq!("f", mess.name);
1829         assert_eq!("default", mess.options[0].name);
1830         assert_eq!("12", mess.options[0].value.format());
1831     }
1832 
1833     #[test]
1834     fn test_field_default_value_float() {
1835         let msg = r#"  optional float f = 2 [default = 10.0];  "#;
1836         let mess = parse(msg, |p| p.next_field(MessageBodyParseMode::MessageProto2));
1837         assert_eq!("f", mess.name);
1838         assert_eq!("default", mess.options[0].name);
1839         assert_eq!("10.0", mess.options[0].value.format());
1840     }
1841 
1842     #[test]
1843     fn test_message() {
1844         let msg = r#"message ReferenceData
1845     {
1846         repeated ScenarioInfo  scenarioSet = 1;
1847         repeated CalculatedObjectInfo calculatedObjectSet = 2;
1848         repeated RiskFactorList riskFactorListSet = 3;
1849         repeated RiskMaturityInfo riskMaturitySet = 4;
1850         repeated IndicatorInfo indicatorSet = 5;
1851         repeated RiskStrikeInfo riskStrikeSet = 6;
1852         repeated FreeProjectionList freeProjectionListSet = 7;
1853         repeated ValidationProperty ValidationSet = 8;
1854         repeated CalcProperties calcPropertiesSet = 9;
1855         repeated MaturityInfo maturitySet = 10;
1856     }"#;
1857 
1858         let mess = parse_opt(msg, |p| p.next_message_opt());
1859         assert_eq!(10, mess.fields.len());
1860     }
1861 
1862     #[test]
1863     fn test_enum() {
1864         let msg = r#"enum PairingStatus {
1865                 DEALPAIRED        = 0;
1866                 INVENTORYORPHAN   = 1;
1867                 CALCULATEDORPHAN  = 2;
1868                 CANCELED          = 3;
1869     }"#;
1870 
1871         let enumeration = parse_opt(msg, |p| p.next_enum_opt());
1872         assert_eq!(4, enumeration.values.len());
1873     }
1874 
1875     #[test]
1876     fn test_ignore() {
1877         let msg = r#"option optimize_for = SPEED;"#;
1878 
1879         parse_opt(msg, |p| p.next_option_opt());
1880     }
1881 
1882     #[test]
1883     fn test_import() {
1884         let msg = r#"syntax = "proto3";
1885 
1886     import "test_import_nested_imported_pb.proto";
1887 
1888     message ContainsImportedNested {
1889         ContainerForNested.NestedMessage m = 1;
1890         ContainerForNested.NestedEnum e = 2;
1891     }
1892     "#;
1893         let desc = parse(msg, |p| p.next_proto());
1894 
1895         assert_eq!(
1896             vec!["test_import_nested_imported_pb.proto"],
1897             desc.import_paths
1898         );
1899     }
1900 
1901     #[test]
1902     fn test_package() {
1903         let msg = r#"
1904         package foo.bar;
1905 
1906     message ContainsImportedNested {
1907         optional ContainerForNested.NestedMessage m = 1;
1908         optional ContainerForNested.NestedEnum e = 2;
1909     }
1910     "#;
1911         let desc = parse(msg, |p| p.next_proto());
1912         assert_eq!("foo.bar".to_string(), desc.package);
1913     }
1914 
1915     #[test]
1916     fn test_nested_message() {
1917         let msg = r#"message A
1918     {
1919         message B {
1920             repeated int32 a = 1;
1921             optional string b = 2;
1922         }
1923         optional string b = 1;
1924     }"#;
1925 
1926         let mess = parse_opt(msg, |p| p.next_message_opt());
1927         assert_eq!(1, mess.messages.len());
1928     }
1929 
1930     #[test]
1931     fn test_map() {
1932         let msg = r#"message A
1933     {
1934         optional map<string, int32> b = 1;
1935     }"#;
1936 
1937         let mess = parse_opt(msg, |p| p.next_message_opt());
1938         assert_eq!(1, mess.fields.len());
1939         match mess.fields[0].typ {
1940             FieldType::Map(ref f) => match &**f {
1941                 &(FieldType::String, FieldType::Int32) => (),
1942                 ref f => panic!("Expecting Map<String, Int32> found {:?}", f),
1943             },
1944             ref f => panic!("Expecting map, got {:?}", f),
1945         }
1946     }
1947 
1948     #[test]
1949     fn test_oneof() {
1950         let msg = r#"message A
1951     {
1952         optional int32 a1 = 1;
1953         oneof a_oneof {
1954             string a2 = 2;
1955             int32 a3 = 3;
1956             bytes a4 = 4;
1957         }
1958         repeated bool a5 = 5;
1959     }"#;
1960 
1961         let mess = parse_opt(msg, |p| p.next_message_opt());
1962         assert_eq!(1, mess.oneofs.len());
1963         assert_eq!(3, mess.oneofs[0].fields.len());
1964     }
1965 
1966     #[test]
1967     fn test_reserved() {
1968         let msg = r#"message Sample {
1969        reserved 4, 15, 17 to 20, 30;
1970        reserved "foo", "bar";
1971        optional uint64 age =1;
1972        required bytes name =2;
1973     }"#;
1974 
1975         let mess = parse_opt(msg, |p| p.next_message_opt());
1976         assert_eq!(
1977             vec![
1978                 FieldNumberRange { from: 4, to: 4 },
1979                 FieldNumberRange { from: 15, to: 15 },
1980                 FieldNumberRange { from: 17, to: 20 },
1981                 FieldNumberRange { from: 30, to: 30 }
1982             ],
1983             mess.reserved_nums
1984         );
1985         assert_eq!(
1986             vec!["foo".to_string(), "bar".to_string()],
1987             mess.reserved_names
1988         );
1989         assert_eq!(2, mess.fields.len());
1990     }
1991 
1992     #[test]
1993     fn test_default_value_int() {
1994         let msg = r#"message Sample {
1995             optional int32 x = 1 [default = 17];
1996         }"#;
1997 
1998         let mess = parse_opt(msg, |p| p.next_message_opt());
1999         assert_eq!("default", mess.fields[0].options[0].name);
2000         assert_eq!("17", mess.fields[0].options[0].value.format());
2001     }
2002 
2003     #[test]
2004     fn test_default_value_string() {
2005         let msg = r#"message Sample {
2006             optional string x = 1 [default = "ab\nc d\"g\'h\0\"z"];
2007         }"#;
2008 
2009         let mess = parse_opt(msg, |p| p.next_message_opt());
2010         assert_eq!(
2011             r#""ab\nc d\"g\'h\0\"z""#,
2012             mess.fields[0].options[0].value.format()
2013         );
2014     }
2015 
2016     #[test]
2017     fn test_default_value_bytes() {
2018         let msg = r#"message Sample {
2019             optional bytes x = 1 [default = "ab\nc d\xfeE\"g\'h\0\"z"];
2020         }"#;
2021 
2022         let mess = parse_opt(msg, |p| p.next_message_opt());
2023         assert_eq!(
2024             r#""ab\nc d\xfeE\"g\'h\0\"z""#,
2025             mess.fields[0].options[0].value.format()
2026         );
2027     }
2028 
2029     #[test]
2030     fn test_group() {
2031         let msg = r#"message MessageWithGroup {
2032             optional string aaa = 1;
2033 
2034             repeated group Identifier = 18 {
2035                 optional int32 iii = 19;
2036                 optional string sss = 20;
2037             }
2038 
2039             required int bbb = 3;
2040         }"#;
2041         let mess = parse_opt(msg, |p| p.next_message_opt());
2042 
2043         assert_eq!("Identifier", mess.fields[1].name);
2044         if let FieldType::Group(ref group_fields) = mess.fields[1].typ {
2045             assert_eq!(2, group_fields.len());
2046         } else {
2047             panic!("expecting group");
2048         }
2049 
2050         assert_eq!("bbb", mess.fields[2].name);
2051     }
2052 
2053     #[test]
2054     fn test_incorrect_file_descriptor() {
2055         let msg = r#"
2056             message Foo {}
2057 
2058             dfgdg
2059         "#;
2060 
2061         let err = FileDescriptor::parse(msg).err().expect("err");
2062         assert_eq!(4, err.line);
2063     }
2064 
2065     #[test]
2066     fn test_extend() {
2067         let proto = r#"
2068             syntax = "proto2";
2069 
2070             extend google.protobuf.FileOptions {
2071                 optional bool foo = 17001;
2072                 optional string bar = 17002;
2073             }
2074 
2075             extend google.protobuf.MessageOptions {
2076                 optional bool baz = 17003;
2077             }
2078         "#;
2079 
2080         let fd = FileDescriptor::parse(proto).expect("fd");
2081         assert_eq!(3, fd.extensions.len());
2082         assert_eq!("google.protobuf.FileOptions", fd.extensions[0].extendee);
2083         assert_eq!("google.protobuf.FileOptions", fd.extensions[1].extendee);
2084         assert_eq!("google.protobuf.MessageOptions", fd.extensions[2].extendee);
2085         assert_eq!(17003, fd.extensions[2].field.number);
2086     }
2087 }
2088