1 use std::f64; 2 use std::fmt; 3 use std::num::ParseIntError; 4 use std::str; 5 6 use model::*; 7 use protobuf_codegen::float; 8 use str_lit::*; 9 10 const FIRST_LINE: u32 = 1; 11 const FIRST_COL: u32 = 1; 12 13 /// Location in file 14 #[derive(Copy, Clone, Debug, Eq, PartialEq)] 15 pub struct Loc { 16 /// 1-based 17 pub line: u32, 18 /// 1-based 19 pub col: u32, 20 } 21 22 impl fmt::Display for Loc { 23 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 24 write!(f, "{}:{}", self.line, self.col) 25 } 26 } 27 28 impl Loc { 29 pub fn start() -> Loc { 30 Loc { 31 line: FIRST_LINE, 32 col: FIRST_COL, 33 } 34 } 35 } 36 37 /// Basic information about parsing error. 38 #[derive(Debug)] 39 pub enum ParserError { 40 IncorrectInput, 41 IncorrectFloatLit, 42 NotUtf8, 43 ExpectChar(char), 44 ExpectConstant, 45 ExpectIdent, 46 ExpectHexDigit, 47 ExpectOctDigit, 48 ExpectDecDigit, 49 UnknownSyntax, 50 UnexpectedEof, 51 ParseIntError, 52 IntegerOverflow, 53 LabelNotAllowed, 54 LabelRequired, 55 InternalError, 56 StrLitDecodeError(StrLitDecodeError), 57 GroupNameShouldStartWithUpperCase, 58 MapFieldNotAllowed, 59 } 60 61 #[derive(Debug)] 62 pub struct ParserErrorWithLocation { 63 pub error: ParserError, 64 /// 1-based 65 pub line: u32, 66 /// 1-based 67 pub col: u32, 68 } 69 70 impl From<StrLitDecodeError> for ParserError { 71 fn from(e: StrLitDecodeError) -> Self { 72 ParserError::StrLitDecodeError(e) 73 } 74 } 75 76 impl From<ParseIntError> for ParserError { 77 fn from(_: ParseIntError) -> Self { 78 ParserError::ParseIntError 79 } 80 } 81 82 impl From<float::ProtobufFloatParseError> for ParserError { 83 fn from(_: float::ProtobufFloatParseError) -> Self { 84 ParserError::IncorrectFloatLit 85 } 86 } 87 88 pub type ParserResult<T> = Result<T, ParserError>; 89 90 trait ToU8 { 91 fn to_u8(&self) -> ParserResult<u8>; 92 } 93 94 trait ToI32 { 95 fn to_i32(&self) -> ParserResult<i32>; 96 } 97 98 trait ToI64 { 99 fn to_i64(&self) -> ParserResult<i64>; 100 } 101 102 trait ToChar { 
103 fn to_char(&self) -> ParserResult<char>; 104 } 105 106 impl ToI32 for u64 { 107 fn to_i32(&self) -> ParserResult<i32> { 108 if *self <= i32::max_value() as u64 { 109 Ok(*self as i32) 110 } else { 111 Err(ParserError::IntegerOverflow) 112 } 113 } 114 } 115 116 impl ToI32 for i64 { 117 fn to_i32(&self) -> ParserResult<i32> { 118 if *self <= i32::max_value() as i64 && *self >= i32::min_value() as i64 { 119 Ok(*self as i32) 120 } else { 121 Err(ParserError::IntegerOverflow) 122 } 123 } 124 } 125 126 impl ToI64 for u64 { 127 fn to_i64(&self) -> Result<i64, ParserError> { 128 if *self <= i64::max_value() as u64 { 129 Ok(*self as i64) 130 } else { 131 Err(ParserError::IntegerOverflow) 132 } 133 } 134 } 135 136 impl ToChar for u8 { 137 fn to_char(&self) -> Result<char, ParserError> { 138 if *self <= 0x7f { 139 Ok(*self as char) 140 } else { 141 Err(ParserError::NotUtf8) 142 } 143 } 144 } 145 146 impl ToU8 for u32 { 147 fn to_u8(&self) -> Result<u8, ParserError> { 148 if *self as u8 as u32 == *self { 149 Ok(*self as u8) 150 } else { 151 Err(ParserError::IntegerOverflow) 152 } 153 } 154 } 155 156 trait U64Extensions { 157 fn neg(&self) -> ParserResult<i64>; 158 } 159 160 impl U64Extensions for u64 { 161 fn neg(&self) -> ParserResult<i64> { 162 if *self <= 0x7fff_ffff_ffff_ffff { 163 Ok(-(*self as i64)) 164 } else if *self == 0x8000_0000_0000_0000 { 165 Ok(-0x8000_0000_0000_0000) 166 } else { 167 Err(ParserError::IntegerOverflow) 168 } 169 } 170 } 171 172 #[derive(Clone, Debug, PartialEq)] 173 enum Token { 174 Ident(String), 175 Symbol(char), 176 IntLit(u64), 177 // including quotes 178 StrLit(StrLit), 179 FloatLit(f64), 180 } 181 182 impl Token { 183 /// Back to original 184 fn format(&self) -> String { 185 match self { 186 &Token::Ident(ref s) => s.clone(), 187 &Token::Symbol(c) => c.to_string(), 188 &Token::IntLit(ref i) => i.to_string(), 189 &Token::StrLit(ref s) => s.quoted(), 190 &Token::FloatLit(ref f) => f.to_string(), 191 } 192 } 193 194 fn to_num_lit(&self) -> 
ParserResult<NumLit> { 195 match self { 196 &Token::IntLit(i) => Ok(NumLit::U64(i)), 197 &Token::FloatLit(f) => Ok(NumLit::F64(f)), 198 _ => Err(ParserError::IncorrectInput), 199 } 200 } 201 } 202 203 #[derive(Clone)] 204 struct TokenWithLocation { 205 token: Token, 206 loc: Loc, 207 } 208 209 #[derive(Copy, Clone)] 210 pub struct Lexer<'a> { 211 pub input: &'a str, 212 pub pos: usize, 213 pub loc: Loc, 214 } 215 216 fn is_letter(c: char) -> bool { 217 c.is_alphabetic() || c == '_' 218 } 219 220 impl<'a> Lexer<'a> { 221 /// No more chars 222 pub fn eof(&self) -> bool { 223 self.pos == self.input.len() 224 } 225 226 /// Remaining chars 227 fn rem_chars(&self) -> &'a str { 228 &self.input[self.pos..] 229 } 230 231 fn lookahead_char_is_in(&self, alphabet: &str) -> bool { 232 self.lookahead_char() 233 .map_or(false, |c| alphabet.contains(c)) 234 } 235 236 fn next_char_opt(&mut self) -> Option<char> { 237 let rem = self.rem_chars(); 238 if rem.is_empty() { 239 None 240 } else { 241 let mut char_indices = rem.char_indices(); 242 let (_, c) = char_indices.next().unwrap(); 243 let c_len = char_indices.next().map(|(len, _)| len).unwrap_or(rem.len()); 244 self.pos += c_len; 245 if c == '\n' { 246 self.loc.line += 1; 247 self.loc.col = FIRST_COL; 248 } else { 249 self.loc.col += 1; 250 } 251 Some(c) 252 } 253 } 254 255 fn next_char(&mut self) -> ParserResult<char> { 256 self.next_char_opt().ok_or(ParserError::UnexpectedEof) 257 } 258 259 /// Skip whitespaces 260 fn skip_whitespaces(&mut self) { 261 self.take_while(|c| c.is_whitespace()); 262 } 263 264 fn skip_comment(&mut self) -> ParserResult<()> { 265 if self.skip_if_lookahead_is_str("/*") { 266 let end = "*/"; 267 match self.rem_chars().find(end) { 268 None => Err(ParserError::UnexpectedEof), 269 Some(len) => { 270 let new_pos = self.pos + len + end.len(); 271 self.skip_to_pos(new_pos); 272 Ok(()) 273 } 274 } 275 } else { 276 Ok(()) 277 } 278 } 279 280 fn skip_block_comment(&mut self) { 281 if 
self.skip_if_lookahead_is_str("//") { 282 loop { 283 match self.next_char_opt() { 284 Some('\n') | None => break, 285 _ => {} 286 } 287 } 288 } 289 } 290 291 fn skip_ws(&mut self) -> ParserResult<()> { 292 loop { 293 let pos = self.pos; 294 self.skip_whitespaces(); 295 self.skip_comment()?; 296 self.skip_block_comment(); 297 if pos == self.pos { 298 // Did not advance 299 return Ok(()); 300 } 301 } 302 } 303 304 fn take_while<F>(&mut self, f: F) -> &'a str 305 where 306 F: Fn(char) -> bool, 307 { 308 let start = self.pos; 309 while self.lookahead_char().map(&f) == Some(true) { 310 self.next_char_opt().unwrap(); 311 } 312 let end = self.pos; 313 &self.input[start..end] 314 } 315 316 fn lookahead_char(&self) -> Option<char> { 317 self.clone().next_char_opt() 318 } 319 320 fn lookahead_is_str(&self, s: &str) -> bool { 321 self.rem_chars().starts_with(s) 322 } 323 324 fn skip_if_lookahead_is_str(&mut self, s: &str) -> bool { 325 if self.lookahead_is_str(s) { 326 let new_pos = self.pos + s.len(); 327 self.skip_to_pos(new_pos); 328 true 329 } else { 330 false 331 } 332 } 333 334 fn next_char_if<P>(&mut self, p: P) -> Option<char> 335 where 336 P: FnOnce(char) -> bool, 337 { 338 let mut clone = self.clone(); 339 match clone.next_char_opt() { 340 Some(c) if p(c) => { 341 *self = clone; 342 Some(c) 343 } 344 _ => None, 345 } 346 } 347 348 fn next_char_if_eq(&mut self, expect: char) -> bool { 349 self.next_char_if(|c| c == expect) != None 350 } 351 352 fn next_char_if_in(&mut self, alphabet: &str) -> Option<char> { 353 for c in alphabet.chars() { 354 if self.next_char_if_eq(c) { 355 return Some(c); 356 } 357 } 358 None 359 } 360 361 fn next_char_expect_eq(&mut self, expect: char) -> ParserResult<()> { 362 if self.next_char_if_eq(expect) { 363 Ok(()) 364 } else { 365 Err(ParserError::ExpectChar(expect)) 366 } 367 } 368 369 // str functions 370 371 /// properly update line and column 372 fn skip_to_pos(&mut self, new_pos: usize) -> &'a str { 373 assert!(new_pos >= self.pos); 
374 assert!(new_pos <= self.input.len()); 375 let pos = self.pos; 376 while self.pos != new_pos { 377 self.next_char_opt().unwrap(); 378 } 379 &self.input[pos..new_pos] 380 } 381 382 // Protobuf grammar 383 384 // char functions 385 386 // letter = "A" … "Z" | "a" … "z" 387 // https://github.com/google/protobuf/issues/4565 388 fn next_letter_opt(&mut self) -> Option<char> { 389 self.next_char_if(is_letter) 390 } 391 392 // capitalLetter = "A" … "Z" 393 fn _next_capital_letter_opt(&mut self) -> Option<char> { 394 self.next_char_if(|c| c >= 'A' && c <= 'Z') 395 } 396 397 fn is_ascii_alphanumeric(c: char) -> bool { 398 (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') 399 } 400 401 fn next_ident_part(&mut self) -> Option<char> { 402 self.next_char_if(|c| Lexer::is_ascii_alphanumeric(c) || c == '_') 403 } 404 405 // Identifiers 406 407 // ident = letter { letter | decimalDigit | "_" } 408 fn next_ident_opt(&mut self) -> ParserResult<Option<String>> { 409 if let Some(c) = self.next_letter_opt() { 410 let mut ident = String::new(); 411 ident.push(c); 412 while let Some(c) = self.next_ident_part() { 413 ident.push(c); 414 } 415 Ok(Some(ident)) 416 } else { 417 Ok(None) 418 } 419 } 420 421 // Integer literals 422 423 fn is_ascii_hexdigit(c: char) -> bool { 424 (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') 425 } 426 427 // hexLit = "0" ( "x" | "X" ) hexDigit { hexDigit } 428 fn next_hex_lit(&mut self) -> ParserResult<Option<u64>> { 429 Ok( 430 if self.skip_if_lookahead_is_str("0x") || self.skip_if_lookahead_is_str("0X") { 431 let s = self.take_while(Lexer::is_ascii_hexdigit); 432 Some(u64::from_str_radix(s, 16)? 
as u64) 433 } else { 434 None 435 }, 436 ) 437 } 438 439 fn is_ascii_digit(c: char) -> bool { 440 c >= '0' && c <= '9' 441 } 442 443 // decimalLit = ( "1" … "9" ) { decimalDigit } 444 // octalLit = "0" { octalDigit } 445 fn next_decimal_octal_lit(&mut self) -> ParserResult<Option<u64>> { 446 // do not advance on number parse error 447 let mut clone = self.clone(); 448 449 let pos = clone.pos; 450 451 Ok(if clone.next_char_if(Lexer::is_ascii_digit) != None { 452 clone.take_while(Lexer::is_ascii_digit); 453 let value = clone.input[pos..clone.pos].parse()?; 454 *self = clone; 455 Some(value) 456 } else { 457 None 458 }) 459 } 460 461 // hexDigit = "0" … "9" | "A" … "F" | "a" … "f" 462 fn next_hex_digit(&mut self) -> ParserResult<u32> { 463 let mut clone = self.clone(); 464 let r = match clone.next_char()? { 465 c if c >= '0' && c <= '9' => c as u32 - b'0' as u32, 466 c if c >= 'A' && c <= 'F' => c as u32 - b'A' as u32 + 10, 467 c if c >= 'a' && c <= 'f' => c as u32 - b'a' as u32 + 10, 468 _ => return Err(ParserError::ExpectHexDigit), 469 }; 470 *self = clone; 471 Ok(r) 472 } 473 474 // octalDigit = "0" … "7" 475 fn next_octal_digit(&mut self) -> ParserResult<u32> { 476 let mut clone = self.clone(); 477 let r = match clone.next_char()? { 478 c if c >= '0' && c <= '7' => c as u32 - b'0' as u32, 479 _ => return Err(ParserError::ExpectOctDigit), 480 }; 481 *self = clone; 482 Ok(r) 483 } 484 485 // decimalDigit = "0" … "9" 486 fn next_decimal_digit(&mut self) -> ParserResult<u32> { 487 let mut clone = self.clone(); 488 let r = match clone.next_char()? 
{ 489 c if c >= '0' && c <= '9' => c as u32 - '0' as u32, 490 _ => return Err(ParserError::ExpectDecDigit), 491 }; 492 *self = clone; 493 Ok(r) 494 } 495 496 // decimals = decimalDigit { decimalDigit } 497 fn next_decimal_digits(&mut self) -> ParserResult<()> { 498 self.next_decimal_digit()?; 499 self.take_while(|c| c >= '0' && c <= '9'); 500 Ok(()) 501 } 502 503 // intLit = decimalLit | octalLit | hexLit 504 fn next_int_lit_opt(&mut self) -> ParserResult<Option<u64>> { 505 self.skip_ws()?; 506 if let Some(i) = self.next_hex_lit()? { 507 return Ok(Some(i)); 508 } 509 if let Some(i) = self.next_decimal_octal_lit()? { 510 return Ok(Some(i)); 511 } 512 Ok(None) 513 } 514 515 // Floating-point literals 516 517 // exponent = ( "e" | "E" ) [ "+" | "-" ] decimals 518 fn next_exponent_opt(&mut self) -> ParserResult<Option<()>> { 519 if self.next_char_if_in("eE") != None { 520 self.next_char_if_in("+-"); 521 self.next_decimal_digits()?; 522 Ok(Some(())) 523 } else { 524 Ok(None) 525 } 526 } 527 528 // floatLit = ( decimals "." [ decimals ] [ exponent ] | decimals exponent | "."decimals [ exponent ] ) | "inf" | "nan" 529 fn next_float_lit(&mut self) -> ParserResult<()> { 530 // "inf" and "nan" are handled as part of ident 531 if self.next_char_if_eq('.') { 532 self.next_decimal_digits()?; 533 self.next_exponent_opt()?; 534 } else { 535 self.next_decimal_digits()?; 536 if self.next_char_if_eq('.') { 537 self.next_decimal_digits()?; 538 self.next_exponent_opt()?; 539 } else { 540 if self.next_exponent_opt()? 
== None { 541 return Err(ParserError::IncorrectFloatLit); 542 } 543 } 544 } 545 Ok(()) 546 } 547 548 // String literals 549 550 // charValue = hexEscape | octEscape | charEscape | /[^\0\n\\]/ 551 // hexEscape = '\' ( "x" | "X" ) hexDigit hexDigit 552 // https://github.com/google/protobuf/issues/4560 553 // octEscape = '\' octalDigit octalDigit octalDigit 554 // charEscape = '\' ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | '\' | "'" | '"' ) 555 // quote = "'" | '"' 556 pub fn next_char_value(&mut self) -> ParserResult<char> { 557 match self.next_char()? { 558 '\\' => { 559 match self.next_char()? { 560 '\'' => Ok('\''), 561 '"' => Ok('"'), 562 '\\' => Ok('\\'), 563 'a' => Ok('\x07'), 564 'b' => Ok('\x08'), 565 'f' => Ok('\x0c'), 566 'n' => Ok('\n'), 567 'r' => Ok('\r'), 568 't' => Ok('\t'), 569 'v' => Ok('\x0b'), 570 'x' => { 571 let d1 = self.next_hex_digit()? as u8; 572 let d2 = self.next_hex_digit()? as u8; 573 // TODO: do not decode as char if > 0x80 574 Ok(((d1 << 4) | d2) as char) 575 } 576 d if d >= '0' && d <= '7' => { 577 let mut r = d as u8 - b'0'; 578 for _ in 0..2 { 579 match self.next_octal_digit() { 580 Err(_) => break, 581 Ok(d) => r = (r << 3) + d as u8, 582 } 583 } 584 // TODO: do not decode as char if > 0x80 585 Ok(r as char) 586 } 587 // https://github.com/google/protobuf/issues/4562 588 c => Ok(c), 589 } 590 } 591 '\n' | '\0' => Err(ParserError::IncorrectInput), 592 c => Ok(c), 593 } 594 } 595 596 // https://github.com/google/protobuf/issues/4564 597 // strLit = ( "'" { charValue } "'" ) | ( '"' { charValue } '"' ) 598 fn next_str_lit_raw(&mut self) -> ParserResult<String> { 599 let mut raw = String::new(); 600 601 let mut first = true; 602 loop { 603 if !first { 604 self.skip_ws()?; 605 } 606 607 let start = self.pos; 608 609 let q = match self.next_char_if_in("'\"") { 610 Some(q) => q, 611 None if !first => break, 612 None => return Err(ParserError::IncorrectInput), 613 }; 614 first = false; 615 while self.lookahead_char() != Some(q) { 616 
self.next_char_value()?; 617 } 618 self.next_char_expect_eq(q)?; 619 620 raw.push_str(&self.input[start + 1..self.pos - 1]); 621 } 622 Ok(raw) 623 } 624 625 fn next_str_lit_raw_opt(&mut self) -> ParserResult<Option<String>> { 626 if self.lookahead_char_is_in("'\"") { 627 Ok(Some(self.next_str_lit_raw()?)) 628 } else { 629 Ok(None) 630 } 631 } 632 633 fn is_ascii_punctuation(c: char) -> bool { 634 match c { 635 '.' | ',' | ':' | ';' | '/' | '\\' | '=' | '%' | '+' | '-' | '*' | '<' | '>' | '(' 636 | ')' | '{' | '}' | '[' | ']' => true, 637 _ => false, 638 } 639 } 640 641 fn next_token_inner(&mut self) -> ParserResult<Token> { 642 if let Some(ident) = self.next_ident_opt()? { 643 let token = if ident == float::PROTOBUF_NAN { 644 Token::FloatLit(f64::NAN) 645 } else if ident == float::PROTOBUF_INF { 646 Token::FloatLit(f64::INFINITY) 647 } else { 648 Token::Ident(ident.to_owned()) 649 }; 650 return Ok(token); 651 } 652 653 let mut clone = self.clone(); 654 let pos = clone.pos; 655 if let Ok(_) = clone.next_float_lit() { 656 let f = float::parse_protobuf_float(&self.input[pos..clone.pos])?; 657 *self = clone; 658 return Ok(Token::FloatLit(f)); 659 } 660 661 if let Some(lit) = self.next_int_lit_opt()? { 662 return Ok(Token::IntLit(lit)); 663 } 664 665 if let Some(escaped) = self.next_str_lit_raw_opt()? { 666 return Ok(Token::StrLit(StrLit { escaped })); 667 } 668 669 // This branch must be after str lit 670 if let Some(c) = self.next_char_if(Lexer::is_ascii_punctuation) { 671 return Ok(Token::Symbol(c)); 672 } 673 674 if let Some(ident) = self.next_ident_opt()? 
{ 675 return Ok(Token::Ident(ident)); 676 } 677 678 Err(ParserError::IncorrectInput) 679 } 680 681 fn next_token(&mut self) -> ParserResult<Option<TokenWithLocation>> { 682 self.skip_ws()?; 683 let loc = self.loc; 684 685 Ok(if self.eof() { 686 None 687 } else { 688 let token = self.next_token_inner()?; 689 // Skip whitespace here to update location 690 // to the beginning of the next token 691 self.skip_ws()?; 692 Some(TokenWithLocation { token, loc }) 693 }) 694 } 695 } 696 697 #[derive(Clone)] 698 pub struct Parser<'a> { 699 lexer: Lexer<'a>, 700 syntax: Syntax, 701 next_token: Option<TokenWithLocation>, 702 } 703 704 #[derive(Copy, Clone)] 705 enum MessageBodyParseMode { 706 MessageProto2, 707 MessageProto3, 708 Oneof, 709 ExtendProto2, 710 ExtendProto3, 711 } 712 713 impl MessageBodyParseMode { 714 fn label_allowed(&self, label: Rule) -> bool { 715 match label { 716 Rule::Repeated => match *self { 717 MessageBodyParseMode::MessageProto2 718 | MessageBodyParseMode::MessageProto3 719 | MessageBodyParseMode::ExtendProto2 720 | MessageBodyParseMode::ExtendProto3 => true, 721 MessageBodyParseMode::Oneof => false, 722 }, 723 Rule::Optional | Rule::Required => match *self { 724 MessageBodyParseMode::MessageProto2 | MessageBodyParseMode::ExtendProto2 => true, 725 MessageBodyParseMode::MessageProto3 726 | MessageBodyParseMode::ExtendProto3 727 | MessageBodyParseMode::Oneof => false, 728 }, 729 } 730 } 731 732 fn some_label_required(&self) -> bool { 733 match *self { 734 MessageBodyParseMode::MessageProto2 | MessageBodyParseMode::ExtendProto2 => true, 735 MessageBodyParseMode::MessageProto3 736 | MessageBodyParseMode::ExtendProto3 737 | MessageBodyParseMode::Oneof => false, 738 } 739 } 740 741 fn map_allowed(&self) -> bool { 742 match *self { 743 MessageBodyParseMode::MessageProto2 744 | MessageBodyParseMode::MessageProto3 745 | MessageBodyParseMode::ExtendProto2 746 | MessageBodyParseMode::ExtendProto3 => true, 747 MessageBodyParseMode::Oneof => false, 748 } 749 } 750 
751 fn is_most_non_fields_allowed(&self) -> bool { 752 match *self { 753 MessageBodyParseMode::MessageProto2 | MessageBodyParseMode::MessageProto3 => true, 754 MessageBodyParseMode::ExtendProto2 755 | MessageBodyParseMode::ExtendProto3 756 | MessageBodyParseMode::Oneof => false, 757 } 758 } 759 760 fn is_option_allowed(&self) -> bool { 761 match *self { 762 MessageBodyParseMode::MessageProto2 763 | MessageBodyParseMode::MessageProto3 764 | MessageBodyParseMode::Oneof => true, 765 MessageBodyParseMode::ExtendProto2 | MessageBodyParseMode::ExtendProto3 => false, 766 } 767 } 768 } 769 770 #[derive(Default)] 771 pub struct MessageBody { 772 pub fields: Vec<Field>, 773 pub oneofs: Vec<OneOf>, 774 pub reserved_nums: Vec<FieldNumberRange>, 775 pub reserved_names: Vec<String>, 776 pub messages: Vec<Message>, 777 pub enums: Vec<Enumeration>, 778 pub options: Vec<ProtobufOption>, 779 } 780 781 #[derive(Copy, Clone)] 782 enum NumLit { 783 U64(u64), 784 F64(f64), 785 } 786 787 impl NumLit { 788 fn to_option_value(&self, sign_is_plus: bool) -> ParserResult<ProtobufConstant> { 789 Ok(match (*self, sign_is_plus) { 790 (NumLit::U64(u), true) => ProtobufConstant::U64(u), 791 (NumLit::F64(f), true) => ProtobufConstant::F64(f), 792 (NumLit::U64(u), false) => ProtobufConstant::I64(u.neg()?), 793 (NumLit::F64(f), false) => ProtobufConstant::F64(-f), 794 }) 795 } 796 } 797 798 impl<'a> Parser<'a> { 799 pub fn new(input: &'a str) -> Parser<'a> { 800 Parser { 801 lexer: Lexer { 802 input, 803 pos: 0, 804 loc: Loc::start(), 805 }, 806 syntax: Syntax::Proto2, 807 next_token: None, 808 } 809 } 810 811 pub fn loc(&self) -> Loc { 812 self.next_token.clone().map_or(self.lexer.loc, |n| n.loc) 813 } 814 815 fn lookahead(&mut self) -> ParserResult<Option<&Token>> { 816 Ok(match self.next_token { 817 Some(ref token) => Some(&token.token), 818 None => { 819 self.next_token = self.lexer.next_token()?; 820 match self.next_token { 821 Some(ref token) => Some(&token.token), 822 None => None, 823 } 824 } 
825 }) 826 } 827 828 fn lookahead_some(&mut self) -> ParserResult<&Token> { 829 match self.lookahead()? { 830 Some(token) => Ok(token), 831 None => Err(ParserError::UnexpectedEof), 832 } 833 } 834 835 fn next(&mut self) -> ParserResult<Option<Token>> { 836 self.lookahead()?; 837 Ok(self 838 .next_token 839 .take() 840 .map(|TokenWithLocation { token, .. }| token)) 841 } 842 843 fn next_some(&mut self) -> ParserResult<Token> { 844 match self.next()? { 845 Some(token) => Ok(token), 846 None => Err(ParserError::UnexpectedEof), 847 } 848 } 849 850 /// Can be called only after lookahead, otherwise it's error 851 fn advance(&mut self) -> ParserResult<Token> { 852 self.next_token 853 .take() 854 .map(|TokenWithLocation { token, .. }| token) 855 .ok_or(ParserError::InternalError) 856 } 857 858 /// No more tokens 859 fn syntax_eof(&mut self) -> ParserResult<bool> { 860 Ok(self.lookahead()?.is_none()) 861 } 862 863 fn next_token_if_map<P, R>(&mut self, p: P) -> ParserResult<Option<R>> 864 where 865 P: FnOnce(&Token) -> Option<R>, 866 { 867 self.lookahead()?; 868 let v = match self.next_token { 869 Some(ref token) => match p(&token.token) { 870 Some(v) => v, 871 None => return Ok(None), 872 }, 873 _ => return Ok(None), 874 }; 875 self.next_token = None; 876 Ok(Some(v)) 877 } 878 879 fn next_token_check_map<P, R>(&mut self, p: P) -> ParserResult<R> 880 where 881 P: FnOnce(&Token) -> ParserResult<R>, 882 { 883 self.lookahead()?; 884 let r = match self.next_token { 885 Some(ref token) => p(&token.token)?, 886 None => return Err(ParserError::UnexpectedEof), 887 }; 888 self.next_token = None; 889 Ok(r) 890 } 891 892 fn next_token_if<P>(&mut self, p: P) -> ParserResult<Option<Token>> 893 where 894 P: FnOnce(&Token) -> bool, 895 { 896 self.next_token_if_map(|token| if p(token) { Some(token.clone()) } else { None }) 897 } 898 899 fn next_ident_if_in(&mut self, idents: &[&str]) -> ParserResult<Option<String>> { 900 let v = match self.lookahead()? 
{ 901 Some(&Token::Ident(ref next)) => { 902 if idents.into_iter().find(|&i| i == next).is_some() { 903 next.clone() 904 } else { 905 return Ok(None); 906 } 907 } 908 _ => return Ok(None), 909 }; 910 self.advance()?; 911 Ok(Some(v)) 912 } 913 914 fn next_ident_if_eq(&mut self, word: &str) -> ParserResult<bool> { 915 Ok(self.next_ident_if_in(&[word])? != None) 916 } 917 918 fn next_ident_if_eq_error(&mut self, word: &str) -> ParserResult<()> { 919 if self.clone().next_ident_if_eq(word)? { 920 return Err(ParserError::IncorrectInput); 921 } 922 Ok(()) 923 } 924 925 fn next_symbol_if_eq(&mut self, symbol: char) -> ParserResult<bool> { 926 Ok(self.next_token_if(|token| match token { 927 &Token::Symbol(c) if c == symbol => true, 928 _ => false, 929 })? != None) 930 } 931 932 fn next_symbol_expect_eq(&mut self, symbol: char) -> ParserResult<()> { 933 if self.lookahead_is_symbol(symbol)? { 934 self.advance()?; 935 Ok(()) 936 } else { 937 Err(ParserError::ExpectChar(symbol)) 938 } 939 } 940 941 fn lookahead_if_symbol(&mut self) -> ParserResult<Option<char>> { 942 Ok(match self.lookahead()? { 943 Some(&Token::Symbol(c)) => Some(c), 944 _ => None, 945 }) 946 } 947 948 fn lookahead_is_symbol(&mut self, symbol: char) -> ParserResult<bool> { 949 Ok(self.lookahead_if_symbol()? == Some(symbol)) 950 } 951 952 // Protobuf grammar 953 954 fn next_ident(&mut self) -> ParserResult<String> { 955 self.next_token_check_map(|token| match token { 956 &Token::Ident(ref ident) => Ok(ident.clone()), 957 _ => Err(ParserError::ExpectIdent), 958 }) 959 } 960 961 fn next_str_lit(&mut self) -> ParserResult<StrLit> { 962 self.next_token_check_map(|token| match token { 963 &Token::StrLit(ref str_lit) => Ok(str_lit.clone()), 964 _ => Err(ParserError::IncorrectInput), 965 }) 966 } 967 968 // fullIdent = ident { "." 
ident } 969 fn next_full_ident(&mut self) -> ParserResult<String> { 970 let mut full_ident = String::new(); 971 // https://github.com/google/protobuf/issues/4563 972 if self.next_symbol_if_eq('.')? { 973 full_ident.push('.'); 974 } 975 full_ident.push_str(&self.next_ident()?); 976 while self.next_symbol_if_eq('.')? { 977 full_ident.push('.'); 978 full_ident.push_str(&self.next_ident()?); 979 } 980 Ok(full_ident) 981 } 982 983 // messageName = ident 984 // enumName = ident 985 // messageType = [ "." ] { ident "." } messageName 986 // enumType = [ "." ] { ident "." } enumName 987 fn next_message_or_enum_type(&mut self) -> ParserResult<String> { 988 let mut full_name = String::new(); 989 if self.next_symbol_if_eq('.')? { 990 full_name.push('.'); 991 } 992 full_name.push_str(&self.next_ident()?); 993 while self.next_symbol_if_eq('.')? { 994 full_name.push('.'); 995 full_name.push_str(&self.next_ident()?); 996 } 997 Ok(full_name) 998 } 999 1000 fn is_ascii_uppercase(c: char) -> bool { 1001 c >= 'A' && c <= 'Z' 1002 } 1003 1004 // groupName = capitalLetter { letter | decimalDigit | "_" } 1005 fn next_group_name(&mut self) -> ParserResult<String> { 1006 // lexer cannot distinguish between group name and other ident 1007 let mut clone = self.clone(); 1008 let ident = clone.next_ident()?; 1009 if !Parser::is_ascii_uppercase(ident.chars().next().unwrap()) { 1010 return Err(ParserError::GroupNameShouldStartWithUpperCase); 1011 } 1012 *self = clone; 1013 Ok(ident) 1014 } 1015 1016 // Boolean 1017 1018 // boolLit = "true" | "false" 1019 fn next_bool_lit_opt(&mut self) -> ParserResult<Option<bool>> { 1020 Ok(if self.next_ident_if_eq("true")? { 1021 Some(true) 1022 } else if self.next_ident_if_eq("false")? 
{ 1023 Some(false) 1024 } else { 1025 None 1026 }) 1027 } 1028 1029 // Constant 1030 1031 fn next_num_lit(&mut self) -> ParserResult<NumLit> { 1032 self.next_token_check_map(|token| token.to_num_lit()) 1033 } 1034 1035 // constant = fullIdent | ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) | 1036 // strLit | boolLit 1037 fn next_constant(&mut self) -> ParserResult<ProtobufConstant> { 1038 // https://github.com/google/protobuf/blob/a21f225824e994ebd35e8447382ea4e0cd165b3c/src/google/protobuf/unittest_custom_options.proto#L350 1039 if self.lookahead_is_symbol('{')? { 1040 return Ok(ProtobufConstant::BracedExpr(self.next_braces()?)); 1041 } 1042 1043 if let Some(b) = self.next_bool_lit_opt()? { 1044 return Ok(ProtobufConstant::Bool(b)); 1045 } 1046 1047 if let &Token::Symbol(c) = self.lookahead_some()? { 1048 if c == '+' || c == '-' { 1049 self.advance()?; 1050 let sign = c == '+'; 1051 return Ok(self.next_num_lit()?.to_option_value(sign)?); 1052 } 1053 } 1054 1055 if let Some(r) = self.next_token_if_map(|token| match token { 1056 &Token::StrLit(ref s) => Some(ProtobufConstant::String(s.clone())), 1057 _ => None, 1058 })? { 1059 return Ok(r); 1060 } 1061 1062 match self.lookahead_some()? { 1063 &Token::IntLit(..) | &Token::FloatLit(..) => { 1064 return self.next_num_lit()?.to_option_value(true); 1065 } 1066 &Token::Ident(..) => { 1067 return Ok(ProtobufConstant::Ident(self.next_full_ident()?)); 1068 } 1069 _ => {} 1070 } 1071 1072 Err(ParserError::ExpectConstant) 1073 } 1074 1075 fn next_int_lit(&mut self) -> ParserResult<u64> { 1076 self.next_token_check_map(|token| match token { 1077 &Token::IntLit(i) => Ok(i), 1078 _ => Err(ParserError::IncorrectInput), 1079 }) 1080 } 1081 1082 // Syntax 1083 1084 // syntax = "syntax" "=" quote "proto2" quote ";" 1085 // syntax = "syntax" "=" quote "proto3" quote ";" 1086 fn next_syntax(&mut self) -> ParserResult<Option<Syntax>> { 1087 if self.next_ident_if_eq("syntax")? 
{ 1088 self.next_symbol_expect_eq('=')?; 1089 let syntax_str = self.next_str_lit()?.decode_utf8()?; 1090 let syntax = if syntax_str == "proto2" { 1091 Syntax::Proto2 1092 } else if syntax_str == "proto3" { 1093 Syntax::Proto3 1094 } else { 1095 return Err(ParserError::UnknownSyntax); 1096 }; 1097 self.next_symbol_expect_eq(';')?; 1098 Ok(Some(syntax)) 1099 } else { 1100 Ok(None) 1101 } 1102 } 1103 1104 // Import Statement 1105 1106 // import = "import" [ "weak" | "public" ] strLit ";" 1107 fn next_import_opt(&mut self) -> ParserResult<Option<String>> { 1108 if self.next_ident_if_eq("import")? { 1109 self.next_ident_if_in(&["weak", "public"])?; 1110 let import_path = self.next_str_lit()?.decode_utf8()?; 1111 self.next_symbol_expect_eq(';')?; 1112 Ok(Some(import_path)) 1113 } else { 1114 Ok(None) 1115 } 1116 } 1117 1118 // Package 1119 1120 // package = "package" fullIdent ";" 1121 fn next_package_opt(&mut self) -> ParserResult<Option<String>> { 1122 if self.next_ident_if_eq("package")? { 1123 let package = self.next_full_ident()?; 1124 self.next_symbol_expect_eq(';')?; 1125 Ok(Some(package)) 1126 } else { 1127 Ok(None) 1128 } 1129 } 1130 1131 // Option 1132 1133 fn next_ident_or_braced(&mut self) -> ParserResult<String> { 1134 let mut ident_or_braced = String::new(); 1135 if self.next_symbol_if_eq('(')? { 1136 ident_or_braced.push('('); 1137 ident_or_braced.push_str(&self.next_full_ident()?); 1138 self.next_symbol_expect_eq(')')?; 1139 ident_or_braced.push(')'); 1140 } else { 1141 ident_or_braced.push_str(&self.next_ident()?); 1142 } 1143 Ok(ident_or_braced) 1144 } 1145 1146 // https://github.com/google/protobuf/issues/4563 1147 // optionName = ( ident | "(" fullIdent ")" ) { "." ident } 1148 fn next_option_name(&mut self) -> ParserResult<String> { 1149 let mut option_name = String::new(); 1150 option_name.push_str(&self.next_ident_or_braced()?); 1151 while self.next_symbol_if_eq('.')? 
{
            option_name.push('.');
            option_name.push_str(&self.next_ident_or_braced()?);
        }
        Ok(option_name)
    }

    // option = "option" optionName  "=" constant ";"
    /// Parses a full `option` statement when the next identifier is `option`.
    ///
    /// Returns `Ok(None)` when the next identifier is not `option`. Once the
    /// keyword has matched, the option name, `=`, the constant and the
    /// terminating `;` are all mandatory and any mismatch is an error.
    fn next_option_opt(&mut self) -> ParserResult<Option<ProtobufOption>> {
        if self.next_ident_if_eq("option")? {
            let name = self.next_option_name()?;
            self.next_symbol_expect_eq('=')?;
            let value = self.next_constant()?;
            self.next_symbol_expect_eq(';')?;
            Ok(Some(ProtobufOption { name, value }))
        } else {
            Ok(None)
        }
    }

    // Fields

    // label = "required" | "optional" | "repeated"
    /// Parses an optional field label and maps it to a `Rule`.
    ///
    /// Each keyword is tried on a clone of the parser; the clone is committed
    /// to `self` only when the keyword matched and `mode` allows that label.
    ///
    /// # Errors
    ///
    /// * `ParserError::LabelNotAllowed` - a label matched but `mode` rejects it.
    /// * `ParserError::LabelRequired` - no label matched and `mode` requires one.
    ///
    /// When no label matched and none is required, `Rule::Optional` is returned.
    fn next_label(&mut self, mode: MessageBodyParseMode) -> ParserResult<Rule> {
        let map = &[
            ("optional", Rule::Optional),
            ("required", Rule::Required),
            ("repeated", Rule::Repeated),
        ];
        for &(name, value) in map {
            let mut clone = self.clone();
            if clone.next_ident_if_eq(name)? {
                if !mode.label_allowed(value) {
                    return Err(ParserError::LabelNotAllowed);
                }

                // Commit the lookahead only after the label was accepted.
                *self = clone;
                return Ok(value);
            }
        }

        if mode.some_label_required() {
            Err(ParserError::LabelRequired)
        } else {
            Ok(Rule::Optional)
        }
    }

    /// Parses a field type.
    ///
    /// The scalar type keywords are tried first, then a `map<K, V>` type, and
    /// finally anything else is parsed as a message-or-enum type reference.
    fn next_field_type(&mut self) -> ParserResult<FieldType> {
        let simple = &[
            ("int32", FieldType::Int32),
            ("int64", FieldType::Int64),
            ("uint32", FieldType::Uint32),
            ("uint64", FieldType::Uint64),
            ("sint32", FieldType::Sint32),
            ("sint64", FieldType::Sint64),
            ("fixed32", FieldType::Fixed32),
            ("sfixed32", FieldType::Sfixed32),
            ("fixed64", FieldType::Fixed64),
            ("sfixed64", FieldType::Sfixed64),
            ("bool", FieldType::Bool),
            ("string", FieldType::String),
            ("bytes", FieldType::Bytes),
            ("float", FieldType::Float),
            ("double", FieldType::Double),
        ];
        for &(ref n, ref t) in simple {
            if self.next_ident_if_eq(n)? {
                return Ok(t.clone());
            }
        }

        if let Some(t) = self.next_map_field_type_opt()? {
            return Ok(t);
        }

        let message_or_enum = self.next_message_or_enum_type()?;
        Ok(FieldType::MessageOrEnum(message_or_enum))
    }

    /// Parses a field number: an integer literal that must fit into `i32`.
    ///
    /// Any other token yields `ParserError::IncorrectInput`.
    fn next_field_number(&mut self) -> ParserResult<i32> {
        self.next_token_check_map(|token| match token {
            &Token::IntLit(i) => i.to_i32(),
            _ => Err(ParserError::IncorrectInput),
        })
    }

    // fieldOption = optionName "=" constant
    /// Parses a single `name = constant` field option (no terminator).
    fn next_field_option(&mut self) -> ParserResult<ProtobufOption> {
        let name = self.next_option_name()?;
        self.next_symbol_expect_eq('=')?;
        let value = self.next_constant()?;
        Ok(ProtobufOption { name, value })
    }

    // fieldOptions = fieldOption { ","  fieldOption }
    /// Parses one or more comma-separated field options.
    ///
    /// The surrounding `[` and `]` are handled by the caller.
    fn next_field_options(&mut self) -> ParserResult<Vec<ProtobufOption>> {
        let mut options = vec![self.next_field_option()?];

        while self.next_symbol_if_eq(',')? {
            options.push(self.next_field_option()?);
        }

        Ok(options)
    }

    // field = label type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
    // group = label "group" groupName "=" fieldNumber messageBody
    /// Parses a field or a group declaration.
    ///
    /// A declaration that starts with `map` gets `Rule::Optional` without
    /// looking for a label, and is rejected with
    /// `ParserError::MapFieldNotAllowed` when `mode` does not allow maps.
    /// Otherwise the label is parsed by `next_label`.
    ///
    /// A group's body is parsed as a message body in the mode that matches the
    /// file syntax; only its fields are kept and its option list is empty.
    fn next_field(&mut self, mode: MessageBodyParseMode) -> ParserResult<Field> {
        // Peek on a clone: `map` itself is consumed later by `next_field_type`.
        let rule = if self.clone().next_ident_if_eq("map")? {
            if !mode.map_allowed() {
                return Err(ParserError::MapFieldNotAllowed);
            }
            Rule::Optional
        } else {
            self.next_label(mode)?
        };
        if self.next_ident_if_eq("group")? {
            let name = self.next_group_name()?.to_owned();
            self.next_symbol_expect_eq('=')?;
            let number = self.next_field_number()?;

            let mode = match self.syntax {
                Syntax::Proto2 => MessageBodyParseMode::MessageProto2,
                Syntax::Proto3 => MessageBodyParseMode::MessageProto3,
            };

            let MessageBody { fields, .. } = self.next_message_body(mode)?;

            Ok(Field {
                name,
                rule,
                typ: FieldType::Group(fields),
                number,
                options: Vec::new(),
            })
        } else {
            let typ = self.next_field_type()?;
            let name = self.next_ident()?.to_owned();
            self.next_symbol_expect_eq('=')?;
            let number = self.next_field_number()?;

            let mut options = Vec::new();

            if self.next_symbol_if_eq('[')? {
                options.extend(self.next_field_options()?);
                self.next_symbol_expect_eq(']')?;
            }
            self.next_symbol_expect_eq(';')?;
            Ok(Field {
                name,
                rule,
                typ,
                number,
                options,
            })
        }
    }

    // oneof = "oneof" oneofName "{" { oneofField | emptyStatement } "}"
    // oneofField = type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
    /// Parses a `oneof` declaration if the next identifier is `oneof`.
    ///
    /// The body is parsed with `MessageBodyParseMode::Oneof` and only its
    /// fields are kept.
    fn next_oneof_opt(&mut self) -> ParserResult<Option<OneOf>> {
        if self.next_ident_if_eq("oneof")? {
            let name = self.next_ident()?.to_owned();
            let MessageBody { fields, .. } = self.next_message_body(MessageBodyParseMode::Oneof)?;
            Ok(Some(OneOf { name, fields }))
        } else {
            Ok(None)
        }
    }

    // mapField = "map" "<" keyType "," type ">" mapName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
    // keyType = "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" |
    //           "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string"
    /// Parses the `map<K, V>` type part of a map field if the next identifier
    /// is `map`.
    ///
    /// Both the key and the value are parsed with `next_field_type`, so the
    /// key type is currently not restricted to the `keyType` production.
    fn next_map_field_type_opt(&mut self) -> ParserResult<Option<FieldType>> {
        if self.next_ident_if_eq("map")? {
            self.next_symbol_expect_eq('<')?;
            // TODO: restrict key types
            let key = self.next_field_type()?;
            self.next_symbol_expect_eq(',')?;
            let value = self.next_field_type()?;
            self.next_symbol_expect_eq('>')?;
            Ok(Some(FieldType::Map(Box::new((key, value)))))
        } else {
            Ok(None)
        }
    }

    // Extensions and Reserved

    // Extensions

    // range =  intLit [ "to" ( intLit | "max" ) ]
    /// Parses a field number range.
    ///
    /// A single number `n` yields the range `n..=n`; the keyword `max` as the
    /// upper bound is mapped to `i32::max_value()`.
    fn next_range(&mut self) -> ParserResult<FieldNumberRange> {
        let from = self.next_field_number()?;
        let to = if self.next_ident_if_eq("to")? {
            if self.next_ident_if_eq("max")? {
                i32::max_value()
            } else {
                self.next_field_number()?
            }
        } else {
            from
        };
        Ok(FieldNumberRange { from, to })
    }

    // ranges = range { "," range }
    /// Parses one or more comma-separated field number ranges.
    fn next_ranges(&mut self) -> ParserResult<Vec<FieldNumberRange>> {
        let mut ranges = vec![self.next_range()?];
        while self.next_symbol_if_eq(',')? {
            ranges.push(self.next_range()?);
        }
        Ok(ranges)
    }

    // extensions = "extensions" ranges ";"
    /// Parses an `extensions` declaration if the next identifier is
    /// `extensions`.
    ///
    /// The trailing `;` of the grammar is not consumed here; the loop in
    /// `next_message_body` skips it as an empty statement on its next
    /// iteration.
    fn next_extensions_opt(&mut self) -> ParserResult<Option<Vec<FieldNumberRange>>> {
        if self.next_ident_if_eq("extensions")? {
            Ok(Some(self.next_ranges()?))
        } else {
            Ok(None)
        }
    }

    // Reserved

    // Grammar is incorrect: https://github.com/google/protobuf/issues/4558
    // reserved = "reserved" ( ranges | fieldNames ) ";"
    // fieldNames = fieldName { "," fieldName }
    /// Parses a `reserved` statement if the next identifier is `reserved`.
    ///
    /// Returns `(ranges, names)`. Exactly one of the two vectors is populated:
    /// names when the token after the keyword is a string literal, ranges
    /// otherwise. The terminating `;` is required.
    fn next_reserved_opt(&mut self) -> ParserResult<Option<(Vec<FieldNumberRange>, Vec<String>)>> {
        if self.next_ident_if_eq("reserved")? {
            let (ranges, names) = if let &Token::StrLit(..) = self.lookahead_some()? {
                let mut names = vec![self.next_str_lit()?.decode_utf8()?];
                while self.next_symbol_if_eq(',')? {
                    names.push(self.next_str_lit()?.decode_utf8()?);
                }
                (Vec::new(), names)
            } else {
                (self.next_ranges()?, Vec::new())
            };

            self.next_symbol_expect_eq(';')?;

            Ok(Some((ranges, names)))
        } else {
            Ok(None)
        }
    }

    // Top Level definitions

    // Enum definition

    // enumValueOption = optionName "=" constant
    /// Parses a single enum value option and discards its name and value.
    fn next_enum_value_option(&mut self) -> ParserResult<()> {
        self.next_option_name()?;
        self.next_symbol_expect_eq('=')?;
        self.next_constant()?;
        Ok(())
    }

    // https://github.com/google/protobuf/issues/4561
    /// Parses the numeric value of an enum field: an integer literal with an
    /// optional leading `-`.
    ///
    /// A negated literal is first converted to `i64`, negated with an
    /// overflow check and then narrowed to `i32`; a failed negation is
    /// reported as `ParserError::IntegerOverflow`.
    fn next_enum_value(&mut self) -> ParserResult<i32> {
        let minus = self.next_symbol_if_eq('-')?;
        let lit = self.next_int_lit()?;
        Ok(if minus {
            let unsigned = lit.to_i64()?;
            match unsigned.checked_neg() {
                Some(neg) => neg.to_i32()?,
                None => return Err(ParserError::IntegerOverflow),
            }
        } else {
            lit.to_i32()?
        })
    }

    // enumField = ident "=" intLit [ "[" enumValueOption { ","  enumValueOption } "]" ]";"
    /// Parses one enum field: `name = value` plus an optional bracketed list
    /// of enum value options, which are parsed and dropped.
    ///
    /// The trailing `;` of the grammar is not consumed here; the loop in
    /// `next_enum_opt` skips it as an empty statement on its next iteration.
    fn next_enum_field(&mut self) -> ParserResult<EnumValue> {
        let name = self.next_ident()?.to_owned();
        self.next_symbol_expect_eq('=')?;
        let number = self.next_enum_value()?;
        if self.next_symbol_if_eq('[')? {
            self.next_enum_value_option()?;
            while self.next_symbol_if_eq(',')? {
                self.next_enum_value_option()?;
            }
            self.next_symbol_expect_eq(']')?;
        }

        Ok(EnumValue { name, number })
    }

    // enum = "enum" enumName enumBody
    // enumBody = "{" { option | enumField | emptyStatement } "}"
    /// Parses an `enum` definition if the next identifier is `enum`.
    ///
    /// Inside the body every statement is tried, in order, as an empty
    /// statement, an `option` statement and finally an enum field.
    fn next_enum_opt(&mut self) -> ParserResult<Option<Enumeration>> {
        if self.next_ident_if_eq("enum")? {
            let name = self.next_ident()?.to_owned();

            let mut values = Vec::new();
            let mut options = Vec::new();

            self.next_symbol_expect_eq('{')?;
            while self.lookahead_if_symbol()? != Some('}') {
                // emptyStatement
                if self.next_symbol_if_eq(';')? {
                    continue;
                }

                if let Some(o) = self.next_option_opt()? {
                    options.push(o);
                    continue;
                }

                values.push(self.next_enum_field()?);
            }
            self.next_symbol_expect_eq('}')?;
            Ok(Some(Enumeration {
                name,
                values,
                options,
            }))
        } else {
            Ok(None)
        }
    }

    // Message definition

    // messageBody = "{" { field | enum | message | extend | extensions | group |
    //               option | oneof | mapField | reserved | emptyStatement } "}"
    /// Parses a brace-delimited message-like body.
    ///
    /// `mode` selects what may appear inside:
    ///
    /// * when `mode.is_most_non_fields_allowed()`, `reserved`, `oneof`,
    ///   `extensions`, `extend`, nested messages and nested enums are parsed
    ///   (parsed `extensions` and `extend` results are dropped); otherwise
    ///   each of those keywords is passed to `next_ident_if_eq_error`;
    /// * when `mode.is_option_allowed()`, `option` statements are collected;
    ///   otherwise the keyword is passed to `next_ident_if_eq_error`;
    /// * anything else is parsed as a field via `next_field(mode)`.
    fn next_message_body(&mut self, mode: MessageBodyParseMode) -> ParserResult<MessageBody> {
        self.next_symbol_expect_eq('{')?;

        let mut r = MessageBody::default();

        while self.lookahead_if_symbol()? != Some('}') {
            // emptyStatement
            if self.next_symbol_if_eq(';')? {
                continue;
            }

            if mode.is_most_non_fields_allowed() {
                if let Some((field_nums, field_names)) = self.next_reserved_opt()? {
                    r.reserved_nums.extend(field_nums);
                    r.reserved_names.extend(field_names);
                    continue;
                }

                if let Some(oneof) = self.next_oneof_opt()? {
                    r.oneofs.push(oneof);
                    continue;
                }

                if let Some(_extensions) = self.next_extensions_opt()? {
                    continue;
                }

                if let Some(_extend) = self.next_extend_opt()? {
                    continue;
                }

                if let Some(nested_message) = self.next_message_opt()? {
                    r.messages.push(nested_message);
                    continue;
                }

                if let Some(nested_enum) = self.next_enum_opt()? {
                    r.enums.push(nested_enum);
                    continue;
                }
            } else {
                self.next_ident_if_eq_error("reserved")?;
                self.next_ident_if_eq_error("oneof")?;
                self.next_ident_if_eq_error("extensions")?;
                self.next_ident_if_eq_error("extend")?;
                self.next_ident_if_eq_error("message")?;
                self.next_ident_if_eq_error("enum")?;
            }

            if mode.is_option_allowed() {
                if let Some(option) = self.next_option_opt()? {
                    r.options.push(option);
                    continue;
                }
            } else {
                self.next_ident_if_eq_error("option")?;
            }

            r.fields.push(self.next_field(mode)?);
        }

        self.next_symbol_expect_eq('}')?;

        Ok(r)
    }

    // message = "message" messageName messageBody
    /// Parses a `message` definition if the next identifier is `message`.
    ///
    /// The body is parsed in the message mode that matches the file syntax.
    fn next_message_opt(&mut self) -> ParserResult<Option<Message>> {
        if self.next_ident_if_eq("message")? {
            let name = self.next_ident()?.to_owned();

            let mode = match self.syntax {
                Syntax::Proto2 => MessageBodyParseMode::MessageProto2,
                Syntax::Proto3 => MessageBodyParseMode::MessageProto3,
            };

            let MessageBody {
                fields,
                oneofs,
                reserved_nums,
                reserved_names,
                messages,
                enums,
                options,
            } = self.next_message_body(mode)?;

            Ok(Some(Message {
                name,
                fields,
                oneofs,
                reserved_nums,
                reserved_names,
                messages,
                enums,
                options,
            }))
        } else {
            Ok(None)
        }
    }

    // Extend

    // extend = "extend" messageType "{" {field | group | emptyStatement} "}"
    /// Parses an `extend` block if the next identifier is `extend`.
    ///
    /// Every field of the block becomes one `Extension` carrying a copy of
    /// the extendee type name. The keyword is matched on a clone of the
    /// parser, which is committed only on success.
    fn next_extend_opt(&mut self) -> ParserResult<Option<Vec<Extension>>> {
        let mut clone = self.clone();
        if clone.next_ident_if_eq("extend")? {
            // According to spec `extend` is only for `proto2`, but it is used in `proto3`
            // https://github.com/google/protobuf/issues/4610

            *self = clone;

            let extendee = self.next_message_or_enum_type()?;

            let mode = match self.syntax {
                Syntax::Proto2 => MessageBodyParseMode::ExtendProto2,
                Syntax::Proto3 => MessageBodyParseMode::ExtendProto3,
            };

            let MessageBody { fields, .. } = self.next_message_body(mode)?;

            let extensions = fields
                .into_iter()
                .map(|field| {
                    let extendee = extendee.clone();
                    Extension { extendee, field }
                })
                .collect();

            Ok(Some(extensions))
        } else {
            Ok(None)
        }
    }

    // Service definition

    /// Consumes a balanced `{ ... }` group and returns its text.
    ///
    /// The result is the concatenation of `format()` of every consumed token
    /// (including the outer braces) with nothing inserted between tokens.
    /// Nested groups are handled recursively.
    fn next_braces(&mut self) -> ParserResult<String> {
        let mut r = String::new();
        self.next_symbol_expect_eq('{')?;
        r.push('{');
        loop {
            if self.lookahead_if_symbol()? == Some('{') {
                r.push_str(&self.next_braces()?);
                continue;
            }
            let next = self.next_some()?;
            r.push_str(&next.format());
            if let Token::Symbol('}') = next {
                break;
            }
        }
        Ok(r)
    }

    // service = "service" serviceName "{" { option | rpc | stream | emptyStatement } "}"
    // rpc = "rpc" rpcName "(" [ "stream" ] messageType ")" "returns" "(" [ "stream" ]
    //       messageType ")" (( "{" { option | emptyStatement } "}" ) | ";" )
    // stream = "stream" streamName "(" messageType "," messageType ")" (( "{"
    //        { option | emptyStatement } "}") | ";" )
    /// Recognises a `service` definition and discards it.
    ///
    /// The service name is read and the whole body is skipped with
    /// `next_braces`; nothing about the service is kept.
    fn next_service_opt(&mut self) -> ParserResult<Option<()>> {
        if self.next_ident_if_eq("service")? {
            let _name = self.next_ident()?;
            self.next_braces()?;
            Ok(Some(()))
        } else {
            Ok(None)
        }
    }

    // Proto file

    // proto = syntax { import | package | option | topLevelDef | emptyStatement }
    // topLevelDef = message | enum | extend | service
    /// Parses a whole `.proto` file into a `FileDescriptor`.
    ///
    /// A missing `syntax` statement defaults to `Syntax::Proto2`. Top-level
    /// statements are tried in a fixed order (import, package, option,
    /// message, enum, extend, service, empty statement); input that matches
    /// none of them yields `ParserError::IncorrectInput`. If several `package`
    /// statements are present the last one wins.
    pub fn next_proto(&mut self) -> ParserResult<FileDescriptor> {
        let syntax = self.next_syntax()?.unwrap_or(Syntax::Proto2);
        self.syntax = syntax;

        let mut import_paths = Vec::new();
        let mut package = String::new();
        let mut messages = Vec::new();
        let mut enums = Vec::new();
        let mut extensions = Vec::new();
        let mut options = Vec::new();

        while !self.syntax_eof()? {
            if let Some(import_path) = self.next_import_opt()? {
                import_paths.push(import_path);
                continue;
            }

            if let Some(next_package) = self.next_package_opt()? {
                package = next_package.to_owned();
                continue;
            }

            if let Some(option) = self.next_option_opt()? {
                options.push(option);
                continue;
            }

            if let Some(message) = self.next_message_opt()? {
                messages.push(message);
                continue;
            }

            if let Some(enumeration) = self.next_enum_opt()? {
                enums.push(enumeration);
                continue;
            }

            if let Some(more_extensions) = self.next_extend_opt()? {
                extensions.extend(more_extensions);
                continue;
            }

            if let Some(_service) = self.next_service_opt()? {
                continue;
            }

            if self.next_symbol_if_eq(';')? {
                continue;
            }

            return Err(ParserError::IncorrectInput);
        }

        Ok(FileDescriptor {
            import_paths,
            package,
            syntax,
            messages,
            enums,
            extensions,
            options,
        })
    }
}

#[cfg(test)]
mod test {
    use super::*;

    /// Runs `parse_what` on a fresh lexer over `input`, panicking with the
    /// lexer location on error or when input is left over.
    fn lex<P, R>(input: &str, parse_what: P) -> R
    where
        P: FnOnce(&mut Lexer) -> ParserResult<R>,
    {
        let mut lexer = Lexer {
            input,
            pos: 0,
            loc: Loc::start(),
        };
        let r = parse_what(&mut lexer)
            .unwrap_or_else(|e| panic!("lexer failed at {}: {:?}", lexer.loc, e));
        assert!(lexer.eof(), "check eof failed at {}", lexer.loc);
        r
    }

    /// Like `lex`, but additionally requires `parse_what` to return `Some`.
    fn lex_opt<P, R>(input: &str, parse_what: P) -> R
    where
        P: FnOnce(&mut Lexer) -> ParserResult<Option<R>>,
    {
        let mut lexer = Lexer {
            input,
            pos: 0,
            loc: Loc::start(),
        };
        let o = parse_what(&mut lexer)
            .unwrap_or_else(|e| panic!("lexer failed at {}: {:?}", lexer.loc, e));
        let r = o.unwrap_or_else(|| panic!("lexer returned none at {}", lexer.loc));
        assert!(lexer.eof(), "check eof failed at {}", lexer.loc);
        r
    }

    /// Runs `parse_what` on a fresh parser over `input`, panicking with the
    /// parser location on error or when input is left over.
    fn parse<P, R>(input: &str, parse_what: P) -> R
    where
        P: FnOnce(&mut Parser) -> ParserResult<R>,
    {
        let mut parser = Parser::new(input);
        let r = parse_what(&mut parser)
            .unwrap_or_else(|e| panic!("parse failed at {}: {:?}", parser.loc(), e));
        let eof = parser
            .syntax_eof()
            .unwrap_or_else(|e| panic!("check eof failed at {}: {:?}", parser.loc(), e));
        assert!(eof, "{}", parser.loc());
        r
    }

    /// Like `parse`, but additionally requires `parse_what` to return `Some`.
    fn parse_opt<P, R>(input: &str, parse_what: P) -> R
    where
        P: FnOnce(&mut Parser) -> ParserResult<Option<R>>,
    {
        let mut parser = Parser::new(input);
        let o = parse_what(&mut parser)
            .unwrap_or_else(|e| panic!("parse failed at {}: {:?}", parser.loc(), e));
        let r = o.unwrap_or_else(|| panic!("parser returned none at {}", parser.loc()));
        assert!(parser.syntax_eof().unwrap());
        r
    }

    #[test]
    fn test_lexer_int_lit() {
        let msg = r#"10"#;
        let mess = lex_opt(msg, |p| p.next_int_lit_opt());
        assert_eq!(10, mess);
    }

    #[test]
    fn test_lexer_float_lit() {
        let msg = r#"12.3"#;
        let mess = lex(msg, |p| p.next_token_inner());
        assert_eq!(Token::FloatLit(12.3), mess);
    }

    #[test]
    fn test_ident() {
        let msg = r#" aabb_c "#;
        let mess = parse(msg, |p| p.next_ident().map(|s| s.to_owned()));
        assert_eq!("aabb_c", mess);
    }

    #[test]
    fn test_str_lit() {
        let msg = r#" "a\nb" "#;
        let mess = parse(msg, |p| p.next_str_lit());
        assert_eq!(
            StrLit {
                escaped: r#"a\nb"#.to_owned()
            },
            mess
        );
    }

    #[test]
    fn test_syntax() {
        let msg = r#" syntax = "proto3"; "#;
        let mess = parse_opt(msg, |p| p.next_syntax());
        assert_eq!(Syntax::Proto3, mess);
    }

    #[test]
    fn test_field_default_value_int() {
        let msg = r#" optional int64 f = 4 [default = 12]; "#;
        let mess = parse(msg, |p| p.next_field(MessageBodyParseMode::MessageProto2));
        assert_eq!("f", mess.name);
        assert_eq!("default", mess.options[0].name);
        assert_eq!("12", mess.options[0].value.format());
    }

    #[test]
    fn test_field_default_value_float() {
        let msg = r#" optional float f = 2 [default = 10.0]; "#;
        let mess = parse(msg, |p| p.next_field(MessageBodyParseMode::MessageProto2));
        assert_eq!("f", mess.name);
        assert_eq!("default", mess.options[0].name);
        assert_eq!("10.0", mess.options[0].value.format());
    }

    #[test]
    fn test_message() {
        let msg = r#"message ReferenceData
    {
        repeated ScenarioInfo scenarioSet = 1;
        repeated CalculatedObjectInfo calculatedObjectSet = 2;
        repeated RiskFactorList riskFactorListSet = 3;
        repeated RiskMaturityInfo riskMaturitySet = 4;
        repeated IndicatorInfo indicatorSet = 5;
        repeated RiskStrikeInfo riskStrikeSet = 6;
        repeated FreeProjectionList freeProjectionListSet = 7;
        repeated ValidationProperty ValidationSet = 8;
        repeated CalcProperties calcPropertiesSet = 9;
        repeated MaturityInfo maturitySet = 10;
    }"#;

        let mess = parse_opt(msg, |p| p.next_message_opt());
        assert_eq!(10, mess.fields.len());
    }

    #[test]
    fn test_enum() {
        let msg = r#"enum PairingStatus {
                DEALPAIRED = 0;
                INVENTORYORPHAN = 1;
                CALCULATEDORPHAN = 2;
                CANCELED = 3;
    }"#;

        let enumeration = parse_opt(msg, |p| p.next_enum_opt());
        assert_eq!(4, enumeration.values.len());
    }

    #[test]
    fn test_ignore() {
        let msg = r#"option optimize_for = SPEED;"#;

        parse_opt(msg, |p| p.next_option_opt());
    }

    #[test]
    fn test_import() {
        let msg = r#"syntax = "proto3";

    import "test_import_nested_imported_pb.proto";

    message ContainsImportedNested {
        ContainerForNested.NestedMessage m = 1;
        ContainerForNested.NestedEnum e = 2;
    }
    "#;
        let desc = parse(msg, |p| p.next_proto());

        assert_eq!(
            vec!["test_import_nested_imported_pb.proto"],
            desc.import_paths
        );
    }

    #[test]
    fn test_package() {
        let msg = r#"
        package foo.bar;

    message ContainsImportedNested {
        optional ContainerForNested.NestedMessage m = 1;
        optional ContainerForNested.NestedEnum e = 2;
    }
    "#;
        let desc = parse(msg, |p| p.next_proto());
        assert_eq!("foo.bar".to_string(), desc.package);
    }

    #[test]
    fn test_nested_message() {
        let msg = r#"message A
    {
        message B {
            repeated int32 a = 1;
            optional string b = 2;
        }
        optional string b = 1;
    }"#;

        let mess = parse_opt(msg, |p| p.next_message_opt());
        assert_eq!(1, mess.messages.len());
    }

    #[test]
    fn test_map() {
        let msg = r#"message A
    {
        optional map<string, int32> b = 1;
    }"#;

        let mess = parse_opt(msg, |p| p.next_message_opt());
        assert_eq!(1, mess.fields.len());
        match mess.fields[0].typ {
            FieldType::Map(ref f) => match &**f {
                &(FieldType::String, FieldType::Int32) => (),
                ref f => panic!("Expecting Map<String, Int32> found {:?}", f),
            },
            ref f => panic!("Expecting map, got {:?}", f),
        }
    }

    #[test]
    fn test_oneof() {
        let msg = r#"message A
    {
        optional int32 a1 = 1;
        oneof a_oneof {
            string a2 = 2;
            int32 a3 = 3;
            bytes a4 = 4;
        }
        repeated bool a5 = 5;
    }"#;

        let mess = parse_opt(msg, |p| p.next_message_opt());
        assert_eq!(1, mess.oneofs.len());
        assert_eq!(3, mess.oneofs[0].fields.len());
    }

    #[test]
    fn test_reserved() {
        let msg = r#"message Sample {
       reserved 4, 15, 17 to 20, 30;
       reserved "foo", "bar";
       optional uint64 age =1;
       required bytes name =2;
    }"#;

        let mess = parse_opt(msg, |p| p.next_message_opt());
        assert_eq!(
            vec![
                FieldNumberRange { from: 4, to: 4 },
                FieldNumberRange { from: 15, to: 15 },
                FieldNumberRange { from: 17, to: 20 },
                FieldNumberRange { from: 30, to: 30 }
            ],
            mess.reserved_nums
        );
        assert_eq!(
            vec!["foo".to_string(), "bar".to_string()],
            mess.reserved_names
        );
        assert_eq!(2, mess.fields.len());
    }

    #[test]
    fn test_default_value_int() {
        let msg = r#"message Sample {
            optional int32 x = 1 [default = 17];
        }"#;

        let mess = parse_opt(msg, |p| p.next_message_opt());
        assert_eq!("default", mess.fields[0].options[0].name);
        assert_eq!("17", mess.fields[0].options[0].value.format());
    }

    #[test]
    fn test_default_value_string() {
        let msg = r#"message Sample {
            optional string x = 1 [default = "ab\nc d\"g\'h\0\"z"];
        }"#;

        let mess = parse_opt(msg, |p| p.next_message_opt());
        assert_eq!(
            r#""ab\nc d\"g\'h\0\"z""#,
            mess.fields[0].options[0].value.format()
        );
    }

    #[test]
    fn test_default_value_bytes() {
        let msg = r#"message Sample {
            optional bytes x = 1 [default = "ab\nc d\xfeE\"g\'h\0\"z"];
        }"#;

        let mess = parse_opt(msg, |p| p.next_message_opt());
        assert_eq!(
            r#""ab\nc d\xfeE\"g\'h\0\"z""#,
            mess.fields[0].options[0].value.format()
        );
    }

    #[test]
    fn test_group() {
        let msg = r#"message MessageWithGroup {
            optional string aaa = 1;

            repeated group Identifier = 18 {
                optional int32 iii = 19;
                optional string sss = 20;
            }

            required int bbb = 3;
        }"#;
        let mess = parse_opt(msg, |p| p.next_message_opt());

        assert_eq!("Identifier", mess.fields[1].name);
        if let FieldType::Group(ref group_fields) = mess.fields[1].typ {
            assert_eq!(2, group_fields.len());
        } else {
            panic!("expecting group");
        }

        assert_eq!("bbb", mess.fields[2].name);
    }

    #[test]
    fn test_incorrect_file_descriptor() {
        // The bogus identifier sits on the fourth line of the input, which is
        // what the reported error location is checked against.
        let msg = r#"
            message Foo {}

            dfgdg
        "#;

        let err = FileDescriptor::parse(msg).err().expect("err");
        assert_eq!(4, err.line);
    }

    #[test]
    fn test_extend() {
        let proto = r#"
            syntax = "proto2";

            extend google.protobuf.FileOptions {
                optional bool foo = 17001;
                optional string bar = 17002;
            }

            extend google.protobuf.MessageOptions {
                optional bool baz = 17003;
            }
        "#;

        let fd = FileDescriptor::parse(proto).expect("fd");
        assert_eq!(3, fd.extensions.len());
        assert_eq!("google.protobuf.FileOptions", fd.extensions[0].extendee);
        assert_eq!("google.protobuf.FileOptions", fd.extensions[1].extendee);
        assert_eq!("google.protobuf.MessageOptions", fd.extensions[2].extendee);
        assert_eq!(17003, fd.extensions[2].field.number);
    }
}