// Go support for Protocol Buffers - Google's data interchange format
//
// Copyright 2010 The Go Authors. All rights reserved.
// https://github.com/golang/protobuf
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

package proto

// Functions for parsing the Text protocol buffer format.
// TODO: message sets.
36 37import ( 38 "encoding" 39 "errors" 40 "fmt" 41 "reflect" 42 "strconv" 43 "strings" 44 "unicode/utf8" 45) 46 47type ParseError struct { 48 Message string 49 Line int // 1-based line number 50 Offset int // 0-based byte offset from start of input 51} 52 53func (p *ParseError) Error() string { 54 if p.Line == 1 { 55 // show offset only for first line 56 return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message) 57 } 58 return fmt.Sprintf("line %d: %v", p.Line, p.Message) 59} 60 61type token struct { 62 value string 63 err *ParseError 64 line int // line number 65 offset int // byte number from start of input, not start of line 66 unquoted string // the unquoted version of value, if it was a quoted string 67} 68 69func (t *token) String() string { 70 if t.err == nil { 71 return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset) 72 } 73 return fmt.Sprintf("parse error: %v", t.err) 74} 75 76type textParser struct { 77 s string // remaining input 78 done bool // whether the parsing is finished (success or error) 79 backed bool // whether back() was called 80 offset, line int 81 cur token 82} 83 84func newTextParser(s string) *textParser { 85 p := new(textParser) 86 p.s = s 87 p.line = 1 88 p.cur.line = 1 89 return p 90} 91 92func (p *textParser) errorf(format string, a ...interface{}) *ParseError { 93 pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset} 94 p.cur.err = pe 95 p.done = true 96 return pe 97} 98 99// Numbers and identifiers are matched by [-+._A-Za-z0-9] 100func isIdentOrNumberChar(c byte) bool { 101 switch { 102 case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z': 103 return true 104 case '0' <= c && c <= '9': 105 return true 106 } 107 switch c { 108 case '-', '+', '.', '_': 109 return true 110 } 111 return false 112} 113 114func isWhitespace(c byte) bool { 115 switch c { 116 case ' ', '\t', '\n', '\r': 117 return true 118 } 119 return false 120} 121 122func isQuote(c byte) bool { 123 switch c { 124 case '"', '\'': 125 return 
true 126 } 127 return false 128} 129 130func (p *textParser) skipWhitespace() { 131 i := 0 132 for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') { 133 if p.s[i] == '#' { 134 // comment; skip to end of line or input 135 for i < len(p.s) && p.s[i] != '\n' { 136 i++ 137 } 138 if i == len(p.s) { 139 break 140 } 141 } 142 if p.s[i] == '\n' { 143 p.line++ 144 } 145 i++ 146 } 147 p.offset += i 148 p.s = p.s[i:len(p.s)] 149 if len(p.s) == 0 { 150 p.done = true 151 } 152} 153 154func (p *textParser) advance() { 155 // Skip whitespace 156 p.skipWhitespace() 157 if p.done { 158 return 159 } 160 161 // Start of non-whitespace 162 p.cur.err = nil 163 p.cur.offset, p.cur.line = p.offset, p.line 164 p.cur.unquoted = "" 165 switch p.s[0] { 166 case '<', '>', '{', '}', ':', '[', ']', ';', ',': 167 // Single symbol 168 p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)] 169 case '"', '\'': 170 // Quoted string 171 i := 1 172 for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' { 173 if p.s[i] == '\\' && i+1 < len(p.s) { 174 // skip escaped char 175 i++ 176 } 177 i++ 178 } 179 if i >= len(p.s) || p.s[i] != p.s[0] { 180 p.errorf("unmatched quote") 181 return 182 } 183 unq, err := unquoteC(p.s[1:i], rune(p.s[0])) 184 if err != nil { 185 p.errorf("invalid quoted string %s: %v", p.s[0:i+1], err) 186 return 187 } 188 p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)] 189 p.cur.unquoted = unq 190 default: 191 i := 0 192 for i < len(p.s) && isIdentOrNumberChar(p.s[i]) { 193 i++ 194 } 195 if i == 0 { 196 p.errorf("unexpected byte %#x", p.s[0]) 197 return 198 } 199 p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)] 200 } 201 p.offset += len(p.cur.value) 202} 203 204var ( 205 errBadUTF8 = errors.New("proto: bad UTF-8") 206 errBadHex = errors.New("proto: bad hexadecimal") 207) 208 209func unquoteC(s string, quote rune) (string, error) { 210 // This is based on C++'s tokenizer.cc. 211 // Despite its name, this is *not* parsing C syntax. 212 // For instance, "\0" is an invalid quoted string. 
213 214 // Avoid allocation in trivial cases. 215 simple := true 216 for _, r := range s { 217 if r == '\\' || r == quote { 218 simple = false 219 break 220 } 221 } 222 if simple { 223 return s, nil 224 } 225 226 buf := make([]byte, 0, 3*len(s)/2) 227 for len(s) > 0 { 228 r, n := utf8.DecodeRuneInString(s) 229 if r == utf8.RuneError && n == 1 { 230 return "", errBadUTF8 231 } 232 s = s[n:] 233 if r != '\\' { 234 if r < utf8.RuneSelf { 235 buf = append(buf, byte(r)) 236 } else { 237 buf = append(buf, string(r)...) 238 } 239 continue 240 } 241 242 ch, tail, err := unescape(s) 243 if err != nil { 244 return "", err 245 } 246 buf = append(buf, ch...) 247 s = tail 248 } 249 return string(buf), nil 250} 251 252func unescape(s string) (ch string, tail string, err error) { 253 r, n := utf8.DecodeRuneInString(s) 254 if r == utf8.RuneError && n == 1 { 255 return "", "", errBadUTF8 256 } 257 s = s[n:] 258 switch r { 259 case 'a': 260 return "\a", s, nil 261 case 'b': 262 return "\b", s, nil 263 case 'f': 264 return "\f", s, nil 265 case 'n': 266 return "\n", s, nil 267 case 'r': 268 return "\r", s, nil 269 case 't': 270 return "\t", s, nil 271 case 'v': 272 return "\v", s, nil 273 case '?': 274 return "?", s, nil // trigraph workaround 275 case '\'', '"', '\\': 276 return string(r), s, nil 277 case '0', '1', '2', '3', '4', '5', '6', '7', 'x', 'X': 278 if len(s) < 2 { 279 return "", "", fmt.Errorf(`\%c requires 2 following digits`, r) 280 } 281 base := 8 282 ss := s[:2] 283 s = s[2:] 284 if r == 'x' || r == 'X' { 285 base = 16 286 } else { 287 ss = string(r) + ss 288 } 289 i, err := strconv.ParseUint(ss, base, 8) 290 if err != nil { 291 return "", "", err 292 } 293 return string([]byte{byte(i)}), s, nil 294 case 'u', 'U': 295 n := 4 296 if r == 'U' { 297 n = 8 298 } 299 if len(s) < n { 300 return "", "", fmt.Errorf(`\%c requires %d digits`, r, n) 301 } 302 303 bs := make([]byte, n/2) 304 for i := 0; i < n; i += 2 { 305 a, ok1 := unhex(s[i]) 306 b, ok2 := unhex(s[i+1]) 307 if 
!ok1 || !ok2 { 308 return "", "", errBadHex 309 } 310 bs[i/2] = a<<4 | b 311 } 312 s = s[n:] 313 return string(bs), s, nil 314 } 315 return "", "", fmt.Errorf(`unknown escape \%c`, r) 316} 317 318// Adapted from src/pkg/strconv/quote.go. 319func unhex(b byte) (v byte, ok bool) { 320 switch { 321 case '0' <= b && b <= '9': 322 return b - '0', true 323 case 'a' <= b && b <= 'f': 324 return b - 'a' + 10, true 325 case 'A' <= b && b <= 'F': 326 return b - 'A' + 10, true 327 } 328 return 0, false 329} 330 331// Back off the parser by one token. Can only be done between calls to next(). 332// It makes the next advance() a no-op. 333func (p *textParser) back() { p.backed = true } 334 335// Advances the parser and returns the new current token. 336func (p *textParser) next() *token { 337 if p.backed || p.done { 338 p.backed = false 339 return &p.cur 340 } 341 p.advance() 342 if p.done { 343 p.cur.value = "" 344 } else if len(p.cur.value) > 0 && isQuote(p.cur.value[0]) { 345 // Look for multiple quoted strings separated by whitespace, 346 // and concatenate them. 347 cat := p.cur 348 for { 349 p.skipWhitespace() 350 if p.done || !isQuote(p.s[0]) { 351 break 352 } 353 p.advance() 354 if p.cur.err != nil { 355 return &p.cur 356 } 357 cat.value += " " + p.cur.value 358 cat.unquoted += p.cur.unquoted 359 } 360 p.done = false // parser may have seen EOF, but we want to return cat 361 p.cur = cat 362 } 363 return &p.cur 364} 365 366func (p *textParser) consumeToken(s string) error { 367 tok := p.next() 368 if tok.err != nil { 369 return tok.err 370 } 371 if tok.value != s { 372 p.back() 373 return p.errorf("expected %q, found %q", s, tok.value) 374 } 375 return nil 376} 377 378// Return a RequiredNotSetError indicating which required field was not set. 
379func (p *textParser) missingRequiredFieldError(sv reflect.Value) *RequiredNotSetError { 380 st := sv.Type() 381 sprops := GetProperties(st) 382 for i := 0; i < st.NumField(); i++ { 383 if !isNil(sv.Field(i)) { 384 continue 385 } 386 387 props := sprops.Prop[i] 388 if props.Required { 389 return &RequiredNotSetError{fmt.Sprintf("%v.%v", st, props.OrigName)} 390 } 391 } 392 return &RequiredNotSetError{fmt.Sprintf("%v.<unknown field name>", st)} // should not happen 393} 394 395// Returns the index in the struct for the named field, as well as the parsed tag properties. 396func structFieldByName(sprops *StructProperties, name string) (int, *Properties, bool) { 397 i, ok := sprops.decoderOrigNames[name] 398 if ok { 399 return i, sprops.Prop[i], true 400 } 401 return -1, nil, false 402} 403 404// Consume a ':' from the input stream (if the next token is a colon), 405// returning an error if a colon is needed but not present. 406func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError { 407 tok := p.next() 408 if tok.err != nil { 409 return tok.err 410 } 411 if tok.value != ":" { 412 // Colon is optional when the field is a group or message. 413 needColon := true 414 switch props.Wire { 415 case "group": 416 needColon = false 417 case "bytes": 418 // A "bytes" field is either a message, a string, or a repeated field; 419 // those three become *T, *string and []T respectively, so we can check for 420 // this field being a pointer to a non-string. 421 if typ.Kind() == reflect.Ptr { 422 // *T or *string 423 if typ.Elem().Kind() == reflect.String { 424 break 425 } 426 } else if typ.Kind() == reflect.Slice { 427 // []T or []*T 428 if typ.Elem().Kind() != reflect.Ptr { 429 break 430 } 431 } else if typ.Kind() == reflect.String { 432 // The proto3 exception is for a string field, 433 // which requires a colon. 
434 break 435 } 436 needColon = false 437 } 438 if needColon { 439 return p.errorf("expected ':', found %q", tok.value) 440 } 441 p.back() 442 } 443 return nil 444} 445 446func (p *textParser) readStruct(sv reflect.Value, terminator string) error { 447 st := sv.Type() 448 sprops := GetProperties(st) 449 reqCount := sprops.reqCount 450 var reqFieldErr error 451 fieldSet := make(map[string]bool) 452 // A struct is a sequence of "name: value", terminated by one of 453 // '>' or '}', or the end of the input. A name may also be 454 // "[extension]". 455 for { 456 tok := p.next() 457 if tok.err != nil { 458 return tok.err 459 } 460 if tok.value == terminator { 461 break 462 } 463 if tok.value == "[" { 464 // Looks like an extension. 465 // 466 // TODO: Check whether we need to handle 467 // namespace rooted names (e.g. ".something.Foo"). 468 tok = p.next() 469 if tok.err != nil { 470 return tok.err 471 } 472 var desc *ExtensionDesc 473 // This could be faster, but it's functional. 474 // TODO: Do something smarter than a linear scan. 475 for _, d := range RegisteredExtensions(reflect.New(st).Interface().(Message)) { 476 if d.Name == tok.value { 477 desc = d 478 break 479 } 480 } 481 if desc == nil { 482 return p.errorf("unrecognized extension %q", tok.value) 483 } 484 // Check the extension terminator. 485 tok = p.next() 486 if tok.err != nil { 487 return tok.err 488 } 489 if tok.value != "]" { 490 return p.errorf("unrecognized extension terminator %q", tok.value) 491 } 492 493 props := &Properties{} 494 props.Parse(desc.Tag) 495 496 typ := reflect.TypeOf(desc.ExtensionType) 497 if err := p.checkForColon(props, typ); err != nil { 498 return err 499 } 500 501 rep := desc.repeated() 502 503 // Read the extension structure, and set it in 504 // the value we're constructing. 
505 var ext reflect.Value 506 if !rep { 507 ext = reflect.New(typ).Elem() 508 } else { 509 ext = reflect.New(typ.Elem()).Elem() 510 } 511 if err := p.readAny(ext, props); err != nil { 512 if _, ok := err.(*RequiredNotSetError); !ok { 513 return err 514 } 515 reqFieldErr = err 516 } 517 ep := sv.Addr().Interface().(extendableProto) 518 if !rep { 519 SetExtension(ep, desc, ext.Interface()) 520 } else { 521 old, err := GetExtension(ep, desc) 522 var sl reflect.Value 523 if err == nil { 524 sl = reflect.ValueOf(old) // existing slice 525 } else { 526 sl = reflect.MakeSlice(typ, 0, 1) 527 } 528 sl = reflect.Append(sl, ext) 529 SetExtension(ep, desc, sl.Interface()) 530 } 531 if err := p.consumeOptionalSeparator(); err != nil { 532 return err 533 } 534 continue 535 } 536 537 // This is a normal, non-extension field. 538 name := tok.value 539 var dst reflect.Value 540 fi, props, ok := structFieldByName(sprops, name) 541 if ok { 542 dst = sv.Field(fi) 543 } else if oop, ok := sprops.OneofTypes[name]; ok { 544 // It is a oneof. 545 props = oop.Prop 546 nv := reflect.New(oop.Type.Elem()) 547 dst = nv.Elem().Field(0) 548 sv.Field(oop.Field).Set(nv) 549 } 550 if !dst.IsValid() { 551 return p.errorf("unknown field name %q in %v", name, st) 552 } 553 554 if dst.Kind() == reflect.Map { 555 // Consume any colon. 556 if err := p.checkForColon(props, dst.Type()); err != nil { 557 return err 558 } 559 560 // Construct the map if it doesn't already exist. 561 if dst.IsNil() { 562 dst.Set(reflect.MakeMap(dst.Type())) 563 } 564 key := reflect.New(dst.Type().Key()).Elem() 565 val := reflect.New(dst.Type().Elem()).Elem() 566 567 // The map entry should be this sequence of tokens: 568 // < key : KEY value : VALUE > 569 // Technically the "key" and "value" could come in any order, 570 // but in practice they won't. 
571 572 tok := p.next() 573 var terminator string 574 switch tok.value { 575 case "<": 576 terminator = ">" 577 case "{": 578 terminator = "}" 579 default: 580 return p.errorf("expected '{' or '<', found %q", tok.value) 581 } 582 if err := p.consumeToken("key"); err != nil { 583 return err 584 } 585 if err := p.consumeToken(":"); err != nil { 586 return err 587 } 588 if err := p.readAny(key, props.mkeyprop); err != nil { 589 return err 590 } 591 if err := p.consumeOptionalSeparator(); err != nil { 592 return err 593 } 594 if err := p.consumeToken("value"); err != nil { 595 return err 596 } 597 if err := p.checkForColon(props.mvalprop, dst.Type().Elem()); err != nil { 598 return err 599 } 600 if err := p.readAny(val, props.mvalprop); err != nil { 601 return err 602 } 603 if err := p.consumeOptionalSeparator(); err != nil { 604 return err 605 } 606 if err := p.consumeToken(terminator); err != nil { 607 return err 608 } 609 610 dst.SetMapIndex(key, val) 611 continue 612 } 613 614 // Check that it's not already set if it's not a repeated field. 615 if !props.Repeated && fieldSet[name] { 616 return p.errorf("non-repeated field %q was repeated", name) 617 } 618 619 if err := p.checkForColon(props, dst.Type()); err != nil { 620 return err 621 } 622 623 // Parse into the field. 624 fieldSet[name] = true 625 if err := p.readAny(dst, props); err != nil { 626 if _, ok := err.(*RequiredNotSetError); !ok { 627 return err 628 } 629 reqFieldErr = err 630 } else if props.Required { 631 reqCount-- 632 } 633 634 if err := p.consumeOptionalSeparator(); err != nil { 635 return err 636 } 637 638 } 639 640 if reqCount > 0 { 641 return p.missingRequiredFieldError(sv) 642 } 643 return reqFieldErr 644} 645 646// consumeOptionalSeparator consumes an optional semicolon or comma. 647// It is used in readStruct to provide backward compatibility. 
648func (p *textParser) consumeOptionalSeparator() error { 649 tok := p.next() 650 if tok.err != nil { 651 return tok.err 652 } 653 if tok.value != ";" && tok.value != "," { 654 p.back() 655 } 656 return nil 657} 658 659func (p *textParser) readAny(v reflect.Value, props *Properties) error { 660 tok := p.next() 661 if tok.err != nil { 662 return tok.err 663 } 664 if tok.value == "" { 665 return p.errorf("unexpected EOF") 666 } 667 668 switch fv := v; fv.Kind() { 669 case reflect.Slice: 670 at := v.Type() 671 if at.Elem().Kind() == reflect.Uint8 { 672 // Special case for []byte 673 if tok.value[0] != '"' && tok.value[0] != '\'' { 674 // Deliberately written out here, as the error after 675 // this switch statement would write "invalid []byte: ...", 676 // which is not as user-friendly. 677 return p.errorf("invalid string: %v", tok.value) 678 } 679 bytes := []byte(tok.unquoted) 680 fv.Set(reflect.ValueOf(bytes)) 681 return nil 682 } 683 // Repeated field. 684 if tok.value == "[" { 685 // Repeated field with list notation, like [1,2,3]. 686 for { 687 fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem())) 688 err := p.readAny(fv.Index(fv.Len()-1), props) 689 if err != nil { 690 return err 691 } 692 tok := p.next() 693 if tok.err != nil { 694 return tok.err 695 } 696 if tok.value == "]" { 697 break 698 } 699 if tok.value != "," { 700 return p.errorf("Expected ']' or ',' found %q", tok.value) 701 } 702 } 703 return nil 704 } 705 // One value of the repeated field. 706 p.back() 707 fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem())) 708 return p.readAny(fv.Index(fv.Len()-1), props) 709 case reflect.Bool: 710 // Either "true", "false", 1 or 0. 
711 switch tok.value { 712 case "true", "1": 713 fv.SetBool(true) 714 return nil 715 case "false", "0": 716 fv.SetBool(false) 717 return nil 718 } 719 case reflect.Float32, reflect.Float64: 720 v := tok.value 721 // Ignore 'f' for compatibility with output generated by C++, but don't 722 // remove 'f' when the value is "-inf" or "inf". 723 if strings.HasSuffix(v, "f") && tok.value != "-inf" && tok.value != "inf" { 724 v = v[:len(v)-1] 725 } 726 if f, err := strconv.ParseFloat(v, fv.Type().Bits()); err == nil { 727 fv.SetFloat(f) 728 return nil 729 } 730 case reflect.Int32: 731 if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil { 732 fv.SetInt(x) 733 return nil 734 } 735 736 if len(props.Enum) == 0 { 737 break 738 } 739 m, ok := enumValueMaps[props.Enum] 740 if !ok { 741 break 742 } 743 x, ok := m[tok.value] 744 if !ok { 745 break 746 } 747 fv.SetInt(int64(x)) 748 return nil 749 case reflect.Int64: 750 if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil { 751 fv.SetInt(x) 752 return nil 753 } 754 755 case reflect.Ptr: 756 // A basic field (indirected through pointer), or a repeated message/group 757 p.back() 758 fv.Set(reflect.New(fv.Type().Elem())) 759 return p.readAny(fv.Elem(), props) 760 case reflect.String: 761 if tok.value[0] == '"' || tok.value[0] == '\'' { 762 fv.SetString(tok.unquoted) 763 return nil 764 } 765 case reflect.Struct: 766 var terminator string 767 switch tok.value { 768 case "{": 769 terminator = "}" 770 case "<": 771 terminator = ">" 772 default: 773 return p.errorf("expected '{' or '<', found %q", tok.value) 774 } 775 // TODO: Handle nested messages which implement encoding.TextUnmarshaler. 
776 return p.readStruct(fv, terminator) 777 case reflect.Uint32: 778 if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil { 779 fv.SetUint(uint64(x)) 780 return nil 781 } 782 case reflect.Uint64: 783 if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil { 784 fv.SetUint(x) 785 return nil 786 } 787 } 788 return p.errorf("invalid %v: %v", v.Type(), tok.value) 789} 790 791// UnmarshalText reads a protocol buffer in Text format. UnmarshalText resets pb 792// before starting to unmarshal, so any existing data in pb is always removed. 793// If a required field is not set and no other error occurs, 794// UnmarshalText returns *RequiredNotSetError. 795func UnmarshalText(s string, pb Message) error { 796 if um, ok := pb.(encoding.TextUnmarshaler); ok { 797 err := um.UnmarshalText([]byte(s)) 798 return err 799 } 800 pb.Reset() 801 v := reflect.ValueOf(pb) 802 if pe := newTextParser(s).readStruct(v.Elem(), ""); pe != nil { 803 return pe 804 } 805 return nil 806} 807