// Package jlexer contains a JSON lexer implementation.
//
// It is expected that it is mostly used with generated parser code, so the interface is tuned
// for a parser that knows what kind of data is expected.
package jlexer

import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"strconv"
	"unicode"
	"unicode/utf16"
	"unicode/utf8"

	"github.com/josharian/intern"
)

// tokenKind determines type of a token.
type tokenKind byte

const (
	tokenUndef  tokenKind = iota // No token.
	tokenDelim                   // Delimiter: one of '{', '}', '[' or ']'.
	tokenString                  // A string literal, e.g. "abc\u1234"
	tokenNumber                  // Number literal, e.g. 1.5e5
	tokenBool                    // Boolean literal: true or false.
	tokenNull                    // null keyword.
)

// token describes a single token: type, position in the input and value.
type token struct {
	kind tokenKind // Type of a token.

	boolValue       bool   // Value if a boolean literal token.
	byteValueCloned bool   // True if byteValue was allocated and does not refer to the original json body.
	byteValue       []byte // Raw value of a token.
	delimValue      byte   // Delimiter character when kind == tokenDelim.
}

// Lexer is a JSON lexer: it iterates over JSON tokens in a byte slice.
type Lexer struct {
	Data []byte // Input data given to the lexer.

	start int   // Start of the current token.
	pos   int   // Current unscanned position in the input stream.
	token token // Last scanned token, if token.kind != tokenUndef.

	firstElement bool // Whether current element is the first in array or an object.
	wantSep      byte // A comma or a colon character, which needs to occur before the next token.

	UseMultipleErrors bool          // If we want to use multiple errors.
	fatalError        error         // Fatal error occurred during lexing. It is usually a syntax error.
	multipleErrors    []*LexerError // Semantic errors occurred during lexing. Marshalling will be continued after finding these errors.
}

// FetchToken scans the input for the next token.
func (r *Lexer) FetchToken() {
	r.token.kind = tokenUndef
	r.start = r.pos

	// Check if r.Data has r.pos element.
	// If it doesn't, it means corrupted input data.
	if len(r.Data) < r.pos {
		r.errParse("Unexpected end of data")
		return
	}
	// Determine the type of a token by skipping whitespace and reading the
	// first character.
	for _, c := range r.Data[r.pos:] {
		switch c {
		case ':', ',':
			// A separator is consumed only when one is expected
			// (set by WantColon/WantComma); otherwise it is a syntax error.
			if r.wantSep == c {
				r.pos++
				r.start++
				r.wantSep = 0
			} else {
				r.errSyntax()
			}

		case ' ', '\t', '\r', '\n':
			// Skip whitespace between tokens.
			r.pos++
			r.start++

		case '"':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenString
			r.fetchString()
			return

		case '{', '[':
			if r.wantSep != 0 {
				r.errSyntax()
			}
			// The next value (if any) is the first element of the container.
			r.firstElement = true
			r.token.kind = tokenDelim
			r.token.delimValue = r.Data[r.pos]
			r.pos++
			return

		case '}', ']':
			// A closing delimiter is valid right after the opening one
			// (empty object/array) or after a comma-separated element.
			if !r.firstElement && (r.wantSep != ',') {
				r.errSyntax()
			}
			r.wantSep = 0
			r.token.kind = tokenDelim
			r.token.delimValue = r.Data[r.pos]
			r.pos++
			return

		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-':
			if r.wantSep != 0 {
				r.errSyntax()
			}
			r.token.kind = tokenNumber
			r.fetchNumber()
			return

		case 'n':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenNull
			r.fetchNull()
			return

		case 't':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenBool
			r.token.boolValue = true
			r.fetchTrue()
			return

		case 'f':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenBool
			r.token.boolValue = false
			r.fetchFalse()
			return

		default:
			r.errSyntax()
			return
		}
	}
	// Only whitespace (or nothing) remained: signal end of input.
	r.fatalError = io.EOF
	return
}

// isTokenEnd returns true if the char can follow a non-delimiter token.
func isTokenEnd(c byte) bool {
	return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '[' || c == ']' || c == '{' || c == '}' || c == ',' || c == ':'
}

// fetchNull fetches and checks remaining bytes of null keyword.
func (r *Lexer) fetchNull() {
	r.pos += 4
	if r.pos > len(r.Data) ||
		r.Data[r.pos-3] != 'u' ||
		r.Data[r.pos-2] != 'l' ||
		r.Data[r.pos-1] != 'l' ||
		(r.pos != len(r.Data) && !isTokenEnd(r.Data[r.pos])) {

		// Roll back so the reported error points at the start of the keyword.
		r.pos -= 4
		r.errSyntax()
	}
}

// fetchTrue fetches and checks remaining bytes of true keyword.
func (r *Lexer) fetchTrue() {
	r.pos += 4
	if r.pos > len(r.Data) ||
		r.Data[r.pos-3] != 'r' ||
		r.Data[r.pos-2] != 'u' ||
		r.Data[r.pos-1] != 'e' ||
		(r.pos != len(r.Data) && !isTokenEnd(r.Data[r.pos])) {

		// Roll back so the reported error points at the start of the keyword.
		r.pos -= 4
		r.errSyntax()
	}
}

// fetchFalse fetches and checks remaining bytes of false keyword.
func (r *Lexer) fetchFalse() {
	r.pos += 5
	if r.pos > len(r.Data) ||
		r.Data[r.pos-4] != 'a' ||
		r.Data[r.pos-3] != 'l' ||
		r.Data[r.pos-2] != 's' ||
		r.Data[r.pos-1] != 'e' ||
		(r.pos != len(r.Data) && !isTokenEnd(r.Data[r.pos])) {

		// Roll back so the reported error points at the start of the keyword.
		r.pos -= 5
		r.errSyntax()
	}
}

// fetchNumber scans a number literal token.
// It accepts digits plus at most one '.' and one exponent marker, with an
// optional sign immediately after the exponent; strict numeric validation is
// deferred to strconv at read time.
func (r *Lexer) fetchNumber() {
	hasE := false   // An 'e'/'E' exponent marker was already seen.
	afterE := false // The previous char was the exponent marker (a sign may follow).
	hasDot := false // A decimal point was already seen (also disallowed after the exponent).

	r.pos++
	for i, c := range r.Data[r.pos:] {
		switch {
		case c >= '0' && c <= '9':
			afterE = false
		case c == '.' && !hasDot:
			hasDot = true
		case (c == 'e' || c == 'E') && !hasE:
			hasE = true
			hasDot = true
			afterE = true
		case (c == '+' || c == '-') && afterE:
			afterE = false
		default:
			// First char that cannot belong to the number: it must be a
			// valid token terminator, otherwise the literal is malformed.
			r.pos += i
			if !isTokenEnd(c) {
				r.errSyntax()
			} else {
				r.token.byteValue = r.Data[r.start:r.pos]
			}
			return
		}
	}

	// The number runs to the end of the input.
	r.pos = len(r.Data)
	r.token.byteValue = r.Data[r.start:]
}

// findStringLen tries to scan into the string literal for ending quote char to determine required size.
// The size will be exact if no escapes are present and may be inexact if there are escaped chars.
func findStringLen(data []byte) (isValid bool, length int) {
	for {
		idx := bytes.IndexByte(data, '"')
		if idx == -1 {
			// No closing quote at all: unterminated string literal.
			return false, len(data)
		}
		if idx == 0 || (idx > 0 && data[idx-1] != '\\') {
			// Quote not preceded by a backslash: it closes the string.
			return true, length + idx
		}

		// count \\\\\\\ sequences. even number of slashes means quote is not really escaped
		cnt := 1
		for idx-cnt-1 >= 0 && data[idx-cnt-1] == '\\' {
			cnt++
		}
		if cnt%2 == 0 {
			return true, length + idx
		}

		// The quote was escaped; keep scanning after it.
		length += idx + 1
		data = data[idx+1:]
	}
}

// unescapeStringToken performs unescaping of string token.
// if no escaping is needed, original string is returned, otherwise - a new one allocated.
// On success, r.token.byteValue holds the unescaped bytes and byteValueCloned
// records whether they were copied out of the input buffer.
func (r *Lexer) unescapeStringToken() (err error) {
	data := r.token.byteValue
	var unescapedData []byte

	for {
		i := bytes.IndexByte(data, '\\')
		if i == -1 {
			break
		}

		escapedRune, escapedBytes, err := decodeEscape(data[i:])
		if err != nil {
			r.errParse(err.Error())
			return err
		}

		// Allocate the scratch buffer lazily: most strings have no escapes.
		if unescapedData == nil {
			unescapedData = make([]byte, 0, len(r.token.byteValue))
		}

		// Copy the literal prefix, then the UTF-8 encoding of the decoded rune.
		var d [4]byte
		s := utf8.EncodeRune(d[:], escapedRune)
		unescapedData = append(unescapedData, data[:i]...)
		unescapedData = append(unescapedData, d[:s]...)

		data = data[i+escapedBytes:]
	}

	if unescapedData != nil {
		r.token.byteValue = append(unescapedData, data...)
		r.token.byteValueCloned = true
	}
	return
}

// getu4 decodes \uXXXX from the beginning of s, returning the hex value,
// or it returns -1.
func getu4(s []byte) rune {
	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
		return -1
	}
	var val rune
	// Exactly four hex digits follow "\u"; any other char is invalid.
	for i := 2; i < len(s) && i < 6; i++ {
		var v byte
		c := s[i]
		switch c {
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			v = c - '0'
		case 'a', 'b', 'c', 'd', 'e', 'f':
			v = c - 'a' + 10
		case 'A', 'B', 'C', 'D', 'E', 'F':
			v = c - 'A' + 10
		default:
			return -1
		}

		val <<= 4
		val |= rune(v)
	}
	return val
}

// decodeEscape processes a single escape sequence and returns number of bytes processed.
// data must start with a backslash; for \u escapes a surrogate pair is combined
// into one rune when possible, and an unpaired surrogate decodes to U+FFFD.
func decodeEscape(data []byte) (decoded rune, bytesProcessed int, err error) {
	if len(data) < 2 {
		return 0, 0, errors.New("incorrect escape symbol \\ at the end of token")
	}

	c := data[1]
	switch c {
	case '"', '/', '\\':
		return rune(c), 2, nil
	case 'b':
		return '\b', 2, nil
	case 'f':
		return '\f', 2, nil
	case 'n':
		return '\n', 2, nil
	case 'r':
		return '\r', 2, nil
	case 't':
		return '\t', 2, nil
	case 'u':
		rr := getu4(data)
		if rr < 0 {
			return 0, 0, errors.New("incorrectly escaped \\uXXXX sequence")
		}

		read := 6
		if utf16.IsSurrogate(rr) {
			// Try to combine with a following \uXXXX low surrogate.
			rr1 := getu4(data[read:])
			if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
				read += 6
				rr = dec
			} else {
				rr = unicode.ReplacementChar
			}
		}
		return rr, read, nil
	}

	return 0, 0, errors.New("incorrectly escaped bytes")
}

// fetchString scans a string literal token.
// The token value aliases the input buffer and still contains escape
// sequences; unescaping happens later in unescapeStringToken.
func (r *Lexer) fetchString() {
	r.pos++
	data := r.Data[r.pos:]

	isValid, length := findStringLen(data)
	if !isValid {
		r.pos += length
		r.errParse("unterminated string literal")
		return
	}
	r.token.byteValue = data[:length]
	r.pos += length + 1 // skip closing '"' as well
}

// scanToken scans the next token if no token is currently available in the lexer.
func (r *Lexer) scanToken() {
	// Nothing to do if a token is pending or scanning already failed.
	if r.token.kind != tokenUndef || r.fatalError != nil {
		return
	}

	r.FetchToken()
}

// consume resets the current token to allow scanning the next one.
func (r *Lexer) consume() {
	r.token.kind = tokenUndef
	r.token.delimValue = 0
}

// Ok returns true if no error (including io.EOF) was encountered during scanning.
func (r *Lexer) Ok() bool {
	return r.fatalError == nil
}

// maxErrorContextLen limits how much of the input is echoed back in error messages.
const maxErrorContextLen = 13

// errParse records a fatal parsing error at the current position, with a
// snippet of the input as context. Only the first fatal error is kept.
func (r *Lexer) errParse(what string) {
	if r.fatalError == nil {
		var str string
		if len(r.Data)-r.pos <= maxErrorContextLen {
			// NOTE(review): this echoes the entire input rather than the tail
			// starting at r.pos, so for a large input failing near its end
			// Data and Offset disagree — confirm whether intentional.
			str = string(r.Data)
		} else {
			str = string(r.Data[r.pos:r.pos+maxErrorContextLen-3]) + "..."
		}
		r.fatalError = &LexerError{
			Reason: what,
			Offset: r.pos,
			Data:   str,
		}
	}
}

// errSyntax records a generic fatal syntax error at the current position.
func (r *Lexer) errSyntax() {
	r.errParse("syntax error")
}

// errInvalidToken reports that the current token is not what the caller
// expected. With UseMultipleErrors enabled the error is recorded as
// non-fatal: the offending value is skipped and, for container delimiters,
// a matching closing token is synthesized so parsing can continue.
func (r *Lexer) errInvalidToken(expected string) {
	if r.fatalError != nil {
		return
	}
	if r.UseMultipleErrors {
		// Rewind to the token start, then skip the whole offending value.
		r.pos = r.start
		r.consume()
		r.SkipRecursive()
		switch expected {
		case "[":
			r.token.delimValue = ']'
			r.token.kind = tokenDelim
		case "{":
			r.token.delimValue = '}'
			r.token.kind = tokenDelim
		}
		r.addNonfatalError(&LexerError{
			Reason: fmt.Sprintf("expected %s", expected),
			Offset: r.start,
			Data:   string(r.Data[r.start:r.pos]),
		})
		return
	}

	var str string
	if len(r.token.byteValue) <= maxErrorContextLen {
		str = string(r.token.byteValue)
	} else {
		str = string(r.token.byteValue[:maxErrorContextLen-3]) + "..."
	}
	r.fatalError = &LexerError{
		Reason: fmt.Sprintf("expected %s", expected),
		Offset: r.pos,
		Data:   str,
	}
}

// GetPos returns the current scanning position in the input.
func (r *Lexer) GetPos() int {
	return r.pos
}

// Delim consumes a token and verifies that it is the given delimiter.
476func (r *Lexer) Delim(c byte) { 477 if r.token.kind == tokenUndef && r.Ok() { 478 r.FetchToken() 479 } 480 481 if !r.Ok() || r.token.delimValue != c { 482 r.consume() // errInvalidToken can change token if UseMultipleErrors is enabled. 483 r.errInvalidToken(string([]byte{c})) 484 } else { 485 r.consume() 486 } 487} 488 489// IsDelim returns true if there was no scanning error and next token is the given delimiter. 490func (r *Lexer) IsDelim(c byte) bool { 491 if r.token.kind == tokenUndef && r.Ok() { 492 r.FetchToken() 493 } 494 return !r.Ok() || r.token.delimValue == c 495} 496 497// Null verifies that the next token is null and consumes it. 498func (r *Lexer) Null() { 499 if r.token.kind == tokenUndef && r.Ok() { 500 r.FetchToken() 501 } 502 if !r.Ok() || r.token.kind != tokenNull { 503 r.errInvalidToken("null") 504 } 505 r.consume() 506} 507 508// IsNull returns true if the next token is a null keyword. 509func (r *Lexer) IsNull() bool { 510 if r.token.kind == tokenUndef && r.Ok() { 511 r.FetchToken() 512 } 513 return r.Ok() && r.token.kind == tokenNull 514} 515 516// Skip skips a single token. 517func (r *Lexer) Skip() { 518 if r.token.kind == tokenUndef && r.Ok() { 519 r.FetchToken() 520 } 521 r.consume() 522} 523 524// SkipRecursive skips next array or object completely, or just skips a single token if not 525// an array/object. 526// 527// Note: no syntax validation is performed on the skipped data. 
func (r *Lexer) SkipRecursive() {
	r.scanToken()
	var start, end byte

	switch r.token.delimValue {
	case '{':
		start, end = '{', '}'
	case '[':
		start, end = '[', ']'
	default:
		// Not a container: skipping the single current token is enough.
		r.consume()
		return
	}

	r.consume()

	level := 1         // Nesting depth of start/end delimiters seen so far.
	inQuotes := false  // True while scanning inside a string literal.
	wasEscape := false // True if the previous byte in a string was an unmatched backslash.

	for i, c := range r.Data[r.pos:] {
		switch {
		case c == start && !inQuotes:
			level++
		case c == end && !inQuotes:
			level--
			if level == 0 {
				r.pos += i + 1
				return
			}
		case c == '\\' && inQuotes:
			// Toggle so that "\\\\" does not make the following quote escaped.
			wasEscape = !wasEscape
			continue
		case c == '"' && inQuotes:
			// An escaped quote stays inside the string; an unescaped one closes it.
			inQuotes = wasEscape
		case c == '"':
			inQuotes = true
		}
		wasEscape = false
	}
	r.pos = len(r.Data)
	r.fatalError = &LexerError{
		Reason: "EOF reached while skipping array/object or token",
		Offset: r.pos,
		// NOTE(review): r.pos was just set to len(r.Data), so this slice is
		// always empty — confirm whether the tail before EOF was intended.
		Data: string(r.Data[r.pos:]),
	}
}

// Raw fetches the next item recursively as a data slice.
// The returned slice aliases the input buffer.
func (r *Lexer) Raw() []byte {
	r.SkipRecursive()
	if !r.Ok() {
		return nil
	}
	return r.Data[r.start:r.pos]
}

// IsStart returns whether the lexer is positioned at the start
// of an input string.
func (r *Lexer) IsStart() bool {
	return r.pos == 0
}

// Consumed reads all remaining bytes from the input, publishing an error if
// there is anything but whitespace remaining.
593func (r *Lexer) Consumed() { 594 if r.pos > len(r.Data) || !r.Ok() { 595 return 596 } 597 598 for _, c := range r.Data[r.pos:] { 599 if c != ' ' && c != '\t' && c != '\r' && c != '\n' { 600 r.AddError(&LexerError{ 601 Reason: "invalid character '" + string(c) + "' after top-level value", 602 Offset: r.pos, 603 Data: string(r.Data[r.pos:]), 604 }) 605 return 606 } 607 608 r.pos++ 609 r.start++ 610 } 611} 612 613func (r *Lexer) unsafeString(skipUnescape bool) (string, []byte) { 614 if r.token.kind == tokenUndef && r.Ok() { 615 r.FetchToken() 616 } 617 if !r.Ok() || r.token.kind != tokenString { 618 r.errInvalidToken("string") 619 return "", nil 620 } 621 if !skipUnescape { 622 if err := r.unescapeStringToken(); err != nil { 623 r.errInvalidToken("string") 624 return "", nil 625 } 626 } 627 628 bytes := r.token.byteValue 629 ret := bytesToStr(r.token.byteValue) 630 r.consume() 631 return ret, bytes 632} 633 634// UnsafeString returns the string value if the token is a string literal. 635// 636// Warning: returned string may point to the input buffer, so the string should not outlive 637// the input buffer. Intended pattern of usage is as an argument to a switch statement. 638func (r *Lexer) UnsafeString() string { 639 ret, _ := r.unsafeString(false) 640 return ret 641} 642 643// UnsafeBytes returns the byte slice if the token is a string literal. 644func (r *Lexer) UnsafeBytes() []byte { 645 _, ret := r.unsafeString(false) 646 return ret 647} 648 649// UnsafeFieldName returns current member name string token 650func (r *Lexer) UnsafeFieldName(skipUnescape bool) string { 651 ret, _ := r.unsafeString(skipUnescape) 652 return ret 653} 654 655// String reads a string literal. 
656func (r *Lexer) String() string { 657 if r.token.kind == tokenUndef && r.Ok() { 658 r.FetchToken() 659 } 660 if !r.Ok() || r.token.kind != tokenString { 661 r.errInvalidToken("string") 662 return "" 663 } 664 if err := r.unescapeStringToken(); err != nil { 665 r.errInvalidToken("string") 666 return "" 667 } 668 var ret string 669 if r.token.byteValueCloned { 670 ret = bytesToStr(r.token.byteValue) 671 } else { 672 ret = string(r.token.byteValue) 673 } 674 r.consume() 675 return ret 676} 677 678// StringIntern reads a string literal, and performs string interning on it. 679func (r *Lexer) StringIntern() string { 680 if r.token.kind == tokenUndef && r.Ok() { 681 r.FetchToken() 682 } 683 if !r.Ok() || r.token.kind != tokenString { 684 r.errInvalidToken("string") 685 return "" 686 } 687 if err := r.unescapeStringToken(); err != nil { 688 r.errInvalidToken("string") 689 return "" 690 } 691 ret := intern.Bytes(r.token.byteValue) 692 r.consume() 693 return ret 694} 695 696// Bytes reads a string literal and base64 decodes it into a byte slice. 697func (r *Lexer) Bytes() []byte { 698 if r.token.kind == tokenUndef && r.Ok() { 699 r.FetchToken() 700 } 701 if !r.Ok() || r.token.kind != tokenString { 702 r.errInvalidToken("string") 703 return nil 704 } 705 ret := make([]byte, base64.StdEncoding.DecodedLen(len(r.token.byteValue))) 706 n, err := base64.StdEncoding.Decode(ret, r.token.byteValue) 707 if err != nil { 708 r.fatalError = &LexerError{ 709 Reason: err.Error(), 710 } 711 return nil 712 } 713 714 r.consume() 715 return ret[:n] 716} 717 718// Bool reads a true or false boolean keyword. 
func (r *Lexer) Bool() bool {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenBool {
		r.errInvalidToken("bool")
		return false
	}
	ret := r.token.boolValue
	r.consume()
	return ret
}

// number scans a number token and returns its raw text, or "" on error.
// The returned string aliases the input buffer.
func (r *Lexer) number() string {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenNumber {
		r.errInvalidToken("number")
		return ""
	}
	ret := bytesToStr(r.token.byteValue)
	r.consume()
	return ret
}

// NOTE(review): the numeric readers below are mechanical copies differing
// only in bit size and signedness; a shared helper would remove the
// duplication, but would have to preserve the lazy error-context copies.

// Uint8 reads a number token as a uint8. A malformed or out-of-range value
// is recorded as a non-fatal error and the raw strconv result is returned.
func (r *Lexer) Uint8() uint8 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 8)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return uint8(n)
}

// Uint16 reads a number token as a uint16.
func (r *Lexer) Uint16() uint16 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 16)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return uint16(n)
}

// Uint32 reads a number token as a uint32.
func (r *Lexer) Uint32() uint32 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 32)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return uint32(n)
}

// Uint64 reads a number token as a uint64.
func (r *Lexer) Uint64() uint64 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 64)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return n
}

// Uint reads a number token as a uint.
func (r *Lexer) Uint() uint {
	return uint(r.Uint64())
}

// Int8 reads a number token as an int8.
func (r *Lexer) Int8() int8 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 8)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return int8(n)
}

// Int16 reads a number token as an int16.
func (r *Lexer) Int16() int16 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 16)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return int16(n)
}

// Int32 reads a number token as an int32.
func (r *Lexer) Int32() int32 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 32)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return int32(n)
}

// Int64 reads a number token as an int64.
func (r *Lexer) Int64() int64 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 64)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return n
}

// Int reads a number token as an int.
func (r *Lexer) Int() int {
	return int(r.Int64())
}

// Uint8Str reads a string literal containing a number as a uint8.
// Note: error context is copied out of the unsafe string with string(b).
func (r *Lexer) Uint8Str() uint8 {
	s, b := r.unsafeString(false)
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 8)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return uint8(n)
}

// Uint16Str reads a string literal containing a number as a uint16.
func (r *Lexer) Uint16Str() uint16 {
	s, b := r.unsafeString(false)
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 16)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return uint16(n)
}

// Uint32Str reads a string literal containing a number as a uint32.
func (r *Lexer) Uint32Str() uint32 {
	s, b := r.unsafeString(false)
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 32)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return uint32(n)
}

// Uint64Str reads a string literal containing a number as a uint64.
func (r *Lexer) Uint64Str() uint64 {
	s, b := r.unsafeString(false)
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 64)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return n
}

// UintStr reads a string literal containing a number as a uint.
func (r *Lexer) UintStr() uint {
	return uint(r.Uint64Str())
}

// UintptrStr reads a string literal containing a number as a uintptr.
func (r *Lexer) UintptrStr() uintptr {
	return uintptr(r.Uint64Str())
}

// Int8Str reads a string literal containing a number as an int8.
func (r *Lexer) Int8Str() int8 {
	s, b := r.unsafeString(false)
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 8)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return int8(n)
}

// Int16Str reads a string literal containing a number as an int16.
func (r *Lexer) Int16Str() int16 {
	s, b := r.unsafeString(false)
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 16)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return int16(n)
}

// Int32Str reads a string literal containing a number as an int32.
func (r *Lexer) Int32Str() int32 {
	s, b := r.unsafeString(false)
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 32)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return int32(n)
}

// Int64Str reads a string literal containing a number as an int64.
func (r *Lexer) Int64Str() int64 {
	s, b := r.unsafeString(false)
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 64)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return n
}

// IntStr reads a string literal containing a number as an int.
func (r *Lexer) IntStr() int {
	return int(r.Int64Str())
}

// Float32 reads a number token as a float32.
func (r *Lexer) Float32() float32 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseFloat(s, 32)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return float32(n)
}

// Float32Str reads a string literal containing a number as a float32.
func (r *Lexer) Float32Str() float32 {
	s, b := r.unsafeString(false)
	if !r.Ok() {
		return 0
	}
	n, err := strconv.ParseFloat(s, 32)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return float32(n)
}

// Float64 reads a number token as a float64.
func (r *Lexer) Float64() float64 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseFloat(s, 64)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return n
}

// Float64Str reads a string literal containing a number as a float64.
func (r *Lexer) Float64Str() float64 {
	s, b := r.unsafeString(false)
	if !r.Ok() {
		return 0
	}
	n, err := strconv.ParseFloat(s, 64)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return n
}

// Error returns the fatal error recorded during lexing, if any.
func (r *Lexer) Error() error {
	return r.fatalError
}

// AddError records e as the fatal error unless one is already set.
func (r *Lexer) AddError(e error) {
	if r.fatalError == nil {
		r.fatalError = e
	}
}

// AddNonFatalError records e as a non-fatal error at the current token,
// using the raw token text as context.
func (r *Lexer) AddNonFatalError(e error) {
	r.addNonfatalError(&LexerError{
		Offset: r.start,
		Data:   string(r.Data[r.start:r.pos]),
		Reason: e.Error(),
	})
}

// addNonfatalError appends err to the error list when UseMultipleErrors is
// enabled; otherwise it becomes the fatal error.
func (r *Lexer) addNonfatalError(err *LexerError) {
	if r.UseMultipleErrors {
		// We don't want to add errors with the same offset.
		if len(r.multipleErrors) != 0 && r.multipleErrors[len(r.multipleErrors)-1].Offset == err.Offset {
			return
		}
		r.multipleErrors = append(r.multipleErrors, err)
		return
	}
	r.fatalError = err
}

// GetNonFatalErrors returns the non-fatal errors collected so far.
func (r *Lexer) GetNonFatalErrors() []*LexerError {
	return r.multipleErrors
}

// JsonNumber fetches a json.Number from the 'encoding/json' package.
// A number token, a string literal containing a number, and null are all
// accepted values.
func (r *Lexer) JsonNumber() json.Number {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() {
		r.errInvalidToken("json.Number")
		return json.Number("")
	}

	switch r.token.kind {
	case tokenString:
		return json.Number(r.String())
	case tokenNumber:
		return json.Number(r.Raw())
	case tokenNull:
		r.Null()
		return json.Number("")
	default:
		r.errSyntax()
		return json.Number("")
	}
}

// Interface fetches an interface{} analogous to the 'encoding/json' package.
1163func (r *Lexer) Interface() interface{} { 1164 if r.token.kind == tokenUndef && r.Ok() { 1165 r.FetchToken() 1166 } 1167 1168 if !r.Ok() { 1169 return nil 1170 } 1171 switch r.token.kind { 1172 case tokenString: 1173 return r.String() 1174 case tokenNumber: 1175 return r.Float64() 1176 case tokenBool: 1177 return r.Bool() 1178 case tokenNull: 1179 r.Null() 1180 return nil 1181 } 1182 1183 if r.token.delimValue == '{' { 1184 r.consume() 1185 1186 ret := map[string]interface{}{} 1187 for !r.IsDelim('}') { 1188 key := r.String() 1189 r.WantColon() 1190 ret[key] = r.Interface() 1191 r.WantComma() 1192 } 1193 r.Delim('}') 1194 1195 if r.Ok() { 1196 return ret 1197 } else { 1198 return nil 1199 } 1200 } else if r.token.delimValue == '[' { 1201 r.consume() 1202 1203 ret := []interface{}{} 1204 for !r.IsDelim(']') { 1205 ret = append(ret, r.Interface()) 1206 r.WantComma() 1207 } 1208 r.Delim(']') 1209 1210 if r.Ok() { 1211 return ret 1212 } else { 1213 return nil 1214 } 1215 } 1216 r.errSyntax() 1217 return nil 1218} 1219 1220// WantComma requires a comma to be present before fetching next token. 1221func (r *Lexer) WantComma() { 1222 r.wantSep = ',' 1223 r.firstElement = false 1224} 1225 1226// WantColon requires a colon to be present before fetching next token. 1227func (r *Lexer) WantColon() { 1228 r.wantSep = ':' 1229 r.firstElement = false 1230} 1231