// Package jlexer contains a JSON lexer implementation.
//
// It is expected that it is mostly used with generated parser code, so the interface is tuned
// for a parser that knows what kind of data is expected.
package jlexer

import (
	"encoding/base64"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"strconv"
	"unicode"
	"unicode/utf16"
	"unicode/utf8"
)

// tokenKind determines type of a token.
type tokenKind byte

const (
	tokenUndef  tokenKind = iota // No token.
	tokenDelim                   // Delimiter: one of '{', '}', '[' or ']'.
	tokenString                  // A string literal, e.g. "abc\u1234"
	tokenNumber                  // Number literal, e.g. 1.5e5
	tokenBool                    // Boolean literal: true or false.
	tokenNull                    // null keyword.
)

// token describes a single token: type, position in the input and value.
type token struct {
	kind tokenKind // Type of a token.

	boolValue  bool   // Value if a boolean literal token.
	byteValue  []byte // Raw value of a token; may alias Lexer.Data when no escapes were present.
	delimValue byte   // Value if a delimiter token: '{', '}', '[' or ']'.
}

// Lexer is a JSON lexer: it iterates over JSON tokens in a byte slice.
type Lexer struct {
	Data []byte // Input data given to the lexer.

	start int   // Start of the current token.
	pos   int   // Current unscanned position in the input stream.
	token token // Last scanned token, if token.kind != tokenUndef.

	firstElement bool // Whether current element is the first in array or an object.
	wantSep      byte // A comma or a colon character, which need to occur before a token.

	UseMultipleErrors bool          // If we want to use multiple errors.
	fatalError        error         // Fatal error occurred during lexing. It is usually a syntax error.
	multipleErrors    []*LexerError // Semantic errors occurred during lexing. Marshalling will be continued after finding this errors.
}

// FetchToken scans the input for the next token.
func (r *Lexer) FetchToken() {
	r.token.kind = tokenUndef
	r.start = r.pos

	// Check if r.Data has r.pos element
	// If it doesn't, it mean corrupted input data
	if len(r.Data) < r.pos {
		r.errParse("Unexpected end of data")
		return
	}
	// Determine the type of a token by skipping whitespace and reading the
	// first character.
	for _, c := range r.Data[r.pos:] {
		switch c {
		case ':', ',':
			// A separator is consumed only when one was requested via
			// WantColon/WantComma; otherwise it is a syntax error.
			if r.wantSep == c {
				r.pos++
				r.start++
				r.wantSep = 0
			} else {
				r.errSyntax()
			}

		case ' ', '\t', '\r', '\n':
			// Skip insignificant whitespace before the token.
			r.pos++
			r.start++

		case '"':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenString
			r.fetchString()
			return

		case '{', '[':
			if r.wantSep != 0 {
				r.errSyntax()
			}
			// Mark the container so an immediately-following closing
			// delimiter (empty object/array) is accepted without a comma.
			r.firstElement = true
			r.token.kind = tokenDelim
			r.token.delimValue = r.Data[r.pos]
			r.pos++
			return

		case '}', ']':
			// Valid right after the opening delimiter, or where a comma
			// would otherwise have been required (i.e. after an element).
			if !r.firstElement && (r.wantSep != ',') {
				r.errSyntax()
			}
			r.wantSep = 0
			r.token.kind = tokenDelim
			r.token.delimValue = r.Data[r.pos]
			r.pos++
			return

		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-':
			if r.wantSep != 0 {
				r.errSyntax()
			}
			r.token.kind = tokenNumber
			r.fetchNumber()
			return

		case 'n':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenNull
			r.fetchNull()
			return

		case 't':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenBool
			r.token.boolValue = true
			r.fetchTrue()
			return

		case 'f':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenBool
			r.token.boolValue = false
			r.fetchFalse()
			return

		default:
			r.errSyntax()
			return
		}
	}
	// Only whitespace (or nothing) remained: signal end of input.
	r.fatalError = io.EOF
	return
}

// isTokenEnd returns true if the char can follow a non-delimiter token
func isTokenEnd(c byte) bool {
	return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '[' || c == ']' || c == '{' || c == '}' || c == ',' || c == ':'
}

// fetchNull fetches and checks remaining bytes of null keyword.
func (r *Lexer) fetchNull() {
	// Advance past the 4-byte keyword first, then validate it in place.
	// The first byte ('n') was already matched by FetchToken.
	r.pos += 4
	if r.pos > len(r.Data) ||
		r.Data[r.pos-3] != 'u' ||
		r.Data[r.pos-2] != 'l' ||
		r.Data[r.pos-1] != 'l' ||
		(r.pos != len(r.Data) && !isTokenEnd(r.Data[r.pos])) {

		// Rewind so the error offset points at the keyword start.
		r.pos -= 4
		r.errSyntax()
	}
}

// fetchTrue fetches and checks remaining bytes of true keyword.
func (r *Lexer) fetchTrue() {
	// Same pattern as fetchNull: 't' was already matched by FetchToken.
	r.pos += 4
	if r.pos > len(r.Data) ||
		r.Data[r.pos-3] != 'r' ||
		r.Data[r.pos-2] != 'u' ||
		r.Data[r.pos-1] != 'e' ||
		(r.pos != len(r.Data) && !isTokenEnd(r.Data[r.pos])) {

		r.pos -= 4
		r.errSyntax()
	}
}

// fetchFalse fetches and checks remaining bytes of false keyword.
func (r *Lexer) fetchFalse() {
	// Same pattern, but "false" has four bytes after the matched 'f'.
	r.pos += 5
	if r.pos > len(r.Data) ||
		r.Data[r.pos-4] != 'a' ||
		r.Data[r.pos-3] != 'l' ||
		r.Data[r.pos-2] != 's' ||
		r.Data[r.pos-1] != 'e' ||
		(r.pos != len(r.Data) && !isTokenEnd(r.Data[r.pos])) {

		r.pos -= 5
		r.errSyntax()
	}
}

// fetchNumber scans a number literal token.
//
// The scan is permissive: it only checks character classes and the ordering
// of '.', 'e'/'E' and sign characters; full validation is left to strconv
// when the number is actually parsed.
func (r *Lexer) fetchNumber() {
	hasE := false
	afterE := false // True immediately after 'e'/'E', where a sign is allowed.
	hasDot := false

	// The first character ('-' or a digit) was already matched by FetchToken.
	r.pos++
	for i, c := range r.Data[r.pos:] {
		switch {
		case c >= '0' && c <= '9':
			afterE = false
		case c == '.' && !hasDot:
			hasDot = true
		case (c == 'e' || c == 'E') && !hasE:
			hasE = true
			hasDot = true // A '.' is no longer allowed once in the exponent.
			afterE = true
		case (c == '+' || c == '-') && afterE:
			afterE = false
		default:
			// End of the number: it must be followed by a token separator.
			r.pos += i
			if !isTokenEnd(c) {
				r.errSyntax()
			} else {
				r.token.byteValue = r.Data[r.start:r.pos]
			}
			return
		}
	}

	// The number runs to the end of the input.
	r.pos = len(r.Data)
	r.token.byteValue = r.Data[r.start:]
}

// findStringLen tries to scan into the string literal for ending quote char to determine required size.
// The size will be exact if no escapes are present and may be inexact if there are escaped chars.
func findStringLen(data []byte) (isValid, hasEscapes bool, length int) {
	// delta counts the bytes an unescaped copy would save: one byte per
	// backslash escape, plus one more for \uXXXX sequences. This is only a
	// capacity estimate and may overshoot the real unescaped length.
	delta := 0

	for i := 0; i < len(data); i++ {
		switch data[i] {
		case '\\':
			i++ // Skip the escaped character so an escaped '"' is not taken as the terminator.
			delta++
			if i < len(data) && data[i] == 'u' {
				delta++
			}
		case '"':
			return true, (delta > 0), (i - delta)
		}
	}

	// No closing quote found: the literal is unterminated.
	return false, false, len(data)
}

// getu4 decodes \uXXXX from the beginning of s, returning the hex value,
// or it returns -1.
func getu4(s []byte) rune {
	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
		return -1
	}
	var val rune
	// Accumulate exactly the four hex digits following "\u".
	for i := 2; i < len(s) && i < 6; i++ {
		var v byte
		c := s[i]
		switch c {
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			v = c - '0'
		case 'a', 'b', 'c', 'd', 'e', 'f':
			v = c - 'a' + 10
		case 'A', 'B', 'C', 'D', 'E', 'F':
			v = c - 'A' + 10
		default:
			return -1
		}

		val <<= 4
		val |= rune(v)
	}
	return val
}

// processEscape processes a single escape sequence and returns number of bytes processed.
290func (r *Lexer) processEscape(data []byte) (int, error) { 291 if len(data) < 2 { 292 return 0, fmt.Errorf("syntax error at %v", string(data)) 293 } 294 295 c := data[1] 296 switch c { 297 case '"', '/', '\\': 298 r.token.byteValue = append(r.token.byteValue, c) 299 return 2, nil 300 case 'b': 301 r.token.byteValue = append(r.token.byteValue, '\b') 302 return 2, nil 303 case 'f': 304 r.token.byteValue = append(r.token.byteValue, '\f') 305 return 2, nil 306 case 'n': 307 r.token.byteValue = append(r.token.byteValue, '\n') 308 return 2, nil 309 case 'r': 310 r.token.byteValue = append(r.token.byteValue, '\r') 311 return 2, nil 312 case 't': 313 r.token.byteValue = append(r.token.byteValue, '\t') 314 return 2, nil 315 case 'u': 316 rr := getu4(data) 317 if rr < 0 { 318 return 0, errors.New("syntax error") 319 } 320 321 read := 6 322 if utf16.IsSurrogate(rr) { 323 rr1 := getu4(data[read:]) 324 if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar { 325 read += 6 326 rr = dec 327 } else { 328 rr = unicode.ReplacementChar 329 } 330 } 331 var d [4]byte 332 s := utf8.EncodeRune(d[:], rr) 333 r.token.byteValue = append(r.token.byteValue, d[:s]...) 334 return read, nil 335 } 336 337 return 0, errors.New("syntax error") 338} 339 340// fetchString scans a string literal token. 341func (r *Lexer) fetchString() { 342 r.pos++ 343 data := r.Data[r.pos:] 344 345 isValid, hasEscapes, length := findStringLen(data) 346 if !isValid { 347 r.pos += length 348 r.errParse("unterminated string literal") 349 return 350 } 351 if !hasEscapes { 352 r.token.byteValue = data[:length] 353 r.pos += length + 1 354 return 355 } 356 357 r.token.byteValue = make([]byte, 0, length) 358 p := 0 359 for i := 0; i < len(data); { 360 switch data[i] { 361 case '"': 362 r.pos += i + 1 363 r.token.byteValue = append(r.token.byteValue, data[p:i]...) 364 i++ 365 return 366 367 case '\\': 368 r.token.byteValue = append(r.token.byteValue, data[p:i]...) 
369 off, err := r.processEscape(data[i:]) 370 if err != nil { 371 r.errParse(err.Error()) 372 return 373 } 374 i += off 375 p = i 376 377 default: 378 i++ 379 } 380 } 381 r.errParse("unterminated string literal") 382} 383 384// scanToken scans the next token if no token is currently available in the lexer. 385func (r *Lexer) scanToken() { 386 if r.token.kind != tokenUndef || r.fatalError != nil { 387 return 388 } 389 390 r.FetchToken() 391} 392 393// consume resets the current token to allow scanning the next one. 394func (r *Lexer) consume() { 395 r.token.kind = tokenUndef 396 r.token.delimValue = 0 397} 398 399// Ok returns true if no error (including io.EOF) was encountered during scanning. 400func (r *Lexer) Ok() bool { 401 return r.fatalError == nil 402} 403 404const maxErrorContextLen = 13 405 406func (r *Lexer) errParse(what string) { 407 if r.fatalError == nil { 408 var str string 409 if len(r.Data)-r.pos <= maxErrorContextLen { 410 str = string(r.Data) 411 } else { 412 str = string(r.Data[r.pos:r.pos+maxErrorContextLen-3]) + "..." 413 } 414 r.fatalError = &LexerError{ 415 Reason: what, 416 Offset: r.pos, 417 Data: str, 418 } 419 } 420} 421 422func (r *Lexer) errSyntax() { 423 r.errParse("syntax error") 424} 425 426func (r *Lexer) errInvalidToken(expected string) { 427 if r.fatalError != nil { 428 return 429 } 430 if r.UseMultipleErrors { 431 r.pos = r.start 432 r.consume() 433 r.SkipRecursive() 434 switch expected { 435 case "[": 436 r.token.delimValue = ']' 437 r.token.kind = tokenDelim 438 case "{": 439 r.token.delimValue = '}' 440 r.token.kind = tokenDelim 441 } 442 r.addNonfatalError(&LexerError{ 443 Reason: fmt.Sprintf("expected %s", expected), 444 Offset: r.start, 445 Data: string(r.Data[r.start:r.pos]), 446 }) 447 return 448 } 449 450 var str string 451 if len(r.token.byteValue) <= maxErrorContextLen { 452 str = string(r.token.byteValue) 453 } else { 454 str = string(r.token.byteValue[:maxErrorContextLen-3]) + "..." 
455 } 456 r.fatalError = &LexerError{ 457 Reason: fmt.Sprintf("expected %s", expected), 458 Offset: r.pos, 459 Data: str, 460 } 461} 462 463func (r *Lexer) GetPos() int { 464 return r.pos 465} 466 467// Delim consumes a token and verifies that it is the given delimiter. 468func (r *Lexer) Delim(c byte) { 469 if r.token.kind == tokenUndef && r.Ok() { 470 r.FetchToken() 471 } 472 473 if !r.Ok() || r.token.delimValue != c { 474 r.consume() // errInvalidToken can change token if UseMultipleErrors is enabled. 475 r.errInvalidToken(string([]byte{c})) 476 } else { 477 r.consume() 478 } 479} 480 481// IsDelim returns true if there was no scanning error and next token is the given delimiter. 482func (r *Lexer) IsDelim(c byte) bool { 483 if r.token.kind == tokenUndef && r.Ok() { 484 r.FetchToken() 485 } 486 return !r.Ok() || r.token.delimValue == c 487} 488 489// Null verifies that the next token is null and consumes it. 490func (r *Lexer) Null() { 491 if r.token.kind == tokenUndef && r.Ok() { 492 r.FetchToken() 493 } 494 if !r.Ok() || r.token.kind != tokenNull { 495 r.errInvalidToken("null") 496 } 497 r.consume() 498} 499 500// IsNull returns true if the next token is a null keyword. 501func (r *Lexer) IsNull() bool { 502 if r.token.kind == tokenUndef && r.Ok() { 503 r.FetchToken() 504 } 505 return r.Ok() && r.token.kind == tokenNull 506} 507 508// Skip skips a single token. 509func (r *Lexer) Skip() { 510 if r.token.kind == tokenUndef && r.Ok() { 511 r.FetchToken() 512 } 513 r.consume() 514} 515 516// SkipRecursive skips next array or object completely, or just skips a single token if not 517// an array/object. 518// 519// Note: no syntax validation is performed on the skipped data. 
func (r *Lexer) SkipRecursive() {
	r.scanToken()
	var start, end byte

	switch r.token.delimValue {
	case '{':
		start, end = '{', '}'
	case '[':
		start, end = '[', ']'
	default:
		// Not an array/object: skipping the single current token is enough.
		r.consume()
		return
	}

	r.consume()

	// Scan raw bytes, tracking delimiter nesting depth and whether we are
	// inside a string (where delimiters do not count).
	level := 1
	inQuotes := false
	wasEscape := false

	for i, c := range r.Data[r.pos:] {
		switch {
		case c == start && !inQuotes:
			level++
		case c == end && !inQuotes:
			level--
			if level == 0 {
				r.pos += i + 1
				return
			}
		case c == '\\' && inQuotes:
			// Toggle so a run like "\\\\" does not leave us in escaped state;
			// continue skips the reset below.
			wasEscape = !wasEscape
			continue
		case c == '"' && inQuotes:
			// A quote terminates the string only when it was not escaped.
			inQuotes = wasEscape
		case c == '"':
			inQuotes = true
		}
		wasEscape = false
	}
	// Input exhausted before the container was closed.
	r.pos = len(r.Data)
	r.fatalError = &LexerError{
		Reason: "EOF reached while skipping array/object or token",
		Offset: r.pos,
		Data:   string(r.Data[r.pos:]),
	}
}

// Raw fetches the next item recursively as a data slice
func (r *Lexer) Raw() []byte {
	r.SkipRecursive()
	if !r.Ok() {
		return nil
	}
	// r.start still marks the beginning of the skipped item (set by FetchToken).
	return r.Data[r.start:r.pos]
}

// IsStart returns whether the lexer is positioned at the start
// of an input string.
func (r *Lexer) IsStart() bool {
	return r.pos == 0
}

// Consumed reads all remaining bytes from the input, publishing an error if
// there is anything but whitespace remaining.
func (r *Lexer) Consumed() {
	if r.pos > len(r.Data) || !r.Ok() {
		return
	}

	for _, c := range r.Data[r.pos:] {
		if c != ' ' && c != '\t' && c != '\r' && c != '\n' {
			r.AddError(&LexerError{
				Reason: "invalid character '" + string(c) + "' after top-level value",
				Offset: r.pos,
				Data:   string(r.Data[r.pos:]),
			})
			return
		}

		r.pos++
		r.start++
	}
}

// unsafeString reads a string token and returns both an unsafe string view of
// it and the underlying byte slice. The string is produced by bytesToStr and
// may alias the input buffer (see UnsafeString), so it must not outlive r.Data.
func (r *Lexer) unsafeString() (string, []byte) {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenString {
		r.errInvalidToken("string")
		return "", nil
	}
	bytes := r.token.byteValue
	ret := bytesToStr(r.token.byteValue)
	r.consume()
	return ret, bytes
}

// UnsafeString returns the string value if the token is a string literal.
//
// Warning: returned string may point to the input buffer, so the string should not outlive
// the input buffer. Intended pattern of usage is as an argument to a switch statement.
func (r *Lexer) UnsafeString() string {
	ret, _ := r.unsafeString()
	return ret
}

// UnsafeBytes returns the byte slice if the token is a string literal.
func (r *Lexer) UnsafeBytes() []byte {
	_, ret := r.unsafeString()
	return ret
}

// String reads a string literal.
func (r *Lexer) String() string {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenString {
		r.errInvalidToken("string")
		return ""
	}
	// Copy the value so the result is safe to keep after the buffer is gone.
	ret := string(r.token.byteValue)
	r.consume()
	return ret
}

// Bytes reads a string literal and base64 decodes it into a byte slice.
649func (r *Lexer) Bytes() []byte { 650 if r.token.kind == tokenUndef && r.Ok() { 651 r.FetchToken() 652 } 653 if !r.Ok() || r.token.kind != tokenString { 654 r.errInvalidToken("string") 655 return nil 656 } 657 ret := make([]byte, base64.StdEncoding.DecodedLen(len(r.token.byteValue))) 658 n, err := base64.StdEncoding.Decode(ret, r.token.byteValue) 659 if err != nil { 660 r.fatalError = &LexerError{ 661 Reason: err.Error(), 662 } 663 return nil 664 } 665 666 r.consume() 667 return ret[:n] 668} 669 670// Bool reads a true or false boolean keyword. 671func (r *Lexer) Bool() bool { 672 if r.token.kind == tokenUndef && r.Ok() { 673 r.FetchToken() 674 } 675 if !r.Ok() || r.token.kind != tokenBool { 676 r.errInvalidToken("bool") 677 return false 678 } 679 ret := r.token.boolValue 680 r.consume() 681 return ret 682} 683 684func (r *Lexer) number() string { 685 if r.token.kind == tokenUndef && r.Ok() { 686 r.FetchToken() 687 } 688 if !r.Ok() || r.token.kind != tokenNumber { 689 r.errInvalidToken("number") 690 return "" 691 } 692 ret := bytesToStr(r.token.byteValue) 693 r.consume() 694 return ret 695} 696 697func (r *Lexer) Uint8() uint8 { 698 s := r.number() 699 if !r.Ok() { 700 return 0 701 } 702 703 n, err := strconv.ParseUint(s, 10, 8) 704 if err != nil { 705 r.addNonfatalError(&LexerError{ 706 Offset: r.start, 707 Reason: err.Error(), 708 Data: s, 709 }) 710 } 711 return uint8(n) 712} 713 714func (r *Lexer) Uint16() uint16 { 715 s := r.number() 716 if !r.Ok() { 717 return 0 718 } 719 720 n, err := strconv.ParseUint(s, 10, 16) 721 if err != nil { 722 r.addNonfatalError(&LexerError{ 723 Offset: r.start, 724 Reason: err.Error(), 725 Data: s, 726 }) 727 } 728 return uint16(n) 729} 730 731func (r *Lexer) Uint32() uint32 { 732 s := r.number() 733 if !r.Ok() { 734 return 0 735 } 736 737 n, err := strconv.ParseUint(s, 10, 32) 738 if err != nil { 739 r.addNonfatalError(&LexerError{ 740 Offset: r.start, 741 Reason: err.Error(), 742 Data: s, 743 }) 744 } 745 return uint32(n) 746} 
747 748func (r *Lexer) Uint64() uint64 { 749 s := r.number() 750 if !r.Ok() { 751 return 0 752 } 753 754 n, err := strconv.ParseUint(s, 10, 64) 755 if err != nil { 756 r.addNonfatalError(&LexerError{ 757 Offset: r.start, 758 Reason: err.Error(), 759 Data: s, 760 }) 761 } 762 return n 763} 764 765func (r *Lexer) Uint() uint { 766 return uint(r.Uint64()) 767} 768 769func (r *Lexer) Int8() int8 { 770 s := r.number() 771 if !r.Ok() { 772 return 0 773 } 774 775 n, err := strconv.ParseInt(s, 10, 8) 776 if err != nil { 777 r.addNonfatalError(&LexerError{ 778 Offset: r.start, 779 Reason: err.Error(), 780 Data: s, 781 }) 782 } 783 return int8(n) 784} 785 786func (r *Lexer) Int16() int16 { 787 s := r.number() 788 if !r.Ok() { 789 return 0 790 } 791 792 n, err := strconv.ParseInt(s, 10, 16) 793 if err != nil { 794 r.addNonfatalError(&LexerError{ 795 Offset: r.start, 796 Reason: err.Error(), 797 Data: s, 798 }) 799 } 800 return int16(n) 801} 802 803func (r *Lexer) Int32() int32 { 804 s := r.number() 805 if !r.Ok() { 806 return 0 807 } 808 809 n, err := strconv.ParseInt(s, 10, 32) 810 if err != nil { 811 r.addNonfatalError(&LexerError{ 812 Offset: r.start, 813 Reason: err.Error(), 814 Data: s, 815 }) 816 } 817 return int32(n) 818} 819 820func (r *Lexer) Int64() int64 { 821 s := r.number() 822 if !r.Ok() { 823 return 0 824 } 825 826 n, err := strconv.ParseInt(s, 10, 64) 827 if err != nil { 828 r.addNonfatalError(&LexerError{ 829 Offset: r.start, 830 Reason: err.Error(), 831 Data: s, 832 }) 833 } 834 return n 835} 836 837func (r *Lexer) Int() int { 838 return int(r.Int64()) 839} 840 841func (r *Lexer) Uint8Str() uint8 { 842 s, b := r.unsafeString() 843 if !r.Ok() { 844 return 0 845 } 846 847 n, err := strconv.ParseUint(s, 10, 8) 848 if err != nil { 849 r.addNonfatalError(&LexerError{ 850 Offset: r.start, 851 Reason: err.Error(), 852 Data: string(b), 853 }) 854 } 855 return uint8(n) 856} 857 858func (r *Lexer) Uint16Str() uint16 { 859 s, b := r.unsafeString() 860 if !r.Ok() { 861 
return 0 862 } 863 864 n, err := strconv.ParseUint(s, 10, 16) 865 if err != nil { 866 r.addNonfatalError(&LexerError{ 867 Offset: r.start, 868 Reason: err.Error(), 869 Data: string(b), 870 }) 871 } 872 return uint16(n) 873} 874 875func (r *Lexer) Uint32Str() uint32 { 876 s, b := r.unsafeString() 877 if !r.Ok() { 878 return 0 879 } 880 881 n, err := strconv.ParseUint(s, 10, 32) 882 if err != nil { 883 r.addNonfatalError(&LexerError{ 884 Offset: r.start, 885 Reason: err.Error(), 886 Data: string(b), 887 }) 888 } 889 return uint32(n) 890} 891 892func (r *Lexer) Uint64Str() uint64 { 893 s, b := r.unsafeString() 894 if !r.Ok() { 895 return 0 896 } 897 898 n, err := strconv.ParseUint(s, 10, 64) 899 if err != nil { 900 r.addNonfatalError(&LexerError{ 901 Offset: r.start, 902 Reason: err.Error(), 903 Data: string(b), 904 }) 905 } 906 return n 907} 908 909func (r *Lexer) UintStr() uint { 910 return uint(r.Uint64Str()) 911} 912 913func (r *Lexer) UintptrStr() uintptr { 914 return uintptr(r.Uint64Str()) 915} 916 917func (r *Lexer) Int8Str() int8 { 918 s, b := r.unsafeString() 919 if !r.Ok() { 920 return 0 921 } 922 923 n, err := strconv.ParseInt(s, 10, 8) 924 if err != nil { 925 r.addNonfatalError(&LexerError{ 926 Offset: r.start, 927 Reason: err.Error(), 928 Data: string(b), 929 }) 930 } 931 return int8(n) 932} 933 934func (r *Lexer) Int16Str() int16 { 935 s, b := r.unsafeString() 936 if !r.Ok() { 937 return 0 938 } 939 940 n, err := strconv.ParseInt(s, 10, 16) 941 if err != nil { 942 r.addNonfatalError(&LexerError{ 943 Offset: r.start, 944 Reason: err.Error(), 945 Data: string(b), 946 }) 947 } 948 return int16(n) 949} 950 951func (r *Lexer) Int32Str() int32 { 952 s, b := r.unsafeString() 953 if !r.Ok() { 954 return 0 955 } 956 957 n, err := strconv.ParseInt(s, 10, 32) 958 if err != nil { 959 r.addNonfatalError(&LexerError{ 960 Offset: r.start, 961 Reason: err.Error(), 962 Data: string(b), 963 }) 964 } 965 return int32(n) 966} 967 968func (r *Lexer) Int64Str() int64 { 969 s, 
b := r.unsafeString() 970 if !r.Ok() { 971 return 0 972 } 973 974 n, err := strconv.ParseInt(s, 10, 64) 975 if err != nil { 976 r.addNonfatalError(&LexerError{ 977 Offset: r.start, 978 Reason: err.Error(), 979 Data: string(b), 980 }) 981 } 982 return n 983} 984 985func (r *Lexer) IntStr() int { 986 return int(r.Int64Str()) 987} 988 989func (r *Lexer) Float32() float32 { 990 s := r.number() 991 if !r.Ok() { 992 return 0 993 } 994 995 n, err := strconv.ParseFloat(s, 32) 996 if err != nil { 997 r.addNonfatalError(&LexerError{ 998 Offset: r.start, 999 Reason: err.Error(), 1000 Data: s, 1001 }) 1002 } 1003 return float32(n) 1004} 1005 1006func (r *Lexer) Float32Str() float32 { 1007 s, b := r.unsafeString() 1008 if !r.Ok() { 1009 return 0 1010 } 1011 n, err := strconv.ParseFloat(s, 32) 1012 if err != nil { 1013 r.addNonfatalError(&LexerError{ 1014 Offset: r.start, 1015 Reason: err.Error(), 1016 Data: string(b), 1017 }) 1018 } 1019 return float32(n) 1020} 1021 1022func (r *Lexer) Float64() float64 { 1023 s := r.number() 1024 if !r.Ok() { 1025 return 0 1026 } 1027 1028 n, err := strconv.ParseFloat(s, 64) 1029 if err != nil { 1030 r.addNonfatalError(&LexerError{ 1031 Offset: r.start, 1032 Reason: err.Error(), 1033 Data: s, 1034 }) 1035 } 1036 return n 1037} 1038 1039func (r *Lexer) Float64Str() float64 { 1040 s, b := r.unsafeString() 1041 if !r.Ok() { 1042 return 0 1043 } 1044 n, err := strconv.ParseFloat(s, 64) 1045 if err != nil { 1046 r.addNonfatalError(&LexerError{ 1047 Offset: r.start, 1048 Reason: err.Error(), 1049 Data: string(b), 1050 }) 1051 } 1052 return n 1053} 1054 1055func (r *Lexer) Error() error { 1056 return r.fatalError 1057} 1058 1059func (r *Lexer) AddError(e error) { 1060 if r.fatalError == nil { 1061 r.fatalError = e 1062 } 1063} 1064 1065func (r *Lexer) AddNonFatalError(e error) { 1066 r.addNonfatalError(&LexerError{ 1067 Offset: r.start, 1068 Data: string(r.Data[r.start:r.pos]), 1069 Reason: e.Error(), 1070 }) 1071} 1072 1073func (r *Lexer) 
addNonfatalError(err *LexerError) { 1074 if r.UseMultipleErrors { 1075 // We don't want to add errors with the same offset. 1076 if len(r.multipleErrors) != 0 && r.multipleErrors[len(r.multipleErrors)-1].Offset == err.Offset { 1077 return 1078 } 1079 r.multipleErrors = append(r.multipleErrors, err) 1080 return 1081 } 1082 r.fatalError = err 1083} 1084 1085func (r *Lexer) GetNonFatalErrors() []*LexerError { 1086 return r.multipleErrors 1087} 1088 1089// JsonNumber fetches and json.Number from 'encoding/json' package. 1090// Both int, float or string, contains them are valid values 1091func (r *Lexer) JsonNumber() json.Number { 1092 if r.token.kind == tokenUndef && r.Ok() { 1093 r.FetchToken() 1094 } 1095 if !r.Ok() { 1096 r.errInvalidToken("json.Number") 1097 return json.Number("") 1098 } 1099 1100 switch r.token.kind { 1101 case tokenString: 1102 return json.Number(r.String()) 1103 case tokenNumber: 1104 return json.Number(r.Raw()) 1105 case tokenNull: 1106 r.Null() 1107 return json.Number("") 1108 default: 1109 r.errSyntax() 1110 return json.Number("") 1111 } 1112} 1113 1114// Interface fetches an interface{} analogous to the 'encoding/json' package. 
1115func (r *Lexer) Interface() interface{} { 1116 if r.token.kind == tokenUndef && r.Ok() { 1117 r.FetchToken() 1118 } 1119 1120 if !r.Ok() { 1121 return nil 1122 } 1123 switch r.token.kind { 1124 case tokenString: 1125 return r.String() 1126 case tokenNumber: 1127 return r.Float64() 1128 case tokenBool: 1129 return r.Bool() 1130 case tokenNull: 1131 r.Null() 1132 return nil 1133 } 1134 1135 if r.token.delimValue == '{' { 1136 r.consume() 1137 1138 ret := map[string]interface{}{} 1139 for !r.IsDelim('}') { 1140 key := r.String() 1141 r.WantColon() 1142 ret[key] = r.Interface() 1143 r.WantComma() 1144 } 1145 r.Delim('}') 1146 1147 if r.Ok() { 1148 return ret 1149 } else { 1150 return nil 1151 } 1152 } else if r.token.delimValue == '[' { 1153 r.consume() 1154 1155 ret := []interface{}{} 1156 for !r.IsDelim(']') { 1157 ret = append(ret, r.Interface()) 1158 r.WantComma() 1159 } 1160 r.Delim(']') 1161 1162 if r.Ok() { 1163 return ret 1164 } else { 1165 return nil 1166 } 1167 } 1168 r.errSyntax() 1169 return nil 1170} 1171 1172// WantComma requires a comma to be present before fetching next token. 1173func (r *Lexer) WantComma() { 1174 r.wantSep = ',' 1175 r.firstElement = false 1176} 1177 1178// WantColon requires a colon to be present before fetching next token. 1179func (r *Lexer) WantColon() { 1180 r.wantSep = ':' 1181 r.firstElement = false 1182} 1183