1// TOML lexer. 2// 3// Written using the principles developed by Rob Pike in 4// http://www.youtube.com/watch?v=HxaD_trXwRE 5 6package toml 7 8import ( 9 "bytes" 10 "errors" 11 "fmt" 12 "strconv" 13 "strings" 14) 15 16// Define state functions 17type tomlLexStateFn func() tomlLexStateFn 18 19// Define lexer 20type tomlLexer struct { 21 inputIdx int 22 input []rune // Textual source 23 currentTokenStart int 24 currentTokenStop int 25 tokens []token 26 brackets []rune 27 line int 28 col int 29 endbufferLine int 30 endbufferCol int 31} 32 33// Basic read operations on input 34 35func (l *tomlLexer) read() rune { 36 r := l.peek() 37 if r == '\n' { 38 l.endbufferLine++ 39 l.endbufferCol = 1 40 } else { 41 l.endbufferCol++ 42 } 43 l.inputIdx++ 44 return r 45} 46 47func (l *tomlLexer) next() rune { 48 r := l.read() 49 50 if r != eof { 51 l.currentTokenStop++ 52 } 53 return r 54} 55 56func (l *tomlLexer) ignore() { 57 l.currentTokenStart = l.currentTokenStop 58 l.line = l.endbufferLine 59 l.col = l.endbufferCol 60} 61 62func (l *tomlLexer) skip() { 63 l.next() 64 l.ignore() 65} 66 67func (l *tomlLexer) fastForward(n int) { 68 for i := 0; i < n; i++ { 69 l.next() 70 } 71} 72 73func (l *tomlLexer) emitWithValue(t tokenType, value string) { 74 l.tokens = append(l.tokens, token{ 75 Position: Position{l.line, l.col}, 76 typ: t, 77 val: value, 78 }) 79 l.ignore() 80} 81 82func (l *tomlLexer) emit(t tokenType) { 83 l.emitWithValue(t, string(l.input[l.currentTokenStart:l.currentTokenStop])) 84} 85 86func (l *tomlLexer) peek() rune { 87 if l.inputIdx >= len(l.input) { 88 return eof 89 } 90 return l.input[l.inputIdx] 91} 92 93func (l *tomlLexer) peekString(size int) string { 94 maxIdx := len(l.input) 95 upperIdx := l.inputIdx + size // FIXME: potential overflow 96 if upperIdx > maxIdx { 97 upperIdx = maxIdx 98 } 99 return string(l.input[l.inputIdx:upperIdx]) 100} 101 102func (l *tomlLexer) follow(next string) bool { 103 return next == l.peekString(len(next)) 104} 105 106// Error management 107 108func (l *tomlLexer) errorf(format string, args ...interface{}) tomlLexStateFn { 109 l.tokens = append(l.tokens, token{ 110 Position: Position{l.line, l.col}, 111 typ: tokenError, 112 val: fmt.Sprintf(format, args...), 113 }) 114 return nil 115} 116 117// State functions 118 119func (l *tomlLexer) lexVoid() tomlLexStateFn { 120 for { 121 next := l.peek() 122 switch next { 123 case '}': // after '{' 124 return l.lexRightCurlyBrace 125 case '[': 126 return l.lexTableKey 127 case '#': 128 return l.lexComment(l.lexVoid) 129 case '=': 130 return l.lexEqual 131 case '\r': 132 fallthrough 133 case '\n': 134 l.skip() 135 continue 136 } 137 138 if isSpace(next) { 139 l.skip() 140 } 141 142 if isKeyStartChar(next) { 143 return l.lexKey 144 } 145 146 if next == eof { 147 l.next() 148 break 149 } 150 } 151 152 l.emit(tokenEOF) 153 return nil 154} 155 156func (l *tomlLexer) lexRvalue() tomlLexStateFn { 157 for { 158 next := l.peek() 159 switch next { 160 case '.': 161 return l.errorf("cannot start float with a dot") 162 case '=': 163 return l.lexEqual 164 case '[': 165 return l.lexLeftBracket 166 case ']': 167 return l.lexRightBracket 168 case '{': 169 return l.lexLeftCurlyBrace 170 case '}': 171 return l.lexRightCurlyBrace 172 case '#': 173 return l.lexComment(l.lexRvalue) 174 case '"': 175 return l.lexString 176 case '\'': 177 return l.lexLiteralString 178 case ',': 179 return l.lexComma 180 case '\r': 181 fallthrough 182 case '\n': 183 l.skip() 184 if len(l.brackets) > 0 && l.brackets[len(l.brackets)-1] == '[' { 185 return l.lexRvalue 186 } 187 return l.lexVoid 188 } 189 190 if l.follow("true") { 191 return l.lexTrue 192 } 193 194 if l.follow("false") { 195 return l.lexFalse 196 } 197 198 if l.follow("inf") { 199 return l.lexInf 200 } 201 202 if l.follow("nan") { 203 return l.lexNan 204 } 205 206 if isSpace(next) { 207 l.skip() 208 continue 209 } 210 211 if next == eof { 212 l.next() 213 break 214 } 215 216 if next == '+' || next == '-' { 217 return l.lexNumber 218 } 219 220 if isDigit(next) { 221 return l.lexDateTimeOrNumber 222 } 223 224 return l.errorf("no value can start with %c", next) 225 } 226 227 l.emit(tokenEOF) 228 return nil 229} 230 231func (l *tomlLexer) lexDateTimeOrNumber() tomlLexStateFn { 232 // Could be either a date/time, or a digit. 233 // The options for date/times are: 234 // YYYY-... => date or date-time 235 // HH:... => time 236 // Anything else should be a number. 237 238 lookAhead := l.peekString(5) 239 if len(lookAhead) < 3 { 240 return l.lexNumber() 241 } 242 243 for idx, r := range lookAhead { 244 if !isDigit(r) { 245 if idx == 2 && r == ':' { 246 return l.lexDateTimeOrTime() 247 } 248 if idx == 4 && r == '-' { 249 return l.lexDateTimeOrTime() 250 } 251 return l.lexNumber() 252 } 253 } 254 return l.lexNumber() 255} 256 257func (l *tomlLexer) lexLeftCurlyBrace() tomlLexStateFn { 258 l.next() 259 l.emit(tokenLeftCurlyBrace) 260 l.brackets = append(l.brackets, '{') 261 return l.lexVoid 262} 263 264func (l *tomlLexer) lexRightCurlyBrace() tomlLexStateFn { 265 l.next() 266 l.emit(tokenRightCurlyBrace) 267 if len(l.brackets) == 0 || l.brackets[len(l.brackets)-1] != '{' { 268 return l.errorf("cannot have '}' here") 269 } 270 l.brackets = l.brackets[:len(l.brackets)-1] 271 return l.lexRvalue 272} 273 274func (l *tomlLexer) lexDateTimeOrTime() tomlLexStateFn { 275 // Example matches: 276 // 1979-05-27T07:32:00Z 277 // 1979-05-27T00:32:00-07:00 278 // 1979-05-27T00:32:00.999999-07:00 279 // 1979-05-27 07:32:00Z 280 // 1979-05-27 00:32:00-07:00 281 // 1979-05-27 00:32:00.999999-07:00 282 // 1979-05-27T07:32:00 283 // 1979-05-27T00:32:00.999999 284 // 1979-05-27 07:32:00 285 // 1979-05-27 00:32:00.999999 286 // 1979-05-27 287 // 07:32:00 288 // 00:32:00.999999 289 290 // we already know those two are digits 291 l.next() 292 l.next() 293 294 // Got 2 digits. At that point it could be either a time or a date(-time). 295 296 r := l.next() 297 if r == ':' { 298 return l.lexTime() 299 } 300 301 return l.lexDateTime() 302} 303 304func (l *tomlLexer) lexDateTime() tomlLexStateFn { 305 // This state accepts an offset date-time, a local date-time, or a local date. 306 // 307 // v--- cursor 308 // 1979-05-27T07:32:00Z 309 // 1979-05-27T00:32:00-07:00 310 // 1979-05-27T00:32:00.999999-07:00 311 // 1979-05-27 07:32:00Z 312 // 1979-05-27 00:32:00-07:00 313 // 1979-05-27 00:32:00.999999-07:00 314 // 1979-05-27T07:32:00 315 // 1979-05-27T00:32:00.999999 316 // 1979-05-27 07:32:00 317 // 1979-05-27 00:32:00.999999 318 // 1979-05-27 319 320 // date 321 322 // already checked by lexRvalue 323 l.next() // digit 324 l.next() // - 325 326 for i := 0; i < 2; i++ { 327 r := l.next() 328 if !isDigit(r) { 329 return l.errorf("invalid month digit in date: %c", r) 330 } 331 } 332 333 r := l.next() 334 if r != '-' { 335 return l.errorf("expected - to separate month of a date, not %c", r) 336 } 337 338 for i := 0; i < 2; i++ { 339 r := l.next() 340 if !isDigit(r) { 341 return l.errorf("invalid day digit in date: %c", r) 342 } 343 } 344 345 l.emit(tokenLocalDate) 346 347 r = l.peek() 348 349 if r == eof { 350 351 return l.lexRvalue 352 } 353 354 if r != ' ' && r != 'T' { 355 return l.errorf("incorrect date/time separation character: %c", r) 356 } 357 358 if r == ' ' { 359 lookAhead := l.peekString(3)[1:] 360 if len(lookAhead) < 2 { 361 return l.lexRvalue 362 } 363 for _, r := range lookAhead { 364 if !isDigit(r) { 365 return l.lexRvalue 366 } 367 } 368 } 369 370 l.skip() // skip the T or ' ' 371 372 // time 373 374 for i := 0; i < 2; i++ { 375 r := l.next() 376 if !isDigit(r) { 377 return l.errorf("invalid hour digit in time: %c", r) 378 } 379 } 380 381 r = l.next() 382 if r != ':' { 383 return l.errorf("time hour/minute separator should be :, not %c", r) 384 } 385 386 for i := 0; i < 2; i++ { 387 r := l.next() 388 if !isDigit(r) { 389 return l.errorf("invalid minute digit in time: %c", r) 390 } 391 } 392 393 r = l.next() 394 if r != ':' { 395 return l.errorf("time minute/second separator should be :, not %c", r) 396 } 397 398 for i := 0; i < 2; i++ { 399 r := l.next() 400 if !isDigit(r) { 401 return l.errorf("invalid second digit in time: %c", r) 402 } 403 } 404 405 r = l.peek() 406 if r == '.' { 407 l.next() 408 r := l.next() 409 if !isDigit(r) { 410 return l.errorf("expected at least one digit in time's fraction, not %c", r) 411 } 412 413 for { 414 r := l.peek() 415 if !isDigit(r) { 416 break 417 } 418 l.next() 419 } 420 } 421 422 l.emit(tokenLocalTime) 423 424 return l.lexTimeOffset 425 426} 427 428func (l *tomlLexer) lexTimeOffset() tomlLexStateFn { 429 // potential offset 430 431 // Z 432 // -07:00 433 // +07:00 434 // nothing 435 436 r := l.peek() 437 438 if r == 'Z' { 439 l.next() 440 l.emit(tokenTimeOffset) 441 } else if r == '+' || r == '-' { 442 l.next() 443 444 for i := 0; i < 2; i++ { 445 r := l.next() 446 if !isDigit(r) { 447 return l.errorf("invalid hour digit in time offset: %c", r) 448 } 449 } 450 451 r = l.next() 452 if r != ':' { 453 return l.errorf("time offset hour/minute separator should be :, not %c", r) 454 } 455 456 for i := 0; i < 2; i++ { 457 r := l.next() 458 if !isDigit(r) { 459 return l.errorf("invalid minute digit in time offset: %c", r) 460 } 461 } 462 463 l.emit(tokenTimeOffset) 464 } 465 466 return l.lexRvalue 467} 468 469func (l *tomlLexer) lexTime() tomlLexStateFn { 470 // v--- cursor 471 // 07:32:00 472 // 00:32:00.999999 473 474 for i := 0; i < 2; i++ { 475 r := l.next() 476 if !isDigit(r) { 477 return l.errorf("invalid minute digit in time: %c", r) 478 } 479 } 480 481 r := l.next() 482 if r != ':' { 483 return l.errorf("time minute/second separator should be :, not %c", r) 484 } 485 486 for i := 0; i < 2; i++ { 487 r := l.next() 488 if !isDigit(r) { 489 return l.errorf("invalid second digit in time: %c", r) 490 } 491 } 492 493 r = l.peek() 494 if r == '.' { 495 l.next() 496 r := l.next() 497 if !isDigit(r) { 498 return l.errorf("expected at least one digit in time's fraction, not %c", r) 499 } 500 501 for { 502 r := l.peek() 503 if !isDigit(r) { 504 break 505 } 506 l.next() 507 } 508 } 509 510 l.emit(tokenLocalTime) 511 return l.lexRvalue 512 513} 514 515func (l *tomlLexer) lexTrue() tomlLexStateFn { 516 l.fastForward(4) 517 l.emit(tokenTrue) 518 return l.lexRvalue 519} 520 521func (l *tomlLexer) lexFalse() tomlLexStateFn { 522 l.fastForward(5) 523 l.emit(tokenFalse) 524 return l.lexRvalue 525} 526 527func (l *tomlLexer) lexInf() tomlLexStateFn { 528 l.fastForward(3) 529 l.emit(tokenInf) 530 return l.lexRvalue 531} 532 533func (l *tomlLexer) lexNan() tomlLexStateFn { 534 l.fastForward(3) 535 l.emit(tokenNan) 536 return l.lexRvalue 537} 538 539func (l *tomlLexer) lexEqual() tomlLexStateFn { 540 l.next() 541 l.emit(tokenEqual) 542 return l.lexRvalue 543} 544 545func (l *tomlLexer) lexComma() tomlLexStateFn { 546 l.next() 547 l.emit(tokenComma) 548 if len(l.brackets) > 0 && l.brackets[len(l.brackets)-1] == '{' { 549 return l.lexVoid 550 } 551 return l.lexRvalue 552} 553 554// Parse the key and emits its value without escape sequences. 555// bare keys, basic string keys and literal string keys are supported. 556func (l *tomlLexer) lexKey() tomlLexStateFn { 557 var sb strings.Builder 558 559 for r := l.peek(); isKeyChar(r) || r == '\n' || r == '\r'; r = l.peek() { 560 if r == '"' { 561 l.next() 562 str, err := l.lexStringAsString(`"`, false, true) 563 if err != nil { 564 return l.errorf(err.Error()) 565 } 566 sb.WriteString("\"") 567 sb.WriteString(str) 568 sb.WriteString("\"") 569 l.next() 570 continue 571 } else if r == '\'' { 572 l.next() 573 str, err := l.lexLiteralStringAsString(`'`, false) 574 if err != nil { 575 return l.errorf(err.Error()) 576 } 577 sb.WriteString("'") 578 sb.WriteString(str) 579 sb.WriteString("'") 580 l.next() 581 continue 582 } else if r == '\n' { 583 return l.errorf("keys cannot contain new lines") 584 } else if isSpace(r) { 585 var str strings.Builder 586 str.WriteString(" ") 587 588 // skip trailing whitespace 589 l.next() 590 for r = l.peek(); isSpace(r); r = l.peek() { 591 str.WriteRune(r) 592 l.next() 593 } 594 // break loop if not a dot 595 if r != '.' { 596 break 597 } 598 str.WriteString(".") 599 // skip trailing whitespace after dot 600 l.next() 601 for r = l.peek(); isSpace(r); r = l.peek() { 602 str.WriteRune(r) 603 l.next() 604 } 605 sb.WriteString(str.String()) 606 continue 607 } else if r == '.' { 608 // skip 609 } else if !isValidBareChar(r) { 610 return l.errorf("keys cannot contain %c character", r) 611 } 612 sb.WriteRune(r) 613 l.next() 614 } 615 l.emitWithValue(tokenKey, sb.String()) 616 return l.lexVoid 617} 618 619func (l *tomlLexer) lexComment(previousState tomlLexStateFn) tomlLexStateFn { 620 return func() tomlLexStateFn { 621 for next := l.peek(); next != '\n' && next != eof; next = l.peek() { 622 if next == '\r' && l.follow("\r\n") { 623 break 624 } 625 l.next() 626 } 627 l.ignore() 628 return previousState 629 } 630} 631 632func (l *tomlLexer) lexLeftBracket() tomlLexStateFn { 633 l.next() 634 l.emit(tokenLeftBracket) 635 l.brackets = append(l.brackets, '[') 636 return l.lexRvalue 637} 638 639func (l *tomlLexer) lexLiteralStringAsString(terminator string, discardLeadingNewLine bool) (string, error) { 640 var sb strings.Builder 641 642 if discardLeadingNewLine { 643 if l.follow("\r\n") { 644 l.skip() 645 l.skip() 646 } else if l.peek() == '\n' { 647 l.skip() 648 } 649 } 650 651 // find end of string 652 for { 653 if l.follow(terminator) { 654 return sb.String(), nil 655 } 656 657 next := l.peek() 658 if next == eof { 659 break 660 } 661 sb.WriteRune(l.next()) 662 } 663 664 return "", errors.New("unclosed string") 665} 666 667func (l *tomlLexer) lexLiteralString() tomlLexStateFn { 668 l.skip() 669 670 // handle special case for triple-quote 671 terminator := "'" 672 discardLeadingNewLine := false 673 if l.follow("''") { 674 l.skip() 675 l.skip() 676 terminator = "'''" 677 discardLeadingNewLine = true 678 } 679 680 str, err := l.lexLiteralStringAsString(terminator, discardLeadingNewLine) 681 if err != nil { 682 return l.errorf(err.Error()) 683 } 684 685 l.emitWithValue(tokenString, str) 686 l.fastForward(len(terminator)) 687 l.ignore() 688 return l.lexRvalue 689} 690 691// Lex a string and return the results as a string. 692// Terminator is the substring indicating the end of the token. 693// The resulting string does not include the terminator. 694func (l *tomlLexer) lexStringAsString(terminator string, discardLeadingNewLine, acceptNewLines bool) (string, error) { 695 var sb strings.Builder 696 697 if discardLeadingNewLine { 698 if l.follow("\r\n") { 699 l.skip() 700 l.skip() 701 } else if l.peek() == '\n' { 702 l.skip() 703 } 704 } 705 706 for { 707 if l.follow(terminator) { 708 return sb.String(), nil 709 } 710 711 if l.follow("\\") { 712 l.next() 713 switch l.peek() { 714 case '\r': 715 fallthrough 716 case '\n': 717 fallthrough 718 case '\t': 719 fallthrough 720 case ' ': 721 // skip all whitespace chars following backslash 722 for strings.ContainsRune("\r\n\t ", l.peek()) { 723 l.next() 724 } 725 case '"': 726 sb.WriteString("\"") 727 l.next() 728 case 'n': 729 sb.WriteString("\n") 730 l.next() 731 case 'b': 732 sb.WriteString("\b") 733 l.next() 734 case 'f': 735 sb.WriteString("\f") 736 l.next() 737 case '/': 738 sb.WriteString("/") 739 l.next() 740 case 't': 741 sb.WriteString("\t") 742 l.next() 743 case 'r': 744 sb.WriteString("\r") 745 l.next() 746 case '\\': 747 sb.WriteString("\\") 748 l.next() 749 case 'u': 750 l.next() 751 var code strings.Builder 752 for i := 0; i < 4; i++ { 753 c := l.peek() 754 if !isHexDigit(c) { 755 return "", errors.New("unfinished unicode escape") 756 } 757 l.next() 758 code.WriteRune(c) 759 } 760 intcode, err := strconv.ParseInt(code.String(), 16, 32) 761 if err != nil { 762 return "", errors.New("invalid unicode escape: \\u" + code.String()) 763 } 764 sb.WriteRune(rune(intcode)) 765 case 'U': 766 l.next() 767 var code strings.Builder 768 for i := 0; i < 8; i++ { 769 c := l.peek() 770 if !isHexDigit(c) { 771 return "", errors.New("unfinished unicode escape") 772 } 773 l.next() 774 code.WriteRune(c) 775 } 776 intcode, err := strconv.ParseInt(code.String(), 16, 64) 777 if err != nil { 778 return "", errors.New("invalid unicode escape: \\U" + code.String()) 779 } 780 sb.WriteRune(rune(intcode)) 781 default: 782 return "", errors.New("invalid escape sequence: \\" + string(l.peek())) 783 } 784 } else { 785 r := l.peek() 786 787 if 0x00 <= r && r <= 0x1F && r != '\t' && !(acceptNewLines && (r == '\n' || r == '\r')) { 788 return "", fmt.Errorf("unescaped control character %U", r) 789 } 790 l.next() 791 sb.WriteRune(r) 792 } 793 794 if l.peek() == eof { 795 break 796 } 797 } 798 799 return "", errors.New("unclosed string") 800} 801 802func (l *tomlLexer) lexString() tomlLexStateFn { 803 l.skip() 804 805 // handle special case for triple-quote 806 terminator := `"` 807 discardLeadingNewLine := false 808 acceptNewLines := false 809 if l.follow(`""`) { 810 l.skip() 811 l.skip() 812 terminator = `"""` 813 discardLeadingNewLine = true 814 acceptNewLines = true 815 } 816 817 str, err := l.lexStringAsString(terminator, discardLeadingNewLine, acceptNewLines) 818 if err != nil { 819 return l.errorf(err.Error()) 820 } 821 822 l.emitWithValue(tokenString, str) 823 l.fastForward(len(terminator)) 824 l.ignore() 825 return l.lexRvalue 826} 827 828func (l *tomlLexer) lexTableKey() tomlLexStateFn { 829 l.next() 830 831 if l.peek() == '[' { 832 // token '[[' signifies an array of tables 833 l.next() 834 l.emit(tokenDoubleLeftBracket) 835 return l.lexInsideTableArrayKey 836 } 837 // vanilla table key 838 l.emit(tokenLeftBracket) 839 return l.lexInsideTableKey 840} 841 842// Parse the key till "]]", but only bare keys are supported 843func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn { 844 for r := l.peek(); r != eof; r = l.peek() { 845 switch r { 846 case ']': 847 if l.currentTokenStop > l.currentTokenStart { 848 l.emit(tokenKeyGroupArray) 849 } 850 l.next() 851 if l.peek() != ']' { 852 break 853 } 854 l.next() 855 l.emit(tokenDoubleRightBracket) 856 return l.lexVoid 857 case '[': 858 return l.errorf("table array key cannot contain ']'") 859 default: 860 l.next() 861 } 862 } 863 return l.errorf("unclosed table array key") 864} 865 866// Parse the key till "]" but only bare keys are supported 867func (l *tomlLexer) lexInsideTableKey() tomlLexStateFn { 868 for r := l.peek(); r != eof; r = l.peek() { 869 switch r { 870 case ']': 871 if l.currentTokenStop > l.currentTokenStart { 872 l.emit(tokenKeyGroup) 873 } 874 l.next() 875 l.emit(tokenRightBracket) 876 return l.lexVoid 877 case '[': 878 return l.errorf("table key cannot contain ']'") 879 default: 880 l.next() 881 } 882 } 883 return l.errorf("unclosed table key") 884} 885 886func (l *tomlLexer) lexRightBracket() tomlLexStateFn { 887 l.next() 888 l.emit(tokenRightBracket) 889 if len(l.brackets) == 0 || l.brackets[len(l.brackets)-1] != '[' { 890 return l.errorf("cannot have ']' here") 891 } 892 l.brackets = l.brackets[:len(l.brackets)-1] 893 return l.lexRvalue 894} 895 896type validRuneFn func(r rune) bool 897 898func isValidHexRune(r rune) bool { 899 return r >= 'a' && r <= 'f' || 900 r >= 'A' && r <= 'F' || 901 r >= '0' && r <= '9' || 902 r == '_' 903} 904 905func isValidOctalRune(r rune) bool { 906 return r >= '0' && r <= '7' || r == '_' 907} 908 909func isValidBinaryRune(r rune) bool { 910 return r == '0' || r == '1' || r == '_' 911} 912 913func (l *tomlLexer) lexNumber() tomlLexStateFn { 914 r := l.peek() 915 916 if r == '0' { 917 follow := l.peekString(2) 918 if len(follow) == 2 { 919 var isValidRune validRuneFn 920 switch follow[1] { 921 case 'x': 922 isValidRune = isValidHexRune 923 case 'o': 924 isValidRune = isValidOctalRune 925 case 'b': 926 isValidRune = isValidBinaryRune 927 default: 928 if follow[1] >= 'a' && follow[1] <= 'z' || follow[1] >= 'A' && follow[1] <= 'Z' { 929 return l.errorf("unknown number base: %s. possible options are x (hex) o (octal) b (binary)", string(follow[1])) 930 } 931 } 932 933 if isValidRune != nil { 934 l.next() 935 l.next() 936 digitSeen := false 937 for { 938 next := l.peek() 939 if !isValidRune(next) { 940 break 941 } 942 digitSeen = true 943 l.next() 944 } 945 946 if !digitSeen { 947 return l.errorf("number needs at least one digit") 948 } 949 950 l.emit(tokenInteger) 951 952 return l.lexRvalue 953 } 954 } 955 } 956 957 if r == '+' || r == '-' { 958 l.next() 959 if l.follow("inf") { 960 return l.lexInf 961 } 962 if l.follow("nan") { 963 return l.lexNan 964 } 965 } 966 967 pointSeen := false 968 expSeen := false 969 digitSeen := false 970 for { 971 next := l.peek() 972 if next == '.' { 973 if pointSeen { 974 return l.errorf("cannot have two dots in one float") 975 } 976 l.next() 977 if !isDigit(l.peek()) { 978 return l.errorf("float cannot end with a dot") 979 } 980 pointSeen = true 981 } else if next == 'e' || next == 'E' { 982 expSeen = true 983 l.next() 984 r := l.peek() 985 if r == '+' || r == '-' { 986 l.next() 987 } 988 } else if isDigit(next) { 989 digitSeen = true 990 l.next() 991 } else if next == '_' { 992 l.next() 993 } else { 994 break 995 } 996 if pointSeen && !digitSeen { 997 return l.errorf("cannot start float with a dot") 998 } 999 } 1000 1001 if !digitSeen { 1002 return l.errorf("no digit in that number") 1003 } 1004 if pointSeen || expSeen { 1005 l.emit(tokenFloat) 1006 } else { 1007 l.emit(tokenInteger) 1008 } 1009 return l.lexRvalue 1010} 1011 1012func (l *tomlLexer) run() { 1013 for state := l.lexVoid; state != nil; { 1014 state = state() 1015 } 1016} 1017 1018// Entry point 1019func lexToml(inputBytes []byte) []token { 1020 runes := bytes.Runes(inputBytes) 1021 l := &tomlLexer{ 1022 input: runes, 1023 tokens: make([]token, 0, 256), 1024 line: 1, 1025 col: 1, 1026 endbufferLine: 1, 1027 endbufferCol: 1, 1028 } 1029 l.run() 1030 return l.tokens 1031} 1032