package toml

import (
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)

type itemType int

const (
	itemError itemType = iota
	itemNIL // used in the parser to indicate no type
	itemEOF
	itemText
	itemString
	itemRawString
	itemMultilineString
	itemRawMultilineString
	itemBool
	itemInteger
	itemFloat
	itemDatetime
	itemArray // the start of an array
	itemArrayEnd
	itemTableStart
	itemTableEnd
	itemArrayTableStart
	itemArrayTableEnd
	itemKeyStart
	itemCommentStart
	itemInlineTableStart
	itemInlineTableEnd
)

const (
	eof              = 0
	comma            = ','
	tableStart       = '['
	tableEnd         = ']'
	arrayTableStart  = '['
	arrayTableEnd    = ']'
	tableSep         = '.'
	keySep           = '='
	arrayStart       = '['
	arrayEnd         = ']'
	commentStart     = '#'
	stringStart      = '"'
	stringEnd        = '"'
	rawStringStart   = '\''
	rawStringEnd     = '\''
	inlineTableStart = '{'
	inlineTableEnd   = '}'
)

type stateFn func(lx *lexer) stateFn

type lexer struct {
	input string
	start int
	pos   int
	line  int
	state stateFn
	items chan item

	// Allow for backing up up to three runes.
	// This is necessary because TOML contains 3-rune tokens (""" and ''').
	prevWidths [3]int
	nprev      int // how many of prevWidths are in use
	// If we emit an eof, we can still back up, but it is not OK to call
	// next again.
	atEOF bool

	// A stack of state functions used to maintain context.
	// The idea is to reuse parts of the state machine in various places.
	// For example, values can appear at the top level or within arbitrarily
	// nested arrays. The last state on the stack is used after a value has
	// been lexed. Similarly for comments.
	stack []stateFn
}

type item struct {
	typ  itemType
	val  string
	line int
}

func (lx *lexer) nextItem() item {
	for {
		select {
		case item := <-lx.items:
			return item
		default:
			lx.state = lx.state(lx)
		}
	}
}

func lex(input string) *lexer {
	lx := &lexer{
		input: input,
		state: lexTop,
		line:  1,
		items: make(chan item, 10),
		stack: make([]stateFn, 0, 10),
	}
	return lx
}

func (lx *lexer) push(state stateFn) {
	lx.stack = append(lx.stack, state)
}

func (lx *lexer) pop() stateFn {
	if len(lx.stack) == 0 {
		return lx.errorf("BUG in lexer: no states to pop")
	}
	last := lx.stack[len(lx.stack)-1]
	lx.stack = lx.stack[0 : len(lx.stack)-1]
	return last
}

func (lx *lexer) current() string {
	return lx.input[lx.start:lx.pos]
}

func (lx *lexer) emit(typ itemType) {
	lx.items <- item{typ, lx.current(), lx.line}
	lx.start = lx.pos
}

func (lx *lexer) emitTrim(typ itemType) {
	lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line}
	lx.start = lx.pos
}

func (lx *lexer) next() (r rune) {
	if lx.atEOF {
		panic("next called after EOF")
	}
	if lx.pos >= len(lx.input) {
		lx.atEOF = true
		return eof
	}

	if lx.input[lx.pos] == '\n' {
		lx.line++
	}
	lx.prevWidths[2] = lx.prevWidths[1]
	lx.prevWidths[1] = lx.prevWidths[0]
	if lx.nprev < 3 {
		lx.nprev++
	}
	r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
	lx.prevWidths[0] = w
	lx.pos += w
	return r
}

// ignore skips over the pending input before this point.
func (lx *lexer) ignore() {
	lx.start = lx.pos
}
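// Usage sketch (illustrative only; the parser in this package is assumed to
// be the real caller): the lexer is driven by calling nextItem repeatedly
// until an itemEOF or itemError item is returned.
//
//	lx := lex(`key = "value"`)
//	for {
//		it := lx.nextItem()
//		if it.typ == itemEOF || it.typ == itemError {
//			break
//		}
//		// use it.typ, it.val, and it.line here
//	}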
// backup steps back one rune. Can be called only twice between calls to next.
func (lx *lexer) backup() {
	if lx.atEOF {
		lx.atEOF = false
		return
	}
	if lx.nprev < 1 {
		panic("backed up too far")
	}
	w := lx.prevWidths[0]
	lx.prevWidths[0] = lx.prevWidths[1]
	lx.prevWidths[1] = lx.prevWidths[2]
	lx.nprev--
	lx.pos -= w
	if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
		lx.line--
	}
}

// accept consumes the next rune if it's equal to `valid`.
func (lx *lexer) accept(valid rune) bool {
	if lx.next() == valid {
		return true
	}
	lx.backup()
	return false
}

// peek returns but does not consume the next rune in the input.
func (lx *lexer) peek() rune {
	r := lx.next()
	lx.backup()
	return r
}

// skip ignores all input that matches the given predicate.
func (lx *lexer) skip(pred func(rune) bool) {
	for {
		r := lx.next()
		if pred(r) {
			continue
		}
		lx.backup()
		lx.ignore()
		return
	}
}

// errorf stops all lexing by emitting an error and returning `nil`.
// Note that any value that is a character is escaped if it's a special
// character (newlines, tabs, etc.).
func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
	lx.items <- item{
		itemError,
		fmt.Sprintf(format, values...),
		lx.line,
	}
	return nil
}

// lexTop consumes elements at the top level of TOML data.
func lexTop(lx *lexer) stateFn {
	r := lx.next()
	if isWhitespace(r) || isNL(r) {
		return lexSkip(lx, lexTop)
	}
	switch r {
	case commentStart:
		lx.push(lexTop)
		return lexCommentStart
	case tableStart:
		return lexTableStart
	case eof:
		if lx.pos > lx.start {
			return lx.errorf("unexpected EOF")
		}
		lx.emit(itemEOF)
		return nil
	}

	// At this point, the only valid item can be a key, so we back up
	// and let the key lexer do the rest.
	lx.backup()
	lx.push(lexTopEnd)
	return lexKeyStart
}

// lexTopEnd is entered whenever a top-level item has been consumed. (A value
// or a table.) It must see only whitespace, and will turn back to lexTop
// upon a newline. If it sees EOF, it will quit the lexer successfully.
func lexTopEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == commentStart:
		// a comment will read to a newline for us.
		lx.push(lexTop)
		return lexCommentStart
	case isWhitespace(r):
		return lexTopEnd
	case isNL(r):
		lx.ignore()
		return lexTop
	case r == eof:
		lx.emit(itemEOF)
		return nil
	}
	return lx.errorf("expected a top-level item to end with a newline, "+
		"comment, or EOF, but got %q instead", r)
}
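// State-flow sketch (illustrative): for a simple top-level line such as
//
//	key = "value"
//
// control moves roughly as
//
//	lexTop -> lexKeyStart -> lexBareKey -> lexKeyEnd -> lexValue -> lexString
//
// with lexTopEnd pushed onto the stack by lexTop and popped once the string
// has been emitted. The items produced are itemKeyStart, itemText("key"),
// and itemString("value").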
// lexTableStart lexes the beginning of a table. Namely, it makes sure that
// it starts with a character other than '.' and ']'.
// It assumes that '[' has already been consumed.
// It also handles the case that this is an item in an array of tables,
// e.g., '[[name]]'.
func lexTableStart(lx *lexer) stateFn {
	if lx.peek() == arrayTableStart {
		lx.next()
		lx.emit(itemArrayTableStart)
		lx.push(lexArrayTableEnd)
	} else {
		lx.emit(itemTableStart)
		lx.push(lexTableEnd)
	}
	return lexTableNameStart
}

func lexTableEnd(lx *lexer) stateFn {
	lx.emit(itemTableEnd)
	return lexTopEnd
}

func lexArrayTableEnd(lx *lexer) stateFn {
	if r := lx.next(); r != arrayTableEnd {
		return lx.errorf("expected end of table array name delimiter %q, "+
			"but got %q instead", arrayTableEnd, r)
	}
	lx.emit(itemArrayTableEnd)
	return lexTopEnd
}

func lexTableNameStart(lx *lexer) stateFn {
	lx.skip(isWhitespace)
	switch r := lx.peek(); {
	case r == tableEnd || r == eof:
		return lx.errorf("unexpected end of table name " +
			"(table names cannot be empty)")
	case r == tableSep:
		return lx.errorf("unexpected table separator " +
			"(table names cannot be empty)")
	case r == stringStart || r == rawStringStart:
		lx.ignore()
		lx.push(lexTableNameEnd)
		return lexValue // reuse string lexing
	default:
		return lexBareTableName
	}
}

// lexBareTableName lexes the name of a table. It assumes that at least one
// valid character for the table name has already been seen.
func lexBareTableName(lx *lexer) stateFn {
	r := lx.next()
	if isBareKeyChar(r) {
		return lexBareTableName
	}
	lx.backup()
	lx.emit(itemText)
	return lexTableNameEnd
}

// lexTableNameEnd reads the end of a piece of a table name, optionally
// consuming whitespace.
func lexTableNameEnd(lx *lexer) stateFn {
	lx.skip(isWhitespace)
	switch r := lx.next(); {
	case isWhitespace(r):
		return lexTableNameEnd
	case r == tableSep:
		lx.ignore()
		return lexTableNameStart
	case r == tableEnd:
		return lx.pop()
	default:
		return lx.errorf("expected '.' or ']' to end table name, "+
			"but got %q instead", r)
	}
}

// lexKeyStart begins a key, ignoring any leading whitespace. It emits
// itemKeyStart and hands off to the bare or quoted key states.
func lexKeyStart(lx *lexer) stateFn {
	r := lx.peek()
	switch {
	case r == keySep:
		return lx.errorf("unexpected key separator %q", keySep)
	case isWhitespace(r) || isNL(r):
		lx.next()
		return lexSkip(lx, lexKeyStart)
	case r == stringStart || r == rawStringStart:
		lx.ignore()
		lx.emit(itemKeyStart)
		lx.push(lexKeyEnd)
		return lexValue // reuse string lexing
	default:
		lx.ignore()
		lx.emit(itemKeyStart)
		return lexBareKey
	}
}

// lexBareKey consumes the text of a bare key. Assumes that the first character
// (which is not whitespace) has not yet been consumed.
func lexBareKey(lx *lexer) stateFn {
	switch r := lx.next(); {
	case isBareKeyChar(r):
		return lexBareKey
	case isWhitespace(r):
		lx.backup()
		lx.emit(itemText)
		return lexKeyEnd
	case r == keySep:
		lx.backup()
		lx.emit(itemText)
		return lexKeyEnd
	default:
		return lx.errorf("bare keys cannot contain %q", r)
	}
}
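// Item sketch for the table states above (illustrative): a header such as
//
//	[servers.alpha]
//
// is emitted as itemTableStart, itemText("servers"), itemText("alpha"),
// itemTableEnd, while an array-of-tables header like [[products]] is
// bracketed by itemArrayTableStart and itemArrayTableEnd instead.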
// lexKeyEnd consumes the end of a key and trims whitespace (up to the key
// separator).
func lexKeyEnd(lx *lexer) stateFn {
	switch r := lx.next(); {
	case r == keySep:
		return lexSkip(lx, lexValue)
	case isWhitespace(r):
		return lexSkip(lx, lexKeyEnd)
	default:
		return lx.errorf("expected key separator %q, but got %q instead",
			keySep, r)
	}
}

// lexValue starts the consumption of a value anywhere a value is expected.
// lexValue will ignore whitespace.
// After a value is lexed, the last state on the stack is popped and returned.
func lexValue(lx *lexer) stateFn {
	// We allow whitespace to precede a value, but NOT newlines.
	// In array syntax, the array states are responsible for ignoring newlines.
	r := lx.next()
	switch {
	case isWhitespace(r):
		return lexSkip(lx, lexValue)
	case isDigit(r):
		lx.backup() // avoid an extra state and use the same as above
		return lexNumberOrDateStart
	}
	switch r {
	case arrayStart:
		lx.ignore()
		lx.emit(itemArray)
		return lexArrayValue
	case inlineTableStart:
		lx.ignore()
		lx.emit(itemInlineTableStart)
		return lexInlineTableValue
	case stringStart:
		if lx.accept(stringStart) {
			if lx.accept(stringStart) {
				lx.ignore() // Ignore """
				return lexMultilineString
			}
			lx.backup()
		}
		lx.ignore() // ignore the '"'
		return lexString
	case rawStringStart:
		if lx.accept(rawStringStart) {
			if lx.accept(rawStringStart) {
				lx.ignore() // Ignore '''
				return lexMultilineRawString
			}
			lx.backup()
		}
		lx.ignore() // ignore the "'"
		return lexRawString
	case '+', '-':
		return lexNumberStart
	case '.': // special error case, be kind to users
		return lx.errorf("floats must start with a digit, not '.'")
	}
	if unicode.IsLetter(r) {
		// Be permissive here; lexBool will give a nice error if the
		// user wrote something like
		//     x = foo
		// (i.e. not 'true' or 'false' but is something else word-like.)
		lx.backup()
		return lexBool
	}
	return lx.errorf("expected value but found %q instead", r)
}

// lexArrayValue consumes one value in an array. It assumes that '[' or ','
// have already been consumed. All whitespace and newlines are ignored.
func lexArrayValue(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isWhitespace(r) || isNL(r):
		return lexSkip(lx, lexArrayValue)
	case r == commentStart:
		lx.push(lexArrayValue)
		return lexCommentStart
	case r == comma:
		return lx.errorf("unexpected comma")
	case r == arrayEnd:
		// NOTE(caleb): The spec isn't clear about whether you can have
		// a trailing comma or not, so we'll allow it.
		return lexArrayEnd
	}

	lx.backup()
	lx.push(lexArrayValueEnd)
	return lexValue
}

// lexArrayValueEnd consumes everything between the end of an array value and
// the next value (or the end of the array): it ignores whitespace and newlines
// and expects either a ',' or a ']'.
func lexArrayValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isWhitespace(r) || isNL(r):
		return lexSkip(lx, lexArrayValueEnd)
	case r == commentStart:
		lx.push(lexArrayValueEnd)
		return lexCommentStart
	case r == comma:
		lx.ignore()
		return lexArrayValue // move on to the next value
	case r == arrayEnd:
		return lexArrayEnd
	}
	return lx.errorf(
		"expected a comma or array terminator %q, but got %q instead",
		arrayEnd, r,
	)
}
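// Item sketch (illustrative): the line
//
//	ports = [8001, 8002]
//
// yields itemKeyStart, itemText("ports"), itemArray, itemInteger("8001"),
// itemInteger("8002"), and itemArrayEnd. lexArrayValue pushes
// lexArrayValueEnd before each element so lexValue can be reused unchanged,
// and nested arrays simply repeat the itemArray/itemArrayEnd pair.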
// lexArrayEnd finishes the lexing of an array.
// It assumes that a ']' has just been consumed.
func lexArrayEnd(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemArrayEnd)
	return lx.pop()
}

// lexInlineTableValue consumes one key/value pair in an inline table.
// It assumes that '{' or ',' have already been consumed. Whitespace is ignored.
func lexInlineTableValue(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isWhitespace(r):
		return lexSkip(lx, lexInlineTableValue)
	case isNL(r):
		return lx.errorf("newlines not allowed within inline tables")
	case r == commentStart:
		lx.push(lexInlineTableValue)
		return lexCommentStart
	case r == comma:
		return lx.errorf("unexpected comma")
	case r == inlineTableEnd:
		return lexInlineTableEnd
	}
	lx.backup()
	lx.push(lexInlineTableValueEnd)
	return lexKeyStart
}

// lexInlineTableValueEnd consumes everything between the end of an inline
// table key/value pair and the next pair (or the end of the table):
// it ignores whitespace and expects either a ',' or a '}'.
func lexInlineTableValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isWhitespace(r):
		return lexSkip(lx, lexInlineTableValueEnd)
	case isNL(r):
		return lx.errorf("newlines not allowed within inline tables")
	case r == commentStart:
		lx.push(lexInlineTableValueEnd)
		return lexCommentStart
	case r == comma:
		lx.ignore()
		return lexInlineTableValue
	case r == inlineTableEnd:
		return lexInlineTableEnd
	}
	return lx.errorf("expected a comma or an inline table terminator %q, "+
		"but got %q instead", inlineTableEnd, r)
}

// lexInlineTableEnd finishes the lexing of an inline table.
// It assumes that a '}' has just been consumed.
func lexInlineTableEnd(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemInlineTableEnd)
	return lx.pop()
}

// lexString consumes the inner contents of a string. It assumes that the
// beginning '"' has already been consumed and ignored.
func lexString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == eof:
		return lx.errorf("unexpected EOF")
	case isNL(r):
		return lx.errorf("strings cannot contain newlines")
	case r == '\\':
		lx.push(lexString)
		return lexStringEscape
	case r == stringEnd:
		lx.backup()
		lx.emit(itemString)
		lx.next()
		lx.ignore()
		return lx.pop()
	}
	return lexString
}

// lexMultilineString consumes the inner contents of a string. It assumes that
// the beginning '"""' has already been consumed and ignored.
func lexMultilineString(lx *lexer) stateFn {
	switch lx.next() {
	case eof:
		return lx.errorf("unexpected EOF")
	case '\\':
		return lexMultilineStringEscape
	case stringEnd:
		if lx.accept(stringEnd) {
			if lx.accept(stringEnd) {
				// Back up over the closing '"""' so it is excluded from
				// the emitted string, then consume and discard it.
				lx.backup()
				lx.backup()
				lx.backup()
				lx.emit(itemMultilineString)
				lx.next()
				lx.next()
				lx.next()
				lx.ignore()
				return lx.pop()
			}
			lx.backup()
		}
	}
	return lexMultilineString
}
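// String sketch (illustrative): the quote delimiters are consumed by the
// lexer but excluded from the emitted value. Escape sequences in basic
// strings are validated here and left encoded in the item value (decoding is
// assumed to be the parser's job); raw strings pass through untouched:
//
//	a = "x\ty"   ->  itemString(`x\ty`)
//	b = 'x\ty'   ->  itemRawString(`x\ty`)
//
// The multiline forms behave the same way with their """ and ''' delimiters.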
// lexRawString consumes a raw string. Nothing can be escaped in such a string.
// It assumes that the beginning "'" has already been consumed and ignored.
func lexRawString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == eof:
		return lx.errorf("unexpected EOF")
	case isNL(r):
		return lx.errorf("strings cannot contain newlines")
	case r == rawStringEnd:
		lx.backup()
		lx.emit(itemRawString)
		lx.next()
		lx.ignore()
		return lx.pop()
	}
	return lexRawString
}

// lexMultilineRawString consumes a raw string. Nothing can be escaped in such
// a string. It assumes that the beginning "'''" has already been consumed and
// ignored.
func lexMultilineRawString(lx *lexer) stateFn {
	switch lx.next() {
	case eof:
		return lx.errorf("unexpected EOF")
	case rawStringEnd:
		if lx.accept(rawStringEnd) {
			if lx.accept(rawStringEnd) {
				// Back up over the closing "'''" so it is excluded from
				// the emitted string, then consume and discard it.
				lx.backup()
				lx.backup()
				lx.backup()
				lx.emit(itemRawMultilineString)
				lx.next()
				lx.next()
				lx.next()
				lx.ignore()
				return lx.pop()
			}
			lx.backup()
		}
	}
	return lexMultilineRawString
}

// lexMultilineStringEscape consumes an escaped character. It assumes that the
// preceding '\\' has already been consumed.
func lexMultilineStringEscape(lx *lexer) stateFn {
	// Handle the special case first: a '\' followed by a newline is allowed
	// in multiline strings, so simply continue lexing.
	if isNL(lx.next()) {
		return lexMultilineString
	}
	lx.backup()
	lx.push(lexMultilineString)
	return lexStringEscape(lx)
}

// lexStringEscape consumes an escaped character. It assumes that the
// preceding '\\' has already been consumed.
func lexStringEscape(lx *lexer) stateFn {
	r := lx.next()
	switch r {
	case 'b', 't', 'n', 'f', 'r', '"', '\\':
		return lx.pop()
	case 'u':
		return lexShortUnicodeEscape
	case 'U':
		return lexLongUnicodeEscape
	}
	return lx.errorf("invalid escape character %q; only the following "+
		"escape characters are allowed: "+
		`\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX`, r)
}

// lexShortUnicodeEscape consumes the four hexadecimal digits of a '\u' escape.
func lexShortUnicodeEscape(lx *lexer) stateFn {
	var r rune
	for i := 0; i < 4; i++ {
		r = lx.next()
		if !isHexadecimal(r) {
			return lx.errorf(`expected four hexadecimal digits after '\u', `+
				"but got %q instead", lx.current())
		}
	}
	return lx.pop()
}

// lexLongUnicodeEscape consumes the eight hexadecimal digits of a '\U' escape.
func lexLongUnicodeEscape(lx *lexer) stateFn {
	var r rune
	for i := 0; i < 8; i++ {
		r = lx.next()
		if !isHexadecimal(r) {
			return lx.errorf(`expected eight hexadecimal digits after '\U', `+
				"but got %q instead", lx.current())
		}
	}
	return lx.pop()
}

// lexNumberOrDateStart consumes either an integer, a float, or a datetime.
func lexNumberOrDateStart(lx *lexer) stateFn {
	r := lx.next()
	if isDigit(r) {
		return lexNumberOrDate
	}
	switch r {
	case '_':
		return lexNumber
	case 'e', 'E':
		return lexFloat
	case '.':
		return lx.errorf("floats must start with a digit, not '.'")
	}
	return lx.errorf("expected a digit but got %q", r)
}

// lexNumberOrDate consumes either an integer, a float, or a datetime.
func lexNumberOrDate(lx *lexer) stateFn {
	r := lx.next()
	if isDigit(r) {
		return lexNumberOrDate
	}
	switch r {
	case '-':
		return lexDatetime
	case '_':
		return lexNumber
	case '.', 'e', 'E':
		return lexFloat
	}

	lx.backup()
	lx.emit(itemInteger)
	return lx.pop()
}
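// Disambiguation sketch (illustrative): values that begin with a digit all
// enter lexNumberOrDateStart and fan out on the first non-digit character:
//
//	1979-05-27  -> '-' -> lexDatetime -> itemDatetime("1979-05-27")
//	5_349_221   -> '_' -> lexNumber   -> itemInteger("5_349_221")
//	6.626e-34   -> '.' -> lexFloat    -> itemFloat("6.626e-34")
//	42          ->                       itemInteger("42")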
// lexDatetime consumes a datetime, to a first approximation.
// The parser validates that it matches one of the accepted formats.
func lexDatetime(lx *lexer) stateFn {
	r := lx.next()
	if isDigit(r) {
		return lexDatetime
	}
	switch r {
	case '-', 'T', ':', '.', 'Z':
		return lexDatetime
	}

	lx.backup()
	lx.emit(itemDatetime)
	return lx.pop()
}

// lexNumberStart consumes either an integer or a float. It assumes that a sign
// has already been read, but that *no* digits have been consumed.
// lexNumberStart will move to the appropriate integer or float states.
func lexNumberStart(lx *lexer) stateFn {
	// We MUST see a digit. Even floats have to start with a digit.
	r := lx.next()
	if !isDigit(r) {
		if r == '.' {
			return lx.errorf("floats must start with a digit, not '.'")
		}
		return lx.errorf("expected a digit but got %q", r)
	}
	return lexNumber
}

// lexNumber consumes an integer or a float after seeing the first digit.
func lexNumber(lx *lexer) stateFn {
	r := lx.next()
	if isDigit(r) {
		return lexNumber
	}
	switch r {
	case '_':
		return lexNumber
	case '.', 'e', 'E':
		return lexFloat
	}

	lx.backup()
	lx.emit(itemInteger)
	return lx.pop()
}

// lexFloat consumes the elements of a float. It allows any sequence of
// float-like characters, so floats emitted by the lexer are only a first
// approximation and must be validated by the parser.
func lexFloat(lx *lexer) stateFn {
	r := lx.next()
	if isDigit(r) {
		return lexFloat
	}
	switch r {
	case '_', '.', '-', '+', 'e', 'E':
		return lexFloat
	}

	lx.backup()
	lx.emit(itemFloat)
	return lx.pop()
}

// lexBool consumes a bool string: 'true' or 'false'.
func lexBool(lx *lexer) stateFn {
	var rs []rune
	for {
		r := lx.next()
		if !unicode.IsLetter(r) {
			lx.backup()
			break
		}
		rs = append(rs, r)
	}
	s := string(rs)
	switch s {
	case "true", "false":
		lx.emit(itemBool)
		return lx.pop()
	}
	return lx.errorf("expected value but found %q instead", s)
}

// lexCommentStart begins the lexing of a comment. It will emit
// itemCommentStart and consume no characters, passing control to lexComment.
func lexCommentStart(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemCommentStart)
	return lexComment
}

// lexComment lexes an entire comment. It assumes that '#' has been consumed.
// It will consume *up to* the first newline character, and pass control
// back to the last state on the stack.
func lexComment(lx *lexer) stateFn {
	r := lx.peek()
	if isNL(r) || r == eof {
		lx.emit(itemText)
		return lx.pop()
	}
	lx.next()
	return lexComment
}

// lexSkip ignores all slurped input and moves on to the next state.
func lexSkip(lx *lexer, nextState stateFn) stateFn {
	return func(lx *lexer) stateFn {
		lx.ignore()
		return nextState
	}
}
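// Leniency sketch (illustrative): the number and datetime states are
// deliberately permissive, so malformed input such as
//
//	bad = 1..2
//
// is still emitted as itemFloat("1..2"); rejecting it is assumed to be the
// parser's job, mirroring the "first approximation" notes on lexFloat and
// lexDatetime above.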
// isWhitespace returns true if `r` is a whitespace character according
// to the spec.
func isWhitespace(r rune) bool {
	return r == '\t' || r == ' '
}

func isNL(r rune) bool {
	return r == '\n' || r == '\r'
}

func isDigit(r rune) bool {
	return r >= '0' && r <= '9'
}

func isHexadecimal(r rune) bool {
	return (r >= '0' && r <= '9') ||
		(r >= 'a' && r <= 'f') ||
		(r >= 'A' && r <= 'F')
}

func isBareKeyChar(r rune) bool {
	return (r >= 'A' && r <= 'Z') ||
		(r >= 'a' && r <= 'z') ||
		(r >= '0' && r <= '9') ||
		r == '_' ||
		r == '-'
}

func (itype itemType) String() string {
	switch itype {
	case itemError:
		return "Error"
	case itemNIL:
		return "NIL"
	case itemEOF:
		return "EOF"
	case itemText:
		return "Text"
	case itemString, itemRawString, itemMultilineString, itemRawMultilineString:
		return "String"
	case itemBool:
		return "Bool"
	case itemInteger:
		return "Integer"
	case itemFloat:
		return "Float"
	case itemDatetime:
		return "DateTime"
	case itemTableStart:
		return "TableStart"
	case itemTableEnd:
		return "TableEnd"
	case itemArrayTableStart:
		return "ArrayTableStart"
	case itemArrayTableEnd:
		return "ArrayTableEnd"
	case itemKeyStart:
		return "KeyStart"
	case itemArray:
		return "Array"
	case itemArrayEnd:
		return "ArrayEnd"
	case itemCommentStart:
		return "CommentStart"
	case itemInlineTableStart:
		return "InlineTableStart"
	case itemInlineTableEnd:
		return "InlineTableEnd"
	}
	panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
}

func (item item) String() string {
	return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val)
}