package toml

import (
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)

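// itemType identifies the kind of token emitted by the lexer.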
type itemType int

const (
	itemError itemType = iota
	itemNIL            // used in the parser to indicate no type
	itemEOF
	itemText
	itemString
	itemRawString
	itemMultilineString
	itemRawMultilineString
	itemBool
	itemInteger
	itemFloat
	itemDatetime
	itemArray // the start of an array
	itemArrayEnd
	itemTableStart
	itemTableEnd
	itemArrayTableStart
	itemArrayTableEnd
	itemKeyStart
	itemCommentStart
	itemInlineTableStart
	itemInlineTableEnd
)

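// Characters with special meaning in TOML, as recognized by the lexer.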
const (
	eof              = 0
	comma            = ','
	tableStart       = '['
	tableEnd         = ']'
	arrayTableStart  = '['
	arrayTableEnd    = ']'
	tableSep         = '.'
	keySep           = '='
	arrayStart       = '['
	arrayEnd         = ']'
	commentStart     = '#'
	stringStart      = '"'
	stringEnd        = '"'
	rawStringStart   = '\''
	rawStringEnd     = '\''
	inlineTableStart = '{'
	inlineTableEnd   = '}'
)

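// stateFn is a single state of the lexer. Each state does some work and
// returns the next state to run, or nil to stop lexing.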
type stateFn func(lx *lexer) stateFn

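// lexer holds the scanning state: the input, the boundaries of the pending
// token, and a stack of state functions for nested constructs.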
type lexer struct {
	input string
	start int
	pos   int
	line  int
	state stateFn
	items chan item

	// Allow for backing up up to three runes.
	// This is necessary because TOML contains 3-rune tokens (""" and ''').
	prevWidths [3]int
	nprev      int // how many of prevWidths are in use
	// If we emit an eof, we can still back up, but it is not OK to call
	// next again.
	atEOF bool

	// A stack of state functions used to maintain context.
	// The idea is to reuse parts of the state machine in various places.
	// For example, values can appear at the top level or within arbitrarily
	// nested arrays. The last state on the stack is used after a value has
	// been lexed. Similarly for comments.
	stack []stateFn
}

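// item is a single token produced by the lexer: its type, its raw text, and
// the line on which it started.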
type item struct {
	typ  itemType
	val  string
	line int
}

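// nextItem returns the next token, running the state machine until an item
// becomes available on the channel.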
func (lx *lexer) nextItem() item {
	for {
		select {
		case item := <-lx.items:
			return item
		default:
			lx.state = lx.state(lx)
		}
	}
}

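// lex creates a lexer for input, starting in the top-level state.
//
// A minimal driver loop might look like the following (illustrative only;
// the parser in this package is the real consumer):
//
//	lx := lex(input)
//	for it := lx.nextItem(); it.typ != itemEOF && it.typ != itemError; it = lx.nextItem() {
//		fmt.Println(it)
//	}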
func lex(input string) *lexer {
	lx := &lexer{
		input: input,
		state: lexTop,
		line:  1,
		items: make(chan item, 10),
		stack: make([]stateFn, 0, 10),
	}
	return lx
}

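// push saves a state to return to after a nested construct has been lexed.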
func (lx *lexer) push(state stateFn) {
	lx.stack = append(lx.stack, state)
}

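// pop removes and returns the most recently pushed state.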
func (lx *lexer) pop() stateFn {
	if len(lx.stack) == 0 {
		return lx.errorf("BUG in lexer: no states to pop")
	}
	last := lx.stack[len(lx.stack)-1]
	lx.stack = lx.stack[0 : len(lx.stack)-1]
	return last
}

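// current returns the input consumed so far for the pending token.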
func (lx *lexer) current() string {
	return lx.input[lx.start:lx.pos]
}

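// emit sends the pending input as an item of the given type and marks it
// as consumed.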
func (lx *lexer) emit(typ itemType) {
	lx.items <- item{typ, lx.current(), lx.line}
	lx.start = lx.pos
}

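// emitTrim is like emit, but trims surrounding whitespace from the value.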
func (lx *lexer) emitTrim(typ itemType) {
	lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line}
	lx.start = lx.pos
}

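// next returns the next rune in the input and advances the position,
// recording rune widths so that the lexer can back up later.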
func (lx *lexer) next() (r rune) {
	if lx.atEOF {
		panic("next called after EOF")
	}
	if lx.pos >= len(lx.input) {
		lx.atEOF = true
		return eof
	}

	if lx.input[lx.pos] == '\n' {
		lx.line++
	}
	lx.prevWidths[2] = lx.prevWidths[1]
	lx.prevWidths[1] = lx.prevWidths[0]
	if lx.nprev < 3 {
		lx.nprev++
	}
	r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
	lx.prevWidths[0] = w
	lx.pos += w
	return r
}

// ignore skips over the pending input before this point.
func (lx *lexer) ignore() {
	lx.start = lx.pos
}

// backup steps back one rune. Can be called up to three times between calls
// to next (matching the three recorded rune widths).
func (lx *lexer) backup() {
	if lx.atEOF {
		lx.atEOF = false
		return
	}
	if lx.nprev < 1 {
		panic("backed up too far")
	}
	w := lx.prevWidths[0]
	lx.prevWidths[0] = lx.prevWidths[1]
	lx.prevWidths[1] = lx.prevWidths[2]
	lx.nprev--
	lx.pos -= w
	if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
		lx.line--
	}
}

// accept consumes the next rune if it's equal to `valid`.
func (lx *lexer) accept(valid rune) bool {
	if lx.next() == valid {
		return true
	}
	lx.backup()
	return false
}

// peek returns but does not consume the next rune in the input.
func (lx *lexer) peek() rune {
	r := lx.next()
	lx.backup()
	return r
}

// skip ignores all input that matches the given predicate.
func (lx *lexer) skip(pred func(rune) bool) {
	for {
		r := lx.next()
		if pred(r) {
			continue
		}
		lx.backup()
		lx.ignore()
		return
	}
}

// errorf stops all lexing by emitting an error item and returning nil.
// Callers format rune values with %q, so special characters (newlines,
// tabs, etc.) appear escaped in the error message.
func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
	lx.items <- item{
		itemError,
		fmt.Sprintf(format, values...),
		lx.line,
	}
	return nil
}

// lexTop consumes elements at the top level of TOML data.
func lexTop(lx *lexer) stateFn {
	r := lx.next()
	if isWhitespace(r) || isNL(r) {
		return lexSkip(lx, lexTop)
	}
	switch r {
	case commentStart:
		lx.push(lexTop)
		return lexCommentStart
	case tableStart:
		return lexTableStart
	case eof:
		if lx.pos > lx.start {
			return lx.errorf("unexpected EOF")
		}
		lx.emit(itemEOF)
		return nil
	}

	// At this point, the only valid item can be a key, so we back up
	// and let the key lexer do the rest.
	lx.backup()
	lx.push(lexTopEnd)
	return lexKeyStart
}

// lexTopEnd is entered whenever a top-level item has been consumed. (A value
// or a table.) It must see only whitespace, and will turn back to lexTop
// upon a newline. If it sees EOF, it will quit the lexer successfully.
func lexTopEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == commentStart:
		// a comment will read to a newline for us.
		lx.push(lexTop)
		return lexCommentStart
	case isWhitespace(r):
		return lexTopEnd
	case isNL(r):
		lx.ignore()
		return lexTop
	case r == eof:
		lx.emit(itemEOF)
		return nil
	}
	return lx.errorf("expected a top-level item to end with a newline, "+
		"comment, or EOF, but got %q instead", r)
}

// lexTableStart lexes the beginning of a table. Namely, it makes sure that
// the name starts with a character other than '.' and ']'.
// It assumes that '[' has already been consumed.
// It also handles the case that this is an item in an array of tables,
// e.g., '[[name]]'.
func lexTableStart(lx *lexer) stateFn {
	if lx.peek() == arrayTableStart {
		lx.next()
		lx.emit(itemArrayTableStart)
		lx.push(lexArrayTableEnd)
	} else {
		lx.emit(itemTableStart)
		lx.push(lexTableEnd)
	}
	return lexTableNameStart
}

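// lexTableEnd emits the end of a standard table. It assumes that the closing
// ']' has just been consumed.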
func lexTableEnd(lx *lexer) stateFn {
	lx.emit(itemTableEnd)
	return lexTopEnd
}

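// lexArrayTableEnd consumes the second ']' of an array table name (e.g.
// '[[name]]') and emits itemArrayTableEnd.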
func lexArrayTableEnd(lx *lexer) stateFn {
	if r := lx.next(); r != arrayTableEnd {
		return lx.errorf("expected end of table array name delimiter %q, "+
			"but got %q instead", arrayTableEnd, r)
	}
	lx.emit(itemArrayTableEnd)
	return lexTopEnd
}

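// lexTableNameStart lexes one dotted piece of a table name, dispatching to
// bare or quoted name lexing as appropriate.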
func lexTableNameStart(lx *lexer) stateFn {
	lx.skip(isWhitespace)
	switch r := lx.peek(); {
	case r == tableEnd || r == eof:
		return lx.errorf("unexpected end of table name " +
			"(table names cannot be empty)")
	case r == tableSep:
		return lx.errorf("unexpected table separator " +
			"(table names cannot be empty)")
	case r == stringStart || r == rawStringStart:
		lx.ignore()
		lx.push(lexTableNameEnd)
		return lexValue // reuse string lexing
	default:
		return lexBareTableName
	}
}

// lexBareTableName lexes one bare (unquoted) piece of a table name. It
// assumes that the first valid character has been seen via peek but not yet
// consumed.
func lexBareTableName(lx *lexer) stateFn {
	r := lx.next()
	if isBareKeyChar(r) {
		return lexBareTableName
	}
	lx.backup()
	lx.emit(itemText)
	return lexTableNameEnd
}

// lexTableNameEnd reads the end of a piece of a table name, optionally
// consuming whitespace.
func lexTableNameEnd(lx *lexer) stateFn {
	lx.skip(isWhitespace)
	switch r := lx.next(); {
	case isWhitespace(r):
		return lexTableNameEnd
	case r == tableSep:
		lx.ignore()
		return lexTableNameStart
	case r == tableEnd:
		return lx.pop()
	default:
		return lx.errorf("expected '.' or ']' to end table name, "+
			"but got %q instead", r)
	}
}

// lexKeyStart skips whitespace up to the start of a key name, emits
// itemKeyStart, and dispatches to the appropriate key lexer.
func lexKeyStart(lx *lexer) stateFn {
	r := lx.peek()
	switch {
	case r == keySep:
		return lx.errorf("unexpected key separator %q", keySep)
	case isWhitespace(r) || isNL(r):
		lx.next()
		return lexSkip(lx, lexKeyStart)
	case r == stringStart || r == rawStringStart:
		lx.ignore()
		lx.emit(itemKeyStart)
		lx.push(lexKeyEnd)
		return lexValue // reuse string lexing
	default:
		lx.ignore()
		lx.emit(itemKeyStart)
		return lexBareKey
	}
}

// lexBareKey consumes the text of a bare key. Assumes that the first character
// (which is not whitespace) has not yet been consumed.
func lexBareKey(lx *lexer) stateFn {
	switch r := lx.next(); {
	case isBareKeyChar(r):
		return lexBareKey
	case isWhitespace(r):
		lx.backup()
		lx.emit(itemText)
		return lexKeyEnd
	case r == keySep:
		lx.backup()
		lx.emit(itemText)
		return lexKeyEnd
	default:
		return lx.errorf("bare keys cannot contain %q", r)
	}
}

// lexKeyEnd consumes the end of a key and trims whitespace (up to the key
// separator).
func lexKeyEnd(lx *lexer) stateFn {
	switch r := lx.next(); {
	case r == keySep:
		return lexSkip(lx, lexValue)
	case isWhitespace(r):
		return lexSkip(lx, lexKeyEnd)
	default:
		return lx.errorf("expected key separator %q, but got %q instead",
			keySep, r)
	}
}

// lexValue starts the consumption of a value anywhere a value is expected.
// lexValue will ignore whitespace.
// After a value is lexed, the last state on the stack is popped and returned.
func lexValue(lx *lexer) stateFn {
	// We allow whitespace to precede a value, but NOT newlines.
	// In array syntax, the array states are responsible for ignoring newlines.
	r := lx.next()
	switch {
	case isWhitespace(r):
		return lexSkip(lx, lexValue)
	case isDigit(r):
		lx.backup() // avoid an extra state and use the same as above
		return lexNumberOrDateStart
	}
	switch r {
	case arrayStart:
		lx.ignore()
		lx.emit(itemArray)
		return lexArrayValue
	case inlineTableStart:
		lx.ignore()
		lx.emit(itemInlineTableStart)
		return lexInlineTableValue
	case stringStart:
		if lx.accept(stringStart) {
			if lx.accept(stringStart) {
				lx.ignore() // Ignore """
				return lexMultilineString
			}
			lx.backup()
		}
		lx.ignore() // ignore the '"'
		return lexString
	case rawStringStart:
		if lx.accept(rawStringStart) {
			if lx.accept(rawStringStart) {
				lx.ignore() // Ignore '''
				return lexMultilineRawString
			}
			lx.backup()
		}
		lx.ignore() // ignore the "'"
		return lexRawString
	case '+', '-':
		return lexNumberStart
	case '.': // special error case, be kind to users
		return lx.errorf("floats must start with a digit, not '.'")
	}
	if unicode.IsLetter(r) {
		// Be permissive here; lexBool will give a nice error if the
		// user wrote something like
		//   x = foo
		// (i.e. not 'true' or 'false' but is something else word-like.)
		lx.backup()
		return lexBool
	}
	return lx.errorf("expected value but found %q instead", r)
}

// lexArrayValue consumes one value in an array. It assumes that '[' or ','
// have already been consumed. All whitespace and newlines are ignored.
func lexArrayValue(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isWhitespace(r) || isNL(r):
		return lexSkip(lx, lexArrayValue)
	case r == commentStart:
		lx.push(lexArrayValue)
		return lexCommentStart
	case r == comma:
		return lx.errorf("unexpected comma")
	case r == arrayEnd:
		// NOTE(caleb): The spec isn't clear about whether you can have
		// a trailing comma or not, so we'll allow it.
		return lexArrayEnd
	}

	lx.backup()
	lx.push(lexArrayValueEnd)
	return lexValue
}

// lexArrayValueEnd consumes everything between the end of an array value and
// the next value (or the end of the array): it ignores whitespace and newlines
// and expects either a ',' or a ']'.
func lexArrayValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isWhitespace(r) || isNL(r):
		return lexSkip(lx, lexArrayValueEnd)
	case r == commentStart:
		lx.push(lexArrayValueEnd)
		return lexCommentStart
	case r == comma:
		lx.ignore()
		return lexArrayValue // move on to the next value
	case r == arrayEnd:
		return lexArrayEnd
	}
	return lx.errorf(
		"expected a comma or array terminator %q, but got %q instead",
		arrayEnd, r,
	)
}

// lexArrayEnd finishes the lexing of an array.
// It assumes that a ']' has just been consumed.
func lexArrayEnd(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemArrayEnd)
	return lx.pop()
}

// lexInlineTableValue consumes one key/value pair in an inline table.
// It assumes that '{' or ',' have already been consumed. Whitespace is ignored.
func lexInlineTableValue(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isWhitespace(r):
		return lexSkip(lx, lexInlineTableValue)
	case isNL(r):
		return lx.errorf("newlines not allowed within inline tables")
	case r == commentStart:
		lx.push(lexInlineTableValue)
		return lexCommentStart
	case r == comma:
		return lx.errorf("unexpected comma")
	case r == inlineTableEnd:
		return lexInlineTableEnd
	}
	lx.backup()
	lx.push(lexInlineTableValueEnd)
	return lexKeyStart
}

// lexInlineTableValueEnd consumes everything between the end of an inline table
// key/value pair and the next pair (or the end of the table):
// it ignores whitespace and expects either a ',' or a '}'.
func lexInlineTableValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isWhitespace(r):
		return lexSkip(lx, lexInlineTableValueEnd)
	case isNL(r):
		return lx.errorf("newlines not allowed within inline tables")
	case r == commentStart:
		lx.push(lexInlineTableValueEnd)
		return lexCommentStart
	case r == comma:
		lx.ignore()
		return lexInlineTableValue
	case r == inlineTableEnd:
		return lexInlineTableEnd
	}
	return lx.errorf("expected a comma or an inline table terminator %q, "+
		"but got %q instead", inlineTableEnd, r)
}

// lexInlineTableEnd finishes the lexing of an inline table.
// It assumes that a '}' has just been consumed.
func lexInlineTableEnd(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemInlineTableEnd)
	return lx.pop()
}

// lexString consumes the inner contents of a string. It assumes that the
// beginning '"' has already been consumed and ignored.
func lexString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == eof:
		return lx.errorf("unexpected EOF")
	case isNL(r):
		return lx.errorf("strings cannot contain newlines")
	case r == '\\':
		lx.push(lexString)
		return lexStringEscape
	case r == stringEnd:
		lx.backup()
		lx.emit(itemString)
		lx.next()
		lx.ignore()
		return lx.pop()
	}
	return lexString
}

// lexMultilineString consumes the inner contents of a multiline string. It
// assumes that the beginning '"""' has already been consumed and ignored.
func lexMultilineString(lx *lexer) stateFn {
	switch lx.next() {
	case eof:
		return lx.errorf("unexpected EOF")
	case '\\':
		return lexMultilineStringEscape
	case stringEnd:
		if lx.accept(stringEnd) {
			if lx.accept(stringEnd) {
				lx.backup()
				lx.backup()
				lx.backup()
				lx.emit(itemMultilineString)
				lx.next()
				lx.next()
				lx.next()
				lx.ignore()
				return lx.pop()
			}
			lx.backup()
		}
	}
	return lexMultilineString
}

// lexRawString consumes a raw string. Nothing can be escaped in such a string.
// It assumes that the beginning "'" has already been consumed and ignored.
func lexRawString(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == eof:
		return lx.errorf("unexpected EOF")
	case isNL(r):
		return lx.errorf("strings cannot contain newlines")
	case r == rawStringEnd:
		lx.backup()
		lx.emit(itemRawString)
		lx.next()
		lx.ignore()
		return lx.pop()
	}
	return lexRawString
}

// lexMultilineRawString consumes a raw string. Nothing can be escaped in such
// a string. It assumes that the beginning "'''" has already been consumed and
// ignored.
func lexMultilineRawString(lx *lexer) stateFn {
	switch lx.next() {
	case eof:
		return lx.errorf("unexpected EOF")
	case rawStringEnd:
		if lx.accept(rawStringEnd) {
			if lx.accept(rawStringEnd) {
				lx.backup()
				lx.backup()
				lx.backup()
				lx.emit(itemRawMultilineString)
				lx.next()
				lx.next()
				lx.next()
				lx.ignore()
				return lx.pop()
			}
			lx.backup()
		}
	}
	return lexMultilineRawString
}

// lexMultilineStringEscape consumes an escaped character. It assumes that the
// preceding '\\' has already been consumed.
func lexMultilineStringEscape(lx *lexer) stateFn {
	// Handle the special case first:
	if isNL(lx.next()) {
		return lexMultilineString
	}
	lx.backup()
	lx.push(lexMultilineString)
	return lexStringEscape(lx)
}

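// lexStringEscape consumes the character following a backslash and checks
// that it forms a valid escape sequence.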
func lexStringEscape(lx *lexer) stateFn {
	r := lx.next()
	switch r {
	case 'b':
		fallthrough
	case 't':
		fallthrough
	case 'n':
		fallthrough
	case 'f':
		fallthrough
	case 'r':
		fallthrough
	case '"':
		fallthrough
	case '\\':
		return lx.pop()
	case 'u':
		return lexShortUnicodeEscape
	case 'U':
		return lexLongUnicodeEscape
	}
	return lx.errorf("invalid escape character %q; only the following "+
		"escape characters are allowed: "+
		`\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX`, r)
}

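// lexShortUnicodeEscape consumes the four hexadecimal digits of a \uXXXX
// escape. It assumes that '\u' has already been consumed.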
func lexShortUnicodeEscape(lx *lexer) stateFn {
	var r rune
	for i := 0; i < 4; i++ {
		r = lx.next()
		if !isHexadecimal(r) {
			return lx.errorf(`expected four hexadecimal digits after '\u', `+
				"but got %q instead", lx.current())
		}
	}
	return lx.pop()
}

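// lexLongUnicodeEscape consumes the eight hexadecimal digits of a \UXXXXXXXX
// escape. It assumes that '\U' has already been consumed.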
func lexLongUnicodeEscape(lx *lexer) stateFn {
	var r rune
	for i := 0; i < 8; i++ {
		r = lx.next()
		if !isHexadecimal(r) {
			return lx.errorf(`expected eight hexadecimal digits after '\U', `+
				"but got %q instead", lx.current())
		}
	}
	return lx.pop()
}

// lexNumberOrDateStart consumes either an integer, a float, or datetime.
func lexNumberOrDateStart(lx *lexer) stateFn {
	r := lx.next()
	if isDigit(r) {
		return lexNumberOrDate
	}
	switch r {
	case '_':
		return lexNumber
	case 'e', 'E':
		return lexFloat
	case '.':
		return lx.errorf("floats must start with a digit, not '.'")
	}
	return lx.errorf("expected a digit but got %q", r)
}

// lexNumberOrDate consumes either an integer, float or datetime.
func lexNumberOrDate(lx *lexer) stateFn {
	r := lx.next()
	if isDigit(r) {
		return lexNumberOrDate
	}
	switch r {
	case '-':
		return lexDatetime
	case '_':
		return lexNumber
	case '.', 'e', 'E':
		return lexFloat
	}

	lx.backup()
	lx.emit(itemInteger)
	return lx.pop()
}

// lexDatetime consumes a Datetime, to a first approximation.
// The parser validates that it matches one of the accepted formats.
func lexDatetime(lx *lexer) stateFn {
	r := lx.next()
	if isDigit(r) {
		return lexDatetime
	}
	switch r {
	case '-', 'T', ':', '.', 'Z', '+':
		return lexDatetime
	}

	lx.backup()
	lx.emit(itemDatetime)
	return lx.pop()
}

// lexNumberStart consumes either an integer or a float. It assumes that a sign
// has already been read, but that *no* digits have been consumed.
// lexNumberStart will move to the appropriate integer or float states.
func lexNumberStart(lx *lexer) stateFn {
	// We MUST see a digit. Even floats have to start with a digit.
	r := lx.next()
	if !isDigit(r) {
		if r == '.' {
			return lx.errorf("floats must start with a digit, not '.'")
		}
		return lx.errorf("expected a digit but got %q", r)
	}
	return lexNumber
}

// lexNumber consumes an integer or a float after seeing the first digit.
func lexNumber(lx *lexer) stateFn {
	r := lx.next()
	if isDigit(r) {
		return lexNumber
	}
	switch r {
	case '_':
		return lexNumber
	case '.', 'e', 'E':
		return lexFloat
	}

	lx.backup()
	lx.emit(itemInteger)
	return lx.pop()
}

// lexFloat consumes the elements of a float. It allows any sequence of
// float-like characters, so floats emitted by the lexer are only a first
// approximation and must be validated by the parser.
func lexFloat(lx *lexer) stateFn {
	r := lx.next()
	if isDigit(r) {
		return lexFloat
	}
	switch r {
	case '_', '.', '-', '+', 'e', 'E':
		return lexFloat
	}

	lx.backup()
	lx.emit(itemFloat)
	return lx.pop()
}

// lexBool consumes a bool string: 'true' or 'false'.
func lexBool(lx *lexer) stateFn {
	var rs []rune
	for {
		r := lx.next()
		if !unicode.IsLetter(r) {
			lx.backup()
			break
		}
		rs = append(rs, r)
	}
	s := string(rs)
	switch s {
	case "true", "false":
		lx.emit(itemBool)
		return lx.pop()
	}
	return lx.errorf("expected value but found %q instead", s)
}

// lexCommentStart begins the lexing of a comment. It will emit
// itemCommentStart and consume no characters, passing control to lexComment.
func lexCommentStart(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemCommentStart)
	return lexComment
}

// lexComment lexes an entire comment. It assumes that '#' has been consumed.
// It will consume *up to* the first newline character, and pass control
// back to the last state on the stack.
func lexComment(lx *lexer) stateFn {
	r := lx.peek()
	if isNL(r) || r == eof {
		lx.emit(itemText)
		return lx.pop()
	}
	lx.next()
	return lexComment
}

// lexSkip ignores all slurped input and moves on to the next state.
func lexSkip(lx *lexer, nextState stateFn) stateFn {
	return func(lx *lexer) stateFn {
		lx.ignore()
		return nextState
	}
}

// isWhitespace returns true if `r` is a whitespace character according
// to the spec.
func isWhitespace(r rune) bool {
	return r == '\t' || r == ' '
}

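// isNL returns true if `r` is a newline character (LF or CR).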
func isNL(r rune) bool {
	return r == '\n' || r == '\r'
}

func isDigit(r rune) bool {
	return r >= '0' && r <= '9'
}

func isHexadecimal(r rune) bool {
	return (r >= '0' && r <= '9') ||
		(r >= 'a' && r <= 'f') ||
		(r >= 'A' && r <= 'F')
}

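// isBareKeyChar returns true if `r` may appear in a bare (unquoted) key or
// table name.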
func isBareKeyChar(r rune) bool {
	return (r >= 'A' && r <= 'Z') ||
		(r >= 'a' && r <= 'z') ||
		(r >= '0' && r <= '9') ||
		r == '_' ||
		r == '-'
}

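// String returns a human-readable name for an item type, primarily for use
// in error messages and debugging output.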
func (itype itemType) String() string {
	switch itype {
	case itemError:
		return "Error"
	case itemNIL:
		return "NIL"
	case itemEOF:
		return "EOF"
	case itemText:
		return "Text"
	case itemString, itemRawString, itemMultilineString, itemRawMultilineString:
		return "String"
	case itemBool:
		return "Bool"
	case itemInteger:
		return "Integer"
	case itemFloat:
		return "Float"
	case itemDatetime:
		return "DateTime"
	case itemTableStart:
		return "TableStart"
	case itemTableEnd:
		return "TableEnd"
	case itemKeyStart:
		return "KeyStart"
	case itemArray:
		return "Array"
	case itemArrayEnd:
		return "ArrayEnd"
	case itemArrayTableStart:
		return "ArrayTableStart"
	case itemArrayTableEnd:
		return "ArrayTableEnd"
	case itemInlineTableStart:
		return "InlineTableStart"
	case itemInlineTableEnd:
		return "InlineTableEnd"
	case itemCommentStart:
		return "CommentStart"
	}
	panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
}

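// String renders an item as "(Type, value)" for debugging output.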
func (item item) String() string {
	return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val)
}