1// TOML lexer.
2//
3// Written using the principles developed by Rob Pike in
4// http://www.youtube.com/watch?v=HxaD_trXwRE
5
6package toml
7
8import (
9	"bytes"
10	"errors"
11	"fmt"
12	"strconv"
13	"strings"
14)
15
// Define state functions
//
// A tomlLexStateFn performs one lexing step and returns the next state
// function to run, or nil to terminate the lexer loop (see run).
type tomlLexStateFn func() tomlLexStateFn

// Define lexer
//
// tomlLexer scans a rune slice and accumulates a flat list of tokens.
// The token currently being built is the half-open rune range
// input[currentTokenStart:currentTokenStop].
type tomlLexer struct {
	inputIdx          int
	input             []rune // Textual source
	currentTokenStart int    // rune offset where the current token begins
	currentTokenStop  int    // rune offset one past the current token's end
	tokens            []token
	brackets          []rune // stack of currently open '[' / '{' delimiters
	line              int    // line of the current token's start (1-based)
	col               int    // column of the current token's start (1-based)
	endbufferLine     int    // line of the read cursor
	endbufferCol      int    // column of the read cursor
}
32
33// Basic read operations on input
34
35func (l *tomlLexer) read() rune {
36	r := l.peek()
37	if r == '\n' {
38		l.endbufferLine++
39		l.endbufferCol = 1
40	} else {
41		l.endbufferCol++
42	}
43	l.inputIdx++
44	return r
45}
46
47func (l *tomlLexer) next() rune {
48	r := l.read()
49
50	if r != eof {
51		l.currentTokenStop++
52	}
53	return r
54}
55
56func (l *tomlLexer) ignore() {
57	l.currentTokenStart = l.currentTokenStop
58	l.line = l.endbufferLine
59	l.col = l.endbufferCol
60}
61
62func (l *tomlLexer) skip() {
63	l.next()
64	l.ignore()
65}
66
67func (l *tomlLexer) fastForward(n int) {
68	for i := 0; i < n; i++ {
69		l.next()
70	}
71}
72
73func (l *tomlLexer) emitWithValue(t tokenType, value string) {
74	l.tokens = append(l.tokens, token{
75		Position: Position{l.line, l.col},
76		typ:      t,
77		val:      value,
78	})
79	l.ignore()
80}
81
82func (l *tomlLexer) emit(t tokenType) {
83	l.emitWithValue(t, string(l.input[l.currentTokenStart:l.currentTokenStop]))
84}
85
86func (l *tomlLexer) peek() rune {
87	if l.inputIdx >= len(l.input) {
88		return eof
89	}
90	return l.input[l.inputIdx]
91}
92
93func (l *tomlLexer) peekString(size int) string {
94	maxIdx := len(l.input)
95	upperIdx := l.inputIdx + size // FIXME: potential overflow
96	if upperIdx > maxIdx {
97		upperIdx = maxIdx
98	}
99	return string(l.input[l.inputIdx:upperIdx])
100}
101
102func (l *tomlLexer) follow(next string) bool {
103	return next == l.peekString(len(next))
104}
105
106// Error management
107
108func (l *tomlLexer) errorf(format string, args ...interface{}) tomlLexStateFn {
109	l.tokens = append(l.tokens, token{
110		Position: Position{l.line, l.col},
111		typ:      tokenError,
112		val:      fmt.Sprintf(format, args...),
113	})
114	return nil
115}
116
117// State functions
118
119func (l *tomlLexer) lexVoid() tomlLexStateFn {
120	for {
121		next := l.peek()
122		switch next {
123		case '}': // after '{'
124			return l.lexRightCurlyBrace
125		case '[':
126			return l.lexTableKey
127		case '#':
128			return l.lexComment(l.lexVoid)
129		case '=':
130			return l.lexEqual
131		case '\r':
132			fallthrough
133		case '\n':
134			l.skip()
135			continue
136		}
137
138		if isSpace(next) {
139			l.skip()
140		}
141
142		if isKeyStartChar(next) {
143			return l.lexKey
144		}
145
146		if next == eof {
147			l.next()
148			break
149		}
150	}
151
152	l.emit(tokenEOF)
153	return nil
154}
155
156func (l *tomlLexer) lexRvalue() tomlLexStateFn {
157	for {
158		next := l.peek()
159		switch next {
160		case '.':
161			return l.errorf("cannot start float with a dot")
162		case '=':
163			return l.lexEqual
164		case '[':
165			return l.lexLeftBracket
166		case ']':
167			return l.lexRightBracket
168		case '{':
169			return l.lexLeftCurlyBrace
170		case '}':
171			return l.lexRightCurlyBrace
172		case '#':
173			return l.lexComment(l.lexRvalue)
174		case '"':
175			return l.lexString
176		case '\'':
177			return l.lexLiteralString
178		case ',':
179			return l.lexComma
180		case '\r':
181			fallthrough
182		case '\n':
183			l.skip()
184			if len(l.brackets) > 0 && l.brackets[len(l.brackets)-1] == '[' {
185				return l.lexRvalue
186			}
187			return l.lexVoid
188		}
189
190		if l.follow("true") {
191			return l.lexTrue
192		}
193
194		if l.follow("false") {
195			return l.lexFalse
196		}
197
198		if l.follow("inf") {
199			return l.lexInf
200		}
201
202		if l.follow("nan") {
203			return l.lexNan
204		}
205
206		if isSpace(next) {
207			l.skip()
208			continue
209		}
210
211		if next == eof {
212			l.next()
213			break
214		}
215
216		if next == '+' || next == '-' {
217			return l.lexNumber
218		}
219
220		if isDigit(next) {
221			return l.lexDateTimeOrNumber
222		}
223
224		return l.errorf("no value can start with %c", next)
225	}
226
227	l.emit(tokenEOF)
228	return nil
229}
230
231func (l *tomlLexer) lexDateTimeOrNumber() tomlLexStateFn {
232	// Could be either a date/time, or a digit.
233	// The options for date/times are:
234	//   YYYY-... => date or date-time
235	//   HH:... => time
236	// Anything else should be a number.
237
238	lookAhead := l.peekString(5)
239	if len(lookAhead) < 3 {
240		return l.lexNumber()
241	}
242
243	for idx, r := range lookAhead {
244		if !isDigit(r) {
245			if idx == 2 && r == ':' {
246				return l.lexDateTimeOrTime()
247			}
248			if idx == 4 && r == '-' {
249				return l.lexDateTimeOrTime()
250			}
251			return l.lexNumber()
252		}
253	}
254	return l.lexNumber()
255}
256
257func (l *tomlLexer) lexLeftCurlyBrace() tomlLexStateFn {
258	l.next()
259	l.emit(tokenLeftCurlyBrace)
260	l.brackets = append(l.brackets, '{')
261	return l.lexVoid
262}
263
264func (l *tomlLexer) lexRightCurlyBrace() tomlLexStateFn {
265	l.next()
266	l.emit(tokenRightCurlyBrace)
267	if len(l.brackets) == 0 || l.brackets[len(l.brackets)-1] != '{' {
268		return l.errorf("cannot have '}' here")
269	}
270	l.brackets = l.brackets[:len(l.brackets)-1]
271	return l.lexRvalue
272}
273
274func (l *tomlLexer) lexDateTimeOrTime() tomlLexStateFn {
275	// Example matches:
276	// 1979-05-27T07:32:00Z
277	// 1979-05-27T00:32:00-07:00
278	// 1979-05-27T00:32:00.999999-07:00
279	// 1979-05-27 07:32:00Z
280	// 1979-05-27 00:32:00-07:00
281	// 1979-05-27 00:32:00.999999-07:00
282	// 1979-05-27T07:32:00
283	// 1979-05-27T00:32:00.999999
284	// 1979-05-27 07:32:00
285	// 1979-05-27 00:32:00.999999
286	// 1979-05-27
287	// 07:32:00
288	// 00:32:00.999999
289
290	// we already know those two are digits
291	l.next()
292	l.next()
293
294	// Got 2 digits. At that point it could be either a time or a date(-time).
295
296	r := l.next()
297	if r == ':' {
298		return l.lexTime()
299	}
300
301	return l.lexDateTime()
302}
303
// lexDateTime lexes the remainder of a date, optionally followed by a
// time (local or with offset). On entry the cursor sits after the
// third rune of the value (see lexDateTimeOrTime).
func (l *tomlLexer) lexDateTime() tomlLexStateFn {
	// This state accepts an offset date-time, a local date-time, or a local date.
	//
	//   v--- cursor
	// 1979-05-27T07:32:00Z
	// 1979-05-27T00:32:00-07:00
	// 1979-05-27T00:32:00.999999-07:00
	// 1979-05-27 07:32:00Z
	// 1979-05-27 00:32:00-07:00
	// 1979-05-27 00:32:00.999999-07:00
	// 1979-05-27T07:32:00
	// 1979-05-27T00:32:00.999999
	// 1979-05-27 07:32:00
	// 1979-05-27 00:32:00.999999
	// 1979-05-27

	// date

	// already checked by the lookahead in lexDateTimeOrNumber
	// (positions 3-4 are a digit followed by '-'), so consume the
	// last year digit and the separator without re-validating
	l.next() // digit
	l.next() // -

	// two-digit month
	for i := 0; i < 2; i++ {
		r := l.next()
		if !isDigit(r) {
			return l.errorf("invalid month digit in date: %c", r)
		}
	}

	r := l.next()
	if r != '-' {
		return l.errorf("expected - to separate month of a date, not %c", r)
	}

	// two-digit day
	for i := 0; i < 2; i++ {
		r := l.next()
		if !isDigit(r) {
			return l.errorf("invalid day digit in date: %c", r)
		}
	}

	l.emit(tokenLocalDate)

	r = l.peek()

	if r == eof {

		return l.lexRvalue
	}

	if r != ' ' && r != 'T' {
		return l.errorf("incorrect date/time separation character: %c", r)
	}

	if r == ' ' {
		// A space only separates date and time when followed by two
		// digits; otherwise the date stands alone.
		lookAhead := l.peekString(3)[1:]
		if len(lookAhead) < 2 {
			return l.lexRvalue
		}
		for _, r := range lookAhead {
			if !isDigit(r) {
				return l.lexRvalue
			}
		}
	}

	l.skip() // skip the T or ' '

	// time

	// two-digit hour
	for i := 0; i < 2; i++ {
		r := l.next()
		if !isDigit(r) {
			return l.errorf("invalid hour digit in time: %c", r)
		}
	}

	r = l.next()
	if r != ':' {
		return l.errorf("time hour/minute separator should be :, not %c", r)
	}

	// two-digit minute
	for i := 0; i < 2; i++ {
		r := l.next()
		if !isDigit(r) {
			return l.errorf("invalid minute digit in time: %c", r)
		}
	}

	r = l.next()
	if r != ':' {
		return l.errorf("time minute/second separator should be :, not %c", r)
	}

	// two-digit second
	for i := 0; i < 2; i++ {
		r := l.next()
		if !isDigit(r) {
			return l.errorf("invalid second digit in time: %c", r)
		}
	}

	// optional fractional seconds: '.' plus at least one digit
	r = l.peek()
	if r == '.' {
		l.next()
		r := l.next()
		if !isDigit(r) {
			return l.errorf("expected at least one digit in time's fraction, not %c", r)
		}

		for {
			r := l.peek()
			if !isDigit(r) {
				break
			}
			l.next()
		}
	}

	l.emit(tokenLocalTime)

	return l.lexTimeOffset

}
427
428func (l *tomlLexer) lexTimeOffset() tomlLexStateFn {
429	// potential offset
430
431	// Z
432	// -07:00
433	// +07:00
434	// nothing
435
436	r := l.peek()
437
438	if r == 'Z' {
439		l.next()
440		l.emit(tokenTimeOffset)
441	} else if r == '+' || r == '-' {
442		l.next()
443
444		for i := 0; i < 2; i++ {
445			r := l.next()
446			if !isDigit(r) {
447				return l.errorf("invalid hour digit in time offset: %c", r)
448			}
449		}
450
451		r = l.next()
452		if r != ':' {
453			return l.errorf("time offset hour/minute separator should be :, not %c", r)
454		}
455
456		for i := 0; i < 2; i++ {
457			r := l.next()
458			if !isDigit(r) {
459				return l.errorf("invalid minute digit in time offset: %c", r)
460			}
461		}
462
463		l.emit(tokenTimeOffset)
464	}
465
466	return l.lexRvalue
467}
468
469func (l *tomlLexer) lexTime() tomlLexStateFn {
470	//   v--- cursor
471	// 07:32:00
472	// 00:32:00.999999
473
474	for i := 0; i < 2; i++ {
475		r := l.next()
476		if !isDigit(r) {
477			return l.errorf("invalid minute digit in time: %c", r)
478		}
479	}
480
481	r := l.next()
482	if r != ':' {
483		return l.errorf("time minute/second separator should be :, not %c", r)
484	}
485
486	for i := 0; i < 2; i++ {
487		r := l.next()
488		if !isDigit(r) {
489			return l.errorf("invalid second digit in time: %c", r)
490		}
491	}
492
493	r = l.peek()
494	if r == '.' {
495		l.next()
496		r := l.next()
497		if !isDigit(r) {
498			return l.errorf("expected at least one digit in time's fraction, not %c", r)
499		}
500
501		for {
502			r := l.peek()
503			if !isDigit(r) {
504				break
505			}
506			l.next()
507		}
508	}
509
510	l.emit(tokenLocalTime)
511	return l.lexRvalue
512
513}
514
515func (l *tomlLexer) lexTrue() tomlLexStateFn {
516	l.fastForward(4)
517	l.emit(tokenTrue)
518	return l.lexRvalue
519}
520
521func (l *tomlLexer) lexFalse() tomlLexStateFn {
522	l.fastForward(5)
523	l.emit(tokenFalse)
524	return l.lexRvalue
525}
526
527func (l *tomlLexer) lexInf() tomlLexStateFn {
528	l.fastForward(3)
529	l.emit(tokenInf)
530	return l.lexRvalue
531}
532
533func (l *tomlLexer) lexNan() tomlLexStateFn {
534	l.fastForward(3)
535	l.emit(tokenNan)
536	return l.lexRvalue
537}
538
539func (l *tomlLexer) lexEqual() tomlLexStateFn {
540	l.next()
541	l.emit(tokenEqual)
542	return l.lexRvalue
543}
544
545func (l *tomlLexer) lexComma() tomlLexStateFn {
546	l.next()
547	l.emit(tokenComma)
548	if len(l.brackets) > 0 && l.brackets[len(l.brackets)-1] == '{' {
549		return l.lexVoid
550	}
551	return l.lexRvalue
552}
553
// Parse the key and emits its value without escape sequences.
// bare keys, basic string keys and literal string keys are supported.
// The emitted token value keeps quotes and any whitespace around the
// dots of a dotted key.
func (l *tomlLexer) lexKey() tomlLexStateFn {
	var sb strings.Builder

	for r := l.peek(); isKeyChar(r) || r == '\n' || r == '\r'; r = l.peek() {
		if r == '"' {
			// basic (double-quoted) key part, escapes processed
			l.next()
			str, err := l.lexStringAsString(`"`, false, true)
			if err != nil {
				return l.errorf(err.Error())
			}
			sb.WriteString("\"")
			sb.WriteString(str)
			sb.WriteString("\"")
			l.next()
			continue
		} else if r == '\'' {
			// literal (single-quoted) key part, no escape processing
			l.next()
			str, err := l.lexLiteralStringAsString(`'`, false)
			if err != nil {
				return l.errorf(err.Error())
			}
			sb.WriteString("'")
			sb.WriteString(str)
			sb.WriteString("'")
			l.next()
			continue
		} else if r == '\n' {
			return l.errorf("keys cannot contain new lines")
		} else if isSpace(r) {
			// Whitespace is only legal around the dot of a dotted key;
			// buffer it until we know whether a dot follows.
			var str strings.Builder
			str.WriteString(" ")

			// skip trailing whitespace
			l.next()
			for r = l.peek(); isSpace(r); r = l.peek() {
				str.WriteRune(r)
				l.next()
			}
			// break loop if not a dot
			if r != '.' {
				break
			}
			str.WriteString(".")
			// skip trailing whitespace after dot
			l.next()
			for r = l.peek(); isSpace(r); r = l.peek() {
				str.WriteRune(r)
				l.next()
			}
			sb.WriteString(str.String())
			continue
		} else if r == '.' {
			// skip (dot separator is appended verbatim below)
		} else if !isValidBareChar(r) {
			return l.errorf("keys cannot contain %c character", r)
		}
		sb.WriteRune(r)
		l.next()
	}
	l.emitWithValue(tokenKey, sb.String())
	return l.lexVoid
}
618
619func (l *tomlLexer) lexComment(previousState tomlLexStateFn) tomlLexStateFn {
620	return func() tomlLexStateFn {
621		for next := l.peek(); next != '\n' && next != eof; next = l.peek() {
622			if next == '\r' && l.follow("\r\n") {
623				break
624			}
625			l.next()
626		}
627		l.ignore()
628		return previousState
629	}
630}
631
632func (l *tomlLexer) lexLeftBracket() tomlLexStateFn {
633	l.next()
634	l.emit(tokenLeftBracket)
635	l.brackets = append(l.brackets, '[')
636	return l.lexRvalue
637}
638
639func (l *tomlLexer) lexLiteralStringAsString(terminator string, discardLeadingNewLine bool) (string, error) {
640	var sb strings.Builder
641
642	if discardLeadingNewLine {
643		if l.follow("\r\n") {
644			l.skip()
645			l.skip()
646		} else if l.peek() == '\n' {
647			l.skip()
648		}
649	}
650
651	// find end of string
652	for {
653		if l.follow(terminator) {
654			return sb.String(), nil
655		}
656
657		next := l.peek()
658		if next == eof {
659			break
660		}
661		sb.WriteRune(l.next())
662	}
663
664	return "", errors.New("unclosed string")
665}
666
667func (l *tomlLexer) lexLiteralString() tomlLexStateFn {
668	l.skip()
669
670	// handle special case for triple-quote
671	terminator := "'"
672	discardLeadingNewLine := false
673	if l.follow("''") {
674		l.skip()
675		l.skip()
676		terminator = "'''"
677		discardLeadingNewLine = true
678	}
679
680	str, err := l.lexLiteralStringAsString(terminator, discardLeadingNewLine)
681	if err != nil {
682		return l.errorf(err.Error())
683	}
684
685	l.emitWithValue(tokenString, str)
686	l.fastForward(len(terminator))
687	l.ignore()
688	return l.lexRvalue
689}
690
// Lex a string and return the results as a string.
// Terminator is the substring indicating the end of the token.
// The resulting string does not include the terminator.
//
// When discardLeadingNewLine is set, a newline immediately following
// the opening delimiter is dropped (multi-line strings). When
// acceptNewLines is set, raw newlines are legal inside the string;
// otherwise they are rejected as unescaped control characters.
func (l *tomlLexer) lexStringAsString(terminator string, discardLeadingNewLine, acceptNewLines bool) (string, error) {
	var sb strings.Builder

	if discardLeadingNewLine {
		if l.follow("\r\n") {
			l.skip()
			l.skip()
		} else if l.peek() == '\n' {
			l.skip()
		}
	}

	for {
		if l.follow(terminator) {
			return sb.String(), nil
		}

		if l.follow("\\") {
			l.next()
			switch l.peek() {
			case '\r':
				fallthrough
			case '\n':
				fallthrough
			case '\t':
				fallthrough
			case ' ':
				// "line-ending backslash": skip all whitespace chars
				// following the backslash
				for strings.ContainsRune("\r\n\t ", l.peek()) {
					l.next()
				}
			case '"':
				sb.WriteString("\"")
				l.next()
			case 'n':
				sb.WriteString("\n")
				l.next()
			case 'b':
				sb.WriteString("\b")
				l.next()
			case 'f':
				sb.WriteString("\f")
				l.next()
			case '/':
				sb.WriteString("/")
				l.next()
			case 't':
				sb.WriteString("\t")
				l.next()
			case 'r':
				sb.WriteString("\r")
				l.next()
			case '\\':
				sb.WriteString("\\")
				l.next()
			case 'u':
				// \uXXXX: exactly four hex digits
				l.next()
				var code strings.Builder
				for i := 0; i < 4; i++ {
					c := l.peek()
					if !isHexDigit(c) {
						return "", errors.New("unfinished unicode escape")
					}
					l.next()
					code.WriteRune(c)
				}
				intcode, err := strconv.ParseInt(code.String(), 16, 32)
				if err != nil {
					return "", errors.New("invalid unicode escape: \\u" + code.String())
				}
				sb.WriteRune(rune(intcode))
			case 'U':
				// \UXXXXXXXX: exactly eight hex digits
				l.next()
				var code strings.Builder
				for i := 0; i < 8; i++ {
					c := l.peek()
					if !isHexDigit(c) {
						return "", errors.New("unfinished unicode escape")
					}
					l.next()
					code.WriteRune(c)
				}
				intcode, err := strconv.ParseInt(code.String(), 16, 64)
				if err != nil {
					return "", errors.New("invalid unicode escape: \\U" + code.String())
				}
				sb.WriteRune(rune(intcode))
			default:
				return "", errors.New("invalid escape sequence: \\" + string(l.peek()))
			}
		} else {
			r := l.peek()

			// reject raw control characters (tab excepted, and newlines
			// only when acceptNewLines is set)
			if 0x00 <= r && r <= 0x1F && r != '\t' && !(acceptNewLines && (r == '\n' || r == '\r')) {
				return "", fmt.Errorf("unescaped control character %U", r)
			}
			l.next()
			sb.WriteRune(r)
		}

		if l.peek() == eof {
			break
		}
	}

	return "", errors.New("unclosed string")
}
801
802func (l *tomlLexer) lexString() tomlLexStateFn {
803	l.skip()
804
805	// handle special case for triple-quote
806	terminator := `"`
807	discardLeadingNewLine := false
808	acceptNewLines := false
809	if l.follow(`""`) {
810		l.skip()
811		l.skip()
812		terminator = `"""`
813		discardLeadingNewLine = true
814		acceptNewLines = true
815	}
816
817	str, err := l.lexStringAsString(terminator, discardLeadingNewLine, acceptNewLines)
818	if err != nil {
819		return l.errorf(err.Error())
820	}
821
822	l.emitWithValue(tokenString, str)
823	l.fastForward(len(terminator))
824	l.ignore()
825	return l.lexRvalue
826}
827
828func (l *tomlLexer) lexTableKey() tomlLexStateFn {
829	l.next()
830
831	if l.peek() == '[' {
832		// token '[[' signifies an array of tables
833		l.next()
834		l.emit(tokenDoubleLeftBracket)
835		return l.lexInsideTableArrayKey
836	}
837	// vanilla table key
838	l.emit(tokenLeftBracket)
839	return l.lexInsideTableKey
840}
841
842// Parse the key till "]]", but only bare keys are supported
843func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn {
844	for r := l.peek(); r != eof; r = l.peek() {
845		switch r {
846		case ']':
847			if l.currentTokenStop > l.currentTokenStart {
848				l.emit(tokenKeyGroupArray)
849			}
850			l.next()
851			if l.peek() != ']' {
852				break
853			}
854			l.next()
855			l.emit(tokenDoubleRightBracket)
856			return l.lexVoid
857		case '[':
858			return l.errorf("table array key cannot contain ']'")
859		default:
860			l.next()
861		}
862	}
863	return l.errorf("unclosed table array key")
864}
865
866// Parse the key till "]" but only bare keys are supported
867func (l *tomlLexer) lexInsideTableKey() tomlLexStateFn {
868	for r := l.peek(); r != eof; r = l.peek() {
869		switch r {
870		case ']':
871			if l.currentTokenStop > l.currentTokenStart {
872				l.emit(tokenKeyGroup)
873			}
874			l.next()
875			l.emit(tokenRightBracket)
876			return l.lexVoid
877		case '[':
878			return l.errorf("table key cannot contain ']'")
879		default:
880			l.next()
881		}
882	}
883	return l.errorf("unclosed table key")
884}
885
886func (l *tomlLexer) lexRightBracket() tomlLexStateFn {
887	l.next()
888	l.emit(tokenRightBracket)
889	if len(l.brackets) == 0 || l.brackets[len(l.brackets)-1] != '[' {
890		return l.errorf("cannot have ']' here")
891	}
892	l.brackets = l.brackets[:len(l.brackets)-1]
893	return l.lexRvalue
894}
895
// validRuneFn reports whether a rune may appear in the digits of a
// number of some base (underscore separators included).
type validRuneFn func(r rune) bool

// isValidHexRune accepts hexadecimal digits (either case) and '_'.
func isValidHexRune(r rune) bool {
	switch {
	case r >= '0' && r <= '9':
		return true
	case r >= 'a' && r <= 'f':
		return true
	case r >= 'A' && r <= 'F':
		return true
	default:
		return r == '_'
	}
}
904
// isValidOctalRune accepts octal digits and '_'.
func isValidOctalRune(r rune) bool {
	return r == '_' || ('0' <= r && r <= '7')
}
908
// isValidBinaryRune accepts '0', '1' and '_'.
func isValidBinaryRune(r rune) bool {
	switch r {
	case '0', '1', '_':
		return true
	}
	return false
}
912
// lexNumber lexes an integer (decimal, hex, octal or binary) or a
// float, including an optional leading sign and signed inf/nan forms.
func (l *tomlLexer) lexNumber() tomlLexStateFn {
	r := l.peek()

	// "0x" / "0o" / "0b" prefixes select a non-decimal integer base
	if r == '0' {
		follow := l.peekString(2)
		if len(follow) == 2 {
			var isValidRune validRuneFn
			switch follow[1] {
			case 'x':
				isValidRune = isValidHexRune
			case 'o':
				isValidRune = isValidOctalRune
			case 'b':
				isValidRune = isValidBinaryRune
			default:
				// any other letter after '0' is an unknown base; a
				// non-letter falls through to decimal handling below
				if follow[1] >= 'a' && follow[1] <= 'z' || follow[1] >= 'A' && follow[1] <= 'Z' {
					return l.errorf("unknown number base: %s. possible options are x (hex) o (octal) b (binary)", string(follow[1]))
				}
			}

			if isValidRune != nil {
				// consume the two-character prefix, then the digits
				l.next()
				l.next()
				digitSeen := false
				for {
					next := l.peek()
					if !isValidRune(next) {
						break
					}
					digitSeen = true
					l.next()
				}

				if !digitSeen {
					return l.errorf("number needs at least one digit")
				}

				l.emit(tokenInteger)

				return l.lexRvalue
			}
		}
	}

	// optional sign; signed inf/nan are recognized here
	if r == '+' || r == '-' {
		l.next()
		if l.follow("inf") {
			return l.lexInf
		}
		if l.follow("nan") {
			return l.lexNan
		}
	}

	// decimal integer or float: digits with optional '_' separators,
	// at most one '.', and an optional exponent with optional sign
	pointSeen := false
	expSeen := false
	digitSeen := false
	for {
		next := l.peek()
		if next == '.' {
			if pointSeen {
				return l.errorf("cannot have two dots in one float")
			}
			l.next()
			if !isDigit(l.peek()) {
				return l.errorf("float cannot end with a dot")
			}
			pointSeen = true
		} else if next == 'e' || next == 'E' {
			expSeen = true
			l.next()
			r := l.peek()
			if r == '+' || r == '-' {
				l.next()
			}
		} else if isDigit(next) {
			digitSeen = true
			l.next()
		} else if next == '_' {
			l.next()
		} else {
			break
		}
		// a dot before any digit means the number started with '.'
		if pointSeen && !digitSeen {
			return l.errorf("cannot start float with a dot")
		}
	}

	if !digitSeen {
		return l.errorf("no digit in that number")
	}
	// a dot or exponent makes it a float; otherwise it is an integer
	if pointSeen || expSeen {
		l.emit(tokenFloat)
	} else {
		l.emit(tokenInteger)
	}
	return l.lexRvalue
}
1011
1012func (l *tomlLexer) run() {
1013	for state := l.lexVoid; state != nil; {
1014		state = state()
1015	}
1016}
1017
1018// Entry point
1019func lexToml(inputBytes []byte) []token {
1020	runes := bytes.Runes(inputBytes)
1021	l := &tomlLexer{
1022		input:         runes,
1023		tokens:        make([]token, 0, 256),
1024		line:          1,
1025		col:           1,
1026		endbufferLine: 1,
1027		endbufferCol:  1,
1028	}
1029	l.run()
1030	return l.tokens
1031}
1032