1package toml
2
3import (
4	"errors"
5	"fmt"
6	"strconv"
7	"strings"
8	"time"
9	"unicode/utf8"
10
11	"github.com/BurntSushi/toml/internal"
12)
13
// parser holds the state built up while the items produced by the lexer are
// turned into a tree of Go values.
type parser struct {
	mapping map[string]interface{} // Parsed values; tables are nested maps, arrays of tables are slices of maps.
	types   map[string]tomlType    // Type recorded by setType, indexed by the String() form of the full key.
	lx      *lexer                 // Lexer the items are read from (see next).

	ordered    []Key           // List of keys in the order that they appear in the TOML data.
	context    Key             // Full key for the current hash in scope.
	currentKey string          // Base key name for everything except hashes.
	approxLine int             // Rough approximation of line number
	implicits  map[string]bool // Record implied keys (e.g. 'key.group.names').
}
25
// ParseError describes a TOML document that could not be parsed: an invalid
// integer literal, a duplicate key, and so on.
type ParseError struct {
	Message string // Description of the problem.
	Line    int    // Approximate line number at which the problem was found.
	LastKey string // Last key the parser handled before the problem.
}

// Error formats the line number, the last key and the message as one string.
func (pe ParseError) Error() string {
	const layout = "Near line %d (last key parsed '%s'): %s"
	return fmt.Sprintf(layout, pe.Line, pe.LastKey, pe.Message)
}
38
39func parse(data string) (p *parser, err error) {
40	defer func() {
41		if r := recover(); r != nil {
42			var ok bool
43			if err, ok = r.(ParseError); ok {
44				return
45			}
46			panic(r)
47		}
48	}()
49
50	// Read over BOM; do this here as the lexer calls utf8.DecodeRuneInString()
51	// which mangles stuff.
52	if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") {
53		data = data[2:]
54	}
55
56	// Examine first few bytes for NULL bytes; this probably means it's a UTF-16
57	// file (second byte in surrogate pair being NULL). Again, do this here to
58	// avoid having to deal with UTF-8/16 stuff in the lexer.
59	ex := 6
60	if len(data) < 6 {
61		ex = len(data)
62	}
63	if strings.ContainsRune(data[:ex], 0) {
64		return nil, errors.New("files cannot contain NULL bytes; probably using UTF-16; TOML files must be UTF-8")
65	}
66
67	p = &parser{
68		mapping:   make(map[string]interface{}),
69		types:     make(map[string]tomlType),
70		lx:        lex(data),
71		ordered:   make([]Key, 0),
72		implicits: make(map[string]bool),
73	}
74	for {
75		item := p.next()
76		if item.typ == itemEOF {
77			break
78		}
79		p.topLevel(item)
80	}
81
82	return p, nil
83}
84
85func (p *parser) panicf(format string, v ...interface{}) {
86	msg := fmt.Sprintf(format, v...)
87	panic(ParseError{
88		Message: msg,
89		Line:    p.approxLine,
90		LastKey: p.current(),
91	})
92}
93
94func (p *parser) next() item {
95	it := p.lx.nextItem()
96	//fmt.Printf("ITEM %-18s line %-3d │ %q\n", it.typ, it.line, it.val)
97	if it.typ == itemError {
98		p.panicf("%s", it.val)
99	}
100	return it
101}
102
103func (p *parser) bug(format string, v ...interface{}) {
104	panic(fmt.Sprintf("BUG: "+format+"\n\n", v...))
105}
106
107func (p *parser) expect(typ itemType) item {
108	it := p.next()
109	p.assertEqual(typ, it.typ)
110	return it
111}
112
113func (p *parser) assertEqual(expected, got itemType) {
114	if expected != got {
115		p.bug("Expected '%s' but got '%s'.", expected, got)
116	}
117}
118
119func (p *parser) topLevel(item item) {
120	switch item.typ {
121	case itemCommentStart: // # ..
122		p.approxLine = item.line
123		p.expect(itemText)
124	case itemTableStart: // [ .. ]
125		name := p.next()
126		p.approxLine = name.line
127
128		var key Key
129		for ; name.typ != itemTableEnd && name.typ != itemEOF; name = p.next() {
130			key = append(key, p.keyString(name))
131		}
132		p.assertEqual(itemTableEnd, name.typ)
133
134		p.addContext(key, false)
135		p.setType("", tomlHash)
136		p.ordered = append(p.ordered, key)
137	case itemArrayTableStart: // [[ .. ]]
138		name := p.next()
139		p.approxLine = name.line
140
141		var key Key
142		for ; name.typ != itemArrayTableEnd && name.typ != itemEOF; name = p.next() {
143			key = append(key, p.keyString(name))
144		}
145		p.assertEqual(itemArrayTableEnd, name.typ)
146
147		p.addContext(key, true)
148		p.setType("", tomlArrayHash)
149		p.ordered = append(p.ordered, key)
150	case itemKeyStart: // key = ..
151		outerContext := p.context
152		/// Read all the key parts (e.g. 'a' and 'b' in 'a.b')
153		k := p.next()
154		p.approxLine = k.line
155		var key Key
156		for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
157			key = append(key, p.keyString(k))
158		}
159		p.assertEqual(itemKeyEnd, k.typ)
160
161		/// The current key is the last part.
162		p.currentKey = key[len(key)-1]
163
164		/// All the other parts (if any) are the context; need to set each part
165		/// as implicit.
166		context := key[:len(key)-1]
167		for i := range context {
168			p.addImplicitContext(append(p.context, context[i:i+1]...))
169		}
170
171		/// Set value.
172		val, typ := p.value(p.next(), false)
173		p.set(p.currentKey, val, typ)
174		p.ordered = append(p.ordered, p.context.add(p.currentKey))
175
176		/// Remove the context we added (preserving any context from [tbl] lines).
177		p.context = outerContext
178		p.currentKey = ""
179	default:
180		p.bug("Unexpected type at top level: %s", item.typ)
181	}
182}
183
184// Gets a string for a key (or part of a key in a table name).
185func (p *parser) keyString(it item) string {
186	switch it.typ {
187	case itemText:
188		return it.val
189	case itemString, itemMultilineString,
190		itemRawString, itemRawMultilineString:
191		s, _ := p.value(it, false)
192		return s.(string)
193	default:
194		p.bug("Unexpected key type: %s", it.typ)
195	}
196	panic("unreachable")
197}
198
// datetimeRepl rewrites "z" to "Z", and "t" and " " to "T". valueDatetime
// applies it to a datetime's text before trying the layouts in dtTypes.
var datetimeRepl = strings.NewReplacer(
	"z", "Z",
	"t", "T",
	" ", "T")
203
204// value translates an expected value from the lexer into a Go value wrapped
205// as an empty interface.
206func (p *parser) value(it item, parentIsArray bool) (interface{}, tomlType) {
207	switch it.typ {
208	case itemString:
209		return p.replaceEscapes(it.val), p.typeOfPrimitive(it)
210	case itemMultilineString:
211		return p.replaceEscapes(stripFirstNewline(stripEscapedNewlines(it.val))), p.typeOfPrimitive(it)
212	case itemRawString:
213		return it.val, p.typeOfPrimitive(it)
214	case itemRawMultilineString:
215		return stripFirstNewline(it.val), p.typeOfPrimitive(it)
216	case itemInteger:
217		return p.valueInteger(it)
218	case itemFloat:
219		return p.valueFloat(it)
220	case itemBool:
221		switch it.val {
222		case "true":
223			return true, p.typeOfPrimitive(it)
224		case "false":
225			return false, p.typeOfPrimitive(it)
226		default:
227			p.bug("Expected boolean value, but got '%s'.", it.val)
228		}
229	case itemDatetime:
230		return p.valueDatetime(it)
231	case itemArray:
232		return p.valueArray(it)
233	case itemInlineTableStart:
234		return p.valueInlineTable(it, parentIsArray)
235	default:
236		p.bug("Unexpected value type: %s", it.typ)
237	}
238	panic("unreachable")
239}
240
241func (p *parser) valueInteger(it item) (interface{}, tomlType) {
242	if !numUnderscoresOK(it.val) {
243		p.panicf("Invalid integer %q: underscores must be surrounded by digits", it.val)
244	}
245	if numHasLeadingZero(it.val) {
246		p.panicf("Invalid integer %q: cannot have leading zeroes", it.val)
247	}
248
249	num, err := strconv.ParseInt(it.val, 0, 64)
250	if err != nil {
251		// Distinguish integer values. Normally, it'd be a bug if the lexer
252		// provides an invalid integer, but it's possible that the number is
253		// out of range of valid values (which the lexer cannot determine).
254		// So mark the former as a bug but the latter as a legitimate user
255		// error.
256		if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
257			p.panicf("Integer '%s' is out of the range of 64-bit signed integers.", it.val)
258		} else {
259			p.bug("Expected integer value, but got '%s'.", it.val)
260		}
261	}
262	return num, p.typeOfPrimitive(it)
263}
264
265func (p *parser) valueFloat(it item) (interface{}, tomlType) {
266	parts := strings.FieldsFunc(it.val, func(r rune) bool {
267		switch r {
268		case '.', 'e', 'E':
269			return true
270		}
271		return false
272	})
273	for _, part := range parts {
274		if !numUnderscoresOK(part) {
275			p.panicf("Invalid float %q: underscores must be surrounded by digits", it.val)
276		}
277	}
278	if len(parts) > 0 && numHasLeadingZero(parts[0]) {
279		p.panicf("Invalid float %q: cannot have leading zeroes", it.val)
280	}
281	if !numPeriodsOK(it.val) {
282		// As a special case, numbers like '123.' or '1.e2',
283		// which are valid as far as Go/strconv are concerned,
284		// must be rejected because TOML says that a fractional
285		// part consists of '.' followed by 1+ digits.
286		p.panicf("Invalid float %q: '.' must be followed by one or more digits", it.val)
287	}
288	val := strings.Replace(it.val, "_", "", -1)
289	if val == "+nan" || val == "-nan" { // Go doesn't support this, but TOML spec does.
290		val = "nan"
291	}
292	num, err := strconv.ParseFloat(val, 64)
293	if err != nil {
294		if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
295			p.panicf("Float '%s' is out of the range of 64-bit IEEE-754 floating-point numbers.", it.val)
296		} else {
297			p.panicf("Invalid float value: %q", it.val)
298		}
299	}
300	return num, p.typeOfPrimitive(it)
301}
302
// dtTypes lists the layouts valueDatetime tries, in this order, each paired
// with the location handed to time.ParseInLocation. The first layout that
// parses without error wins.
var dtTypes = []struct {
	fmt  string         // Layout for time.ParseInLocation.
	zone *time.Location // Location for time.ParseInLocation.
}{
	{time.RFC3339Nano, time.Local},
	{"2006-01-02T15:04:05.999999999", internal.LocalDatetime},
	{"2006-01-02", internal.LocalDate},
	{"15:04:05.999999999", internal.LocalTime},
}
312
313func (p *parser) valueDatetime(it item) (interface{}, tomlType) {
314	it.val = datetimeRepl.Replace(it.val)
315	var (
316		t   time.Time
317		ok  bool
318		err error
319	)
320	for _, dt := range dtTypes {
321		t, err = time.ParseInLocation(dt.fmt, it.val, dt.zone)
322		if err == nil {
323			ok = true
324			break
325		}
326	}
327	if !ok {
328		p.panicf("Invalid TOML Datetime: %q.", it.val)
329	}
330	return t, p.typeOfPrimitive(it)
331}
332
333func (p *parser) valueArray(it item) (interface{}, tomlType) {
334	p.setType(p.currentKey, tomlArray)
335
336	// p.setType(p.currentKey, typ)
337	var (
338		array []interface{}
339		types []tomlType
340	)
341	for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
342		if it.typ == itemCommentStart {
343			p.expect(itemText)
344			continue
345		}
346
347		val, typ := p.value(it, true)
348		array = append(array, val)
349		types = append(types, typ)
350	}
351	return array, tomlArray
352}
353
// valueInlineTable reads the key/value pairs of an inline table up to the
// closing itemInlineTableEnd. Each pair is stored through set under a context
// made of the enclosing context plus the current key, and is also collected in
// the returned map under the last part of its key. The context and current
// key in effect on entry are restored before returning.
//
// parentIsArray is passed to addContext as its array argument.
func (p *parser) valueInlineTable(it item, parentIsArray bool) (interface{}, tomlType) {
	var (
		hash         = make(map[string]interface{})
		outerContext = p.context
		outerKey     = p.currentKey
	)

	// The table's own key becomes part of the context for the pairs inside it.
	p.context = append(p.context, p.currentKey)
	prevContext := p.context
	p.currentKey = ""

	p.addImplicit(p.context)
	p.addContext(p.context, parentIsArray)

	/// Loop over all table key/value pairs. Note that this `it` shadows the
	/// parameter; apart from comment starts, the item read here is not
	/// inspected further (presumably it is the key-start marker; verify
	/// against the lexer).
	for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() {
		if it.typ == itemCommentStart {
			p.expect(itemText)
			continue
		}

		/// Read all key parts.
		k := p.next()
		p.approxLine = k.line
		var key Key
		for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
			key = append(key, p.keyString(k))
		}
		p.assertEqual(itemKeyEnd, k.typ)

		/// The current key is the last part.
		p.currentKey = key[len(key)-1]

		/// All the other parts (if any) are the context; need to set each part
		/// as implicit.
		context := key[:len(key)-1]
		for i := range context {
			p.addImplicitContext(append(p.context, context[i:i+1]...))
		}

		/// Set the value.
		val, typ := p.value(p.next(), false)
		p.set(p.currentKey, val, typ)
		p.ordered = append(p.ordered, p.context.add(p.currentKey))
		// NOTE(review): only the last key part is used here, so for a dotted
		// key the returned map does not mirror the nesting stored via set.
		hash[p.currentKey] = val

		/// Restore context.
		p.context = prevContext
	}
	// Put back the state the caller had before the inline table started.
	p.context = outerContext
	p.currentKey = outerKey
	return hash, tomlHash
}
407
// numHasLeadingZero checks if this number has leading zeroes, allowing for '0',
// +/- signs, and base prefixes.
//
// A '0' followed by another digit or by an underscore counts as a leading
// zero, so "01" and "0_1" are both reported. A '0' followed by anything else
// (such as the 'x' in "0x1f") is not. After a sign, any further character
// following the '0' is reported.
func numHasLeadingZero(s string) bool {
	// len > 1 so that a lone "0" is allowed.
	if len(s) > 1 && s[0] == '0' {
		next := s[1]
		// The underscore case matters: strconv.ParseInt with base 0 would
		// otherwise read "0_1" as an octal literal.
		if next == '_' || (next >= '0' && next <= '9') {
			return true
		}
	}
	// len > 2 so that "+0" and "-0" are allowed.
	if len(s) > 2 && (s[0] == '-' || s[0] == '+') && s[1] == '0' {
		return true
	}
	return false
}
419
420// numUnderscoresOK checks whether each underscore in s is surrounded by
421// characters that are not underscores.
422func numUnderscoresOK(s string) bool {
423	switch s {
424	case "nan", "+nan", "-nan", "inf", "-inf", "+inf":
425		return true
426	}
427	accept := false
428	for _, r := range s {
429		if r == '_' {
430			if !accept {
431				return false
432			}
433		}
434
435		// isHexadecimal is a superset of all the permissable characters
436		// surrounding an underscore.
437		accept = isHexadecimal(r)
438	}
439	return accept
440}
441
442// numPeriodsOK checks whether every period in s is followed by a digit.
443func numPeriodsOK(s string) bool {
444	period := false
445	for _, r := range s {
446		if period && !isDigit(r) {
447			return false
448		}
449		period = r == '.'
450	}
451	return !period
452}
453
454// Set the current context of the parser, where the context is either a hash or
455// an array of hashes, depending on the value of the `array` parameter.
456//
457// Establishing the context also makes sure that the key isn't a duplicate, and
458// will create implicit hashes automatically.
459func (p *parser) addContext(key Key, array bool) {
460	var ok bool
461
462	// Always start at the top level and drill down for our context.
463	hashContext := p.mapping
464	keyContext := make(Key, 0)
465
466	// We only need implicit hashes for key[0:-1]
467	for _, k := range key[0 : len(key)-1] {
468		_, ok = hashContext[k]
469		keyContext = append(keyContext, k)
470
471		// No key? Make an implicit hash and move on.
472		if !ok {
473			p.addImplicit(keyContext)
474			hashContext[k] = make(map[string]interface{})
475		}
476
477		// If the hash context is actually an array of tables, then set
478		// the hash context to the last element in that array.
479		//
480		// Otherwise, it better be a table, since this MUST be a key group (by
481		// virtue of it not being the last element in a key).
482		switch t := hashContext[k].(type) {
483		case []map[string]interface{}:
484			hashContext = t[len(t)-1]
485		case map[string]interface{}:
486			hashContext = t
487		default:
488			p.panicf("Key '%s' was already created as a hash.", keyContext)
489		}
490	}
491
492	p.context = keyContext
493	if array {
494		// If this is the first element for this array, then allocate a new
495		// list of tables for it.
496		k := key[len(key)-1]
497		if _, ok := hashContext[k]; !ok {
498			hashContext[k] = make([]map[string]interface{}, 0, 4)
499		}
500
501		// Add a new table. But make sure the key hasn't already been used
502		// for something else.
503		if hash, ok := hashContext[k].([]map[string]interface{}); ok {
504			hashContext[k] = append(hash, make(map[string]interface{}))
505		} else {
506			p.panicf("Key '%s' was already created and cannot be used as an array.", keyContext)
507		}
508	} else {
509		p.setValue(key[len(key)-1], make(map[string]interface{}))
510	}
511	p.context = append(p.context, key[len(key)-1])
512}
513
514// set calls setValue and setType.
515func (p *parser) set(key string, val interface{}, typ tomlType) {
516	p.setValue(p.currentKey, val)
517	p.setType(p.currentKey, typ)
518}
519
// setValue sets the given key to the given value in the current context.
// It will make sure that the key hasn't already been defined, accounting for
// implicit key groups.
//
// The hash to write into is found by walking p.context down from the top-level
// mapping. A context part that is missing is reported as a bug; a context part
// that is neither a table nor an array of tables, and a concrete redefinition
// of an existing key, are reported through panicf.
func (p *parser) setValue(key string, value interface{}) {
	var (
		tmpHash    interface{}
		ok         bool
		hash       = p.mapping
		keyContext Key
	)
	for _, k := range p.context {
		keyContext = append(keyContext, k)
		if tmpHash, ok = hash[k]; !ok {
			p.bug("Context for key '%s' has not been established.", keyContext)
		}
		switch t := tmpHash.(type) {
		case []map[string]interface{}:
			// The context is a table of hashes. Pick the most recent table
			// defined as the current hash.
			hash = t[len(t)-1]
		case map[string]interface{}:
			hash = t
		default:
			p.panicf("Key '%s' has already been defined.", keyContext)
		}
	}
	// From here on keyContext is the full key of the value being set.
	keyContext = append(keyContext, key)

	if _, ok := hash[key]; ok {
		// Normally redefining keys isn't allowed, but the key could have been
		// defined implicitly and it's allowed to be redefined concretely. (See
		// the `valid/implicit-and-explicit-after.toml` in toml-test)
		//
		// But we have to make sure to stop marking it as an implicit. (So that
		// another redefinition provokes an error.)

		// A key whose recorded type is tomlArray is overwritten with the new
		// value.
		if p.isArray(keyContext) {
			p.removeImplicit(keyContext)
			hash[key] = value
			return
		}
		// Note that since an implicit key has already been defined (as a
		// hash), we don't want to overwrite it. So our business is done.
		if p.isImplicit(keyContext) {
			p.removeImplicit(keyContext)
			return
		}

		// Otherwise, we have a concrete key trying to override a previous
		// key, which is *always* wrong.
		p.panicf("Key '%s' has already been defined.", keyContext)
	}

	hash[key] = value
}
575
576// setType sets the type of a particular value at a given key.
577// It should be called immediately AFTER setValue.
578//
579// Note that if `key` is empty, then the type given will be applied to the
580// current context (which is either a table or an array of tables).
581func (p *parser) setType(key string, typ tomlType) {
582	keyContext := make(Key, 0, len(p.context)+1)
583	for _, k := range p.context {
584		keyContext = append(keyContext, k)
585	}
586	if len(key) > 0 { // allow type setting for hashes
587		keyContext = append(keyContext, key)
588	}
589	p.types[keyContext.String()] = typ
590}
591
592// Implicit keys need to be created when tables are implied in "a.b.c.d = 1" and
593// "[a.b.c]" (the "a", "b", and "c" hashes are never created explicitly).
594func (p *parser) addImplicit(key Key)     { p.implicits[key.String()] = true }
595func (p *parser) removeImplicit(key Key)  { p.implicits[key.String()] = false }
596func (p *parser) isImplicit(key Key) bool { return p.implicits[key.String()] }
597func (p *parser) isArray(key Key) bool    { return p.types[key.String()] == tomlArray }
598func (p *parser) addImplicitContext(key Key) {
599	p.addImplicit(key)
600	p.addContext(key, false)
601}
602
603// current returns the full key name of the current context.
604func (p *parser) current() string {
605	if len(p.currentKey) == 0 {
606		return p.context.String()
607	}
608	if len(p.context) == 0 {
609		return p.currentKey
610	}
611	return fmt.Sprintf("%s.%s", p.context, p.currentKey)
612}
613
// stripFirstNewline removes a single leading "\n" or "\r\n" from s, if there
// is one, and returns s unchanged otherwise.
func stripFirstNewline(s string) string {
	for _, nl := range [...]string{"\n", "\r\n"} {
		if strings.HasPrefix(s, nl) {
			return s[len(nl):]
		}
	}
	return s
}
623
// stripEscapedNewlines removes line-ending backslashes from the contents of a
// triple-quoted string. When the last non-whitespace character on a line is an
// unescaped "\", that backslash is dropped together with all whitespace and
// newlines that follow it, up to the next non-whitespace character or the end
// of s.
//
// A line ending in an even number of backslashes ends in an escaped backslash
// and is not a continuation. Lines that are kept have trailing "\r" removed
// and are joined with "\n". Other escape sequences are left untouched.
func stripEscapedNewlines(s string) string {
	lines := strings.Split(s, "\n")

	var b strings.Builder
	b.Grow(len(s))

	// continued is true while the whitespace following a line-ending backslash
	// is being swallowed.
	continued := false
	for i, line := range lines {
		last := i == len(lines)-1

		if continued {
			line = strings.TrimLeft(line, " \t\r")
			if line == "" && !last {
				// Blank line inside the swallowed region.
				continue
			}
			// Reached real content (or the end): normal handling resumes, so
			// later lines get their newlines back.
			continued = false
		}

		// Count the backslashes at the end of the line, ignoring trailing
		// whitespace; an odd count means the final one is unescaped.
		trimmed := strings.TrimRight(line, " \t\r")
		backslashes := 0
		for j := len(trimmed) - 1; j >= 0 && trimmed[j] == '\\'; j-- {
			backslashes++
		}
		if backslashes%2 == 1 {
			b.WriteString(trimmed[:len(trimmed)-1]) // Remove \
			continued = true
			continue
		}

		b.WriteString(strings.TrimRight(line, "\r"))
		if !last {
			b.WriteByte('\n')
		}
	}
	return b.String()
}
664
665func (p *parser) replaceEscapes(str string) string {
666	var replaced []rune
667	s := []byte(str)
668	r := 0
669	for r < len(s) {
670		if s[r] != '\\' {
671			c, size := utf8.DecodeRune(s[r:])
672			r += size
673			replaced = append(replaced, c)
674			continue
675		}
676		r += 1
677		if r >= len(s) {
678			p.bug("Escape sequence at end of string.")
679			return ""
680		}
681		switch s[r] {
682		default:
683			p.bug("Expected valid escape code after \\, but got %q.", s[r])
684			return ""
685		case ' ', '\t':
686			p.panicf("invalid escape: '\\%c'", s[r])
687			return ""
688		case 'b':
689			replaced = append(replaced, rune(0x0008))
690			r += 1
691		case 't':
692			replaced = append(replaced, rune(0x0009))
693			r += 1
694		case 'n':
695			replaced = append(replaced, rune(0x000A))
696			r += 1
697		case 'f':
698			replaced = append(replaced, rune(0x000C))
699			r += 1
700		case 'r':
701			replaced = append(replaced, rune(0x000D))
702			r += 1
703		case '"':
704			replaced = append(replaced, rune(0x0022))
705			r += 1
706		case '\\':
707			replaced = append(replaced, rune(0x005C))
708			r += 1
709		case 'u':
710			// At this point, we know we have a Unicode escape of the form
711			// `uXXXX` at [r, r+5). (Because the lexer guarantees this
712			// for us.)
713			escaped := p.asciiEscapeToUnicode(s[r+1 : r+5])
714			replaced = append(replaced, escaped)
715			r += 5
716		case 'U':
717			// At this point, we know we have a Unicode escape of the form
718			// `uXXXX` at [r, r+9). (Because the lexer guarantees this
719			// for us.)
720			escaped := p.asciiEscapeToUnicode(s[r+1 : r+9])
721			replaced = append(replaced, escaped)
722			r += 9
723		}
724	}
725	return string(replaced)
726}
727
728func (p *parser) asciiEscapeToUnicode(bs []byte) rune {
729	s := string(bs)
730	hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
731	if err != nil {
732		p.bug("Could not parse '%s' as a hexadecimal number, but the "+
733			"lexer claims it's OK: %s", s, err)
734	}
735	if !utf8.ValidRune(rune(hex)) {
736		p.panicf("Escaped character '\\u%s' is not valid UTF-8.", s)
737	}
738	return rune(hex)
739}
740