1package toml
2
3import (
4	"fmt"
5	"strconv"
6	"strings"
7	"time"
8	"unicode"
9	"unicode/utf8"
10)
11
12type parser struct {
13	mapping map[string]interface{}
14	types   map[string]tomlType
15	lx      *lexer
16
17	// A list of keys in the order that they appear in the TOML data.
18	ordered []Key
19
20	// the full key for the current hash in scope
21	context Key
22
23	// the base key name for everything except hashes
24	currentKey string
25
26	// rough approximation of line number
27	approxLine int
28
29	// A map of 'key.group.names' to whether they were created implicitly.
30	implicits map[string]bool
31}
32
// parseError is the error value used for problems found in the TOML input.
// The parser raises it with panic, and parse recovers it and returns it as
// an ordinary error.
type parseError string

// Error implements the error interface by returning the message text.
func (pe parseError) Error() string {
	msg := string(pe)
	return msg
}
38
39func parse(data string) (p *parser, err error) {
40	defer func() {
41		if r := recover(); r != nil {
42			var ok bool
43			if err, ok = r.(parseError); ok {
44				return
45			}
46			panic(r)
47		}
48	}()
49
50	p = &parser{
51		mapping:   make(map[string]interface{}),
52		types:     make(map[string]tomlType),
53		lx:        lex(data),
54		ordered:   make([]Key, 0),
55		implicits: make(map[string]bool),
56	}
57	for {
58		item := p.next()
59		if item.typ == itemEOF {
60			break
61		}
62		p.topLevel(item)
63	}
64
65	return p, nil
66}
67
68func (p *parser) panicf(format string, v ...interface{}) {
69	msg := fmt.Sprintf("Near line %d (last key parsed '%s'): %s",
70		p.approxLine, p.current(), fmt.Sprintf(format, v...))
71	panic(parseError(msg))
72}
73
74func (p *parser) next() item {
75	it := p.lx.nextItem()
76	if it.typ == itemError {
77		p.panicf("%s", it.val)
78	}
79	return it
80}
81
82func (p *parser) bug(format string, v ...interface{}) {
83	panic(fmt.Sprintf("BUG: "+format+"\n\n", v...))
84}
85
86func (p *parser) expect(typ itemType) item {
87	it := p.next()
88	p.assertEqual(typ, it.typ)
89	return it
90}
91
92func (p *parser) assertEqual(expected, got itemType) {
93	if expected != got {
94		p.bug("Expected '%s' but got '%s'.", expected, got)
95	}
96}
97
98func (p *parser) topLevel(item item) {
99	switch item.typ {
100	case itemCommentStart:
101		p.approxLine = item.line
102		p.expect(itemText)
103	case itemTableStart:
104		kg := p.next()
105		p.approxLine = kg.line
106
107		var key Key
108		for ; kg.typ != itemTableEnd && kg.typ != itemEOF; kg = p.next() {
109			key = append(key, p.keyString(kg))
110		}
111		p.assertEqual(itemTableEnd, kg.typ)
112
113		p.establishContext(key, false)
114		p.setType("", tomlHash)
115		p.ordered = append(p.ordered, key)
116	case itemArrayTableStart:
117		kg := p.next()
118		p.approxLine = kg.line
119
120		var key Key
121		for ; kg.typ != itemArrayTableEnd && kg.typ != itemEOF; kg = p.next() {
122			key = append(key, p.keyString(kg))
123		}
124		p.assertEqual(itemArrayTableEnd, kg.typ)
125
126		p.establishContext(key, true)
127		p.setType("", tomlArrayHash)
128		p.ordered = append(p.ordered, key)
129	case itemKeyStart:
130		kname := p.next()
131		p.approxLine = kname.line
132		p.currentKey = p.keyString(kname)
133
134		val, typ := p.value(p.next())
135		p.setValue(p.currentKey, val)
136		p.setType(p.currentKey, typ)
137		p.ordered = append(p.ordered, p.context.add(p.currentKey))
138		p.currentKey = ""
139	default:
140		p.bug("Unexpected type at top level: %s", item.typ)
141	}
142}
143
144// Gets a string for a key (or part of a key in a table name).
145func (p *parser) keyString(it item) string {
146	switch it.typ {
147	case itemText:
148		return it.val
149	case itemString, itemMultilineString,
150		itemRawString, itemRawMultilineString:
151		s, _ := p.value(it)
152		return s.(string)
153	default:
154		p.bug("Unexpected key type: %s", it.typ)
155		panic("unreachable")
156	}
157}
158
159// value translates an expected value from the lexer into a Go value wrapped
160// as an empty interface.
161func (p *parser) value(it item) (interface{}, tomlType) {
162	switch it.typ {
163	case itemString:
164		return p.replaceEscapes(it.val), p.typeOfPrimitive(it)
165	case itemMultilineString:
166		trimmed := stripFirstNewline(stripEscapedWhitespace(it.val))
167		return p.replaceEscapes(trimmed), p.typeOfPrimitive(it)
168	case itemRawString:
169		return it.val, p.typeOfPrimitive(it)
170	case itemRawMultilineString:
171		return stripFirstNewline(it.val), p.typeOfPrimitive(it)
172	case itemBool:
173		switch it.val {
174		case "true":
175			return true, p.typeOfPrimitive(it)
176		case "false":
177			return false, p.typeOfPrimitive(it)
178		}
179		p.bug("Expected boolean value, but got '%s'.", it.val)
180	case itemInteger:
181		if !numUnderscoresOK(it.val) {
182			p.panicf("Invalid integer %q: underscores must be surrounded by digits",
183				it.val)
184		}
185		val := strings.Replace(it.val, "_", "", -1)
186		num, err := strconv.ParseInt(val, 10, 64)
187		if err != nil {
188			// Distinguish integer values. Normally, it'd be a bug if the lexer
189			// provides an invalid integer, but it's possible that the number is
190			// out of range of valid values (which the lexer cannot determine).
191			// So mark the former as a bug but the latter as a legitimate user
192			// error.
193			if e, ok := err.(*strconv.NumError); ok &&
194				e.Err == strconv.ErrRange {
195
196				p.panicf("Integer '%s' is out of the range of 64-bit "+
197					"signed integers.", it.val)
198			} else {
199				p.bug("Expected integer value, but got '%s'.", it.val)
200			}
201		}
202		return num, p.typeOfPrimitive(it)
203	case itemFloat:
204		parts := strings.FieldsFunc(it.val, func(r rune) bool {
205			switch r {
206			case '.', 'e', 'E':
207				return true
208			}
209			return false
210		})
211		for _, part := range parts {
212			if !numUnderscoresOK(part) {
213				p.panicf("Invalid float %q: underscores must be "+
214					"surrounded by digits", it.val)
215			}
216		}
217		if !numPeriodsOK(it.val) {
218			// As a special case, numbers like '123.' or '1.e2',
219			// which are valid as far as Go/strconv are concerned,
220			// must be rejected because TOML says that a fractional
221			// part consists of '.' followed by 1+ digits.
222			p.panicf("Invalid float %q: '.' must be followed "+
223				"by one or more digits", it.val)
224		}
225		val := strings.Replace(it.val, "_", "", -1)
226		num, err := strconv.ParseFloat(val, 64)
227		if err != nil {
228			if e, ok := err.(*strconv.NumError); ok &&
229				e.Err == strconv.ErrRange {
230
231				p.panicf("Float '%s' is out of the range of 64-bit "+
232					"IEEE-754 floating-point numbers.", it.val)
233			} else {
234				p.panicf("Invalid float value: %q", it.val)
235			}
236		}
237		return num, p.typeOfPrimitive(it)
238	case itemDatetime:
239		var t time.Time
240		var ok bool
241		var err error
242		for _, format := range []string{
243			"2006-01-02T15:04:05Z07:00",
244			"2006-01-02T15:04:05",
245			"2006-01-02",
246		} {
247			t, err = time.ParseInLocation(format, it.val, time.Local)
248			if err == nil {
249				ok = true
250				break
251			}
252		}
253		if !ok {
254			p.panicf("Invalid TOML Datetime: %q.", it.val)
255		}
256		return t, p.typeOfPrimitive(it)
257	case itemArray:
258		array := make([]interface{}, 0)
259		types := make([]tomlType, 0)
260
261		for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
262			if it.typ == itemCommentStart {
263				p.expect(itemText)
264				continue
265			}
266
267			val, typ := p.value(it)
268			array = append(array, val)
269			types = append(types, typ)
270		}
271		return array, p.typeOfArray(types)
272	case itemInlineTableStart:
273		var (
274			hash         = make(map[string]interface{})
275			outerContext = p.context
276			outerKey     = p.currentKey
277		)
278
279		p.context = append(p.context, p.currentKey)
280		p.currentKey = ""
281		for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() {
282			if it.typ != itemKeyStart {
283				p.bug("Expected key start but instead found %q, around line %d",
284					it.val, p.approxLine)
285			}
286			if it.typ == itemCommentStart {
287				p.expect(itemText)
288				continue
289			}
290
291			// retrieve key
292			k := p.next()
293			p.approxLine = k.line
294			kname := p.keyString(k)
295
296			// retrieve value
297			p.currentKey = kname
298			val, typ := p.value(p.next())
299			// make sure we keep metadata up to date
300			p.setType(kname, typ)
301			p.ordered = append(p.ordered, p.context.add(p.currentKey))
302			hash[kname] = val
303		}
304		p.context = outerContext
305		p.currentKey = outerKey
306		return hash, tomlHash
307	}
308	p.bug("Unexpected value type: %s", it.typ)
309	panic("unreachable")
310}
311
// numUnderscoresOK reports whether every underscore in s sits between two
// characters that are not underscores. It returns false for the empty
// string, for a leading or trailing underscore, and for two underscores in a
// row.
func numUnderscoresOK(s string) bool {
	// Treat the start of the string as if it followed an underscore, so a
	// leading underscore (and the empty string) is rejected by the same rule.
	// Scanning bytes is safe: '_' is ASCII and never occurs inside a
	// multi-byte UTF-8 sequence.
	prevUnderscore := true
	for i := 0; i < len(s); i++ {
		isUnderscore := s[i] == '_'
		if isUnderscore && prevUnderscore {
			return false
		}
		prevUnderscore = isUnderscore
	}
	return !prevUnderscore
}
328
329// numPeriodsOK checks whether every period in s is followed by a digit.
330func numPeriodsOK(s string) bool {
331	period := false
332	for _, r := range s {
333		if period && !isDigit(r) {
334			return false
335		}
336		period = r == '.'
337	}
338	return !period
339}
340
341// establishContext sets the current context of the parser,
342// where the context is either a hash or an array of hashes. Which one is
343// set depends on the value of the `array` parameter.
344//
345// Establishing the context also makes sure that the key isn't a duplicate, and
346// will create implicit hashes automatically.
347func (p *parser) establishContext(key Key, array bool) {
348	var ok bool
349
350	// Always start at the top level and drill down for our context.
351	hashContext := p.mapping
352	keyContext := make(Key, 0)
353
354	// We only need implicit hashes for key[0:-1]
355	for _, k := range key[0 : len(key)-1] {
356		_, ok = hashContext[k]
357		keyContext = append(keyContext, k)
358
359		// No key? Make an implicit hash and move on.
360		if !ok {
361			p.addImplicit(keyContext)
362			hashContext[k] = make(map[string]interface{})
363		}
364
365		// If the hash context is actually an array of tables, then set
366		// the hash context to the last element in that array.
367		//
368		// Otherwise, it better be a table, since this MUST be a key group (by
369		// virtue of it not being the last element in a key).
370		switch t := hashContext[k].(type) {
371		case []map[string]interface{}:
372			hashContext = t[len(t)-1]
373		case map[string]interface{}:
374			hashContext = t
375		default:
376			p.panicf("Key '%s' was already created as a hash.", keyContext)
377		}
378	}
379
380	p.context = keyContext
381	if array {
382		// If this is the first element for this array, then allocate a new
383		// list of tables for it.
384		k := key[len(key)-1]
385		if _, ok := hashContext[k]; !ok {
386			hashContext[k] = make([]map[string]interface{}, 0, 5)
387		}
388
389		// Add a new table. But make sure the key hasn't already been used
390		// for something else.
391		if hash, ok := hashContext[k].([]map[string]interface{}); ok {
392			hashContext[k] = append(hash, make(map[string]interface{}))
393		} else {
394			p.panicf("Key '%s' was already created and cannot be used as "+
395				"an array.", keyContext)
396		}
397	} else {
398		p.setValue(key[len(key)-1], make(map[string]interface{}))
399	}
400	p.context = append(p.context, key[len(key)-1])
401}
402
403// setValue sets the given key to the given value in the current context.
404// It will make sure that the key hasn't already been defined, account for
405// implicit key groups.
406func (p *parser) setValue(key string, value interface{}) {
407	var tmpHash interface{}
408	var ok bool
409
410	hash := p.mapping
411	keyContext := make(Key, 0)
412	for _, k := range p.context {
413		keyContext = append(keyContext, k)
414		if tmpHash, ok = hash[k]; !ok {
415			p.bug("Context for key '%s' has not been established.", keyContext)
416		}
417		switch t := tmpHash.(type) {
418		case []map[string]interface{}:
419			// The context is a table of hashes. Pick the most recent table
420			// defined as the current hash.
421			hash = t[len(t)-1]
422		case map[string]interface{}:
423			hash = t
424		default:
425			p.bug("Expected hash to have type 'map[string]interface{}', but "+
426				"it has '%T' instead.", tmpHash)
427		}
428	}
429	keyContext = append(keyContext, key)
430
431	if _, ok := hash[key]; ok {
432		// Typically, if the given key has already been set, then we have
433		// to raise an error since duplicate keys are disallowed. However,
434		// it's possible that a key was previously defined implicitly. In this
435		// case, it is allowed to be redefined concretely. (See the
436		// `tests/valid/implicit-and-explicit-after.toml` test in `toml-test`.)
437		//
438		// But we have to make sure to stop marking it as an implicit. (So that
439		// another redefinition provokes an error.)
440		//
441		// Note that since it has already been defined (as a hash), we don't
442		// want to overwrite it. So our business is done.
443		if p.isImplicit(keyContext) {
444			p.removeImplicit(keyContext)
445			return
446		}
447
448		// Otherwise, we have a concrete key trying to override a previous
449		// key, which is *always* wrong.
450		p.panicf("Key '%s' has already been defined.", keyContext)
451	}
452	hash[key] = value
453}
454
455// setType sets the type of a particular value at a given key.
456// It should be called immediately AFTER setValue.
457//
458// Note that if `key` is empty, then the type given will be applied to the
459// current context (which is either a table or an array of tables).
460func (p *parser) setType(key string, typ tomlType) {
461	keyContext := make(Key, 0, len(p.context)+1)
462	for _, k := range p.context {
463		keyContext = append(keyContext, k)
464	}
465	if len(key) > 0 { // allow type setting for hashes
466		keyContext = append(keyContext, key)
467	}
468	p.types[keyContext.String()] = typ
469}
470
471// addImplicit sets the given Key as having been created implicitly.
472func (p *parser) addImplicit(key Key) {
473	p.implicits[key.String()] = true
474}
475
476// removeImplicit stops tagging the given key as having been implicitly
477// created.
478func (p *parser) removeImplicit(key Key) {
479	p.implicits[key.String()] = false
480}
481
482// isImplicit returns true if the key group pointed to by the key was created
483// implicitly.
484func (p *parser) isImplicit(key Key) bool {
485	return p.implicits[key.String()]
486}
487
488// current returns the full key name of the current context.
489func (p *parser) current() string {
490	if len(p.currentKey) == 0 {
491		return p.context.String()
492	}
493	if len(p.context) == 0 {
494		return p.currentKey
495	}
496	return fmt.Sprintf("%s.%s", p.context, p.currentKey)
497}
498
// stripFirstNewline removes a single newline from the start of s, if there
// is one. TOML treats both LF and CRLF as a newline, so either form is
// removed. Any other string is returned unchanged.
func stripFirstNewline(s string) string {
	switch {
	case len(s) >= 1 && s[0] == '\n':
		return s[1:]
	case len(s) >= 2 && s[0] == '\r' && s[1] == '\n':
		return s[2:]
	}
	return s
}
505
// stripEscapedWhitespace removes every "line-ending backslash" from s: an
// unescaped backslash immediately followed by a newline (LF or CRLF) is
// dropped together with all whitespace, including further newlines, up to
// the next non-whitespace character.
//
// Every other escape sequence is copied through untouched. In particular an
// escaped backslash (`\\`) followed by a newline is NOT a line-ending
// backslash and is left as is.
func stripEscapedWhitespace(s string) string {
	// Fast path: without a backslash directly before a newline there is
	// nothing to strip, so skip the copy.
	if !strings.Contains(s, "\\\n") && !strings.Contains(s, "\\\r\n") {
		return s
	}

	out := make([]byte, 0, len(s))
	for i := 0; i < len(s); {
		if s[i] != '\\' {
			out = append(out, s[i])
			i++
			continue
		}

		rest := s[i+1:]
		if strings.HasPrefix(rest, "\n") || strings.HasPrefix(rest, "\r\n") {
			// Line-ending backslash: skip it, the newline, and all the
			// whitespace that follows.
			trimmed := strings.TrimLeftFunc(rest, unicode.IsSpace)
			i = len(s) - len(trimmed)
			continue
		}

		// Any other escape: copy the backslash together with the byte it
		// escapes, so that the second backslash of `\\` is never mistaken
		// for the start of a new escape.
		out = append(out, s[i])
		i++
		if i < len(s) {
			out = append(out, s[i])
			i++
		}
	}
	return string(out)
}
515
516func (p *parser) replaceEscapes(str string) string {
517	var replaced []rune
518	s := []byte(str)
519	r := 0
520	for r < len(s) {
521		if s[r] != '\\' {
522			c, size := utf8.DecodeRune(s[r:])
523			r += size
524			replaced = append(replaced, c)
525			continue
526		}
527		r += 1
528		if r >= len(s) {
529			p.bug("Escape sequence at end of string.")
530			return ""
531		}
532		switch s[r] {
533		default:
534			p.bug("Expected valid escape code after \\, but got %q.", s[r])
535			return ""
536		case 'b':
537			replaced = append(replaced, rune(0x0008))
538			r += 1
539		case 't':
540			replaced = append(replaced, rune(0x0009))
541			r += 1
542		case 'n':
543			replaced = append(replaced, rune(0x000A))
544			r += 1
545		case 'f':
546			replaced = append(replaced, rune(0x000C))
547			r += 1
548		case 'r':
549			replaced = append(replaced, rune(0x000D))
550			r += 1
551		case '"':
552			replaced = append(replaced, rune(0x0022))
553			r += 1
554		case '\\':
555			replaced = append(replaced, rune(0x005C))
556			r += 1
557		case 'u':
558			// At this point, we know we have a Unicode escape of the form
559			// `uXXXX` at [r, r+5). (Because the lexer guarantees this
560			// for us.)
561			escaped := p.asciiEscapeToUnicode(s[r+1 : r+5])
562			replaced = append(replaced, escaped)
563			r += 5
564		case 'U':
565			// At this point, we know we have a Unicode escape of the form
566			// `uXXXX` at [r, r+9). (Because the lexer guarantees this
567			// for us.)
568			escaped := p.asciiEscapeToUnicode(s[r+1 : r+9])
569			replaced = append(replaced, escaped)
570			r += 9
571		}
572	}
573	return string(replaced)
574}
575
576func (p *parser) asciiEscapeToUnicode(bs []byte) rune {
577	s := string(bs)
578	hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
579	if err != nil {
580		p.bug("Could not parse '%s' as a hexadecimal number, but the "+
581			"lexer claims it's OK: %s", s, err)
582	}
583	if !utf8.ValidRune(rune(hex)) {
584		p.panicf("Escaped character '\\u%s' is not valid UTF-8.", s)
585	}
586	return rune(hex)
587}
588
589func isStringType(ty itemType) bool {
590	return ty == itemString || ty == itemMultilineString ||
591		ty == itemRawString || ty == itemRawMultilineString
592}
593