1// TOML Parser.
2
3package toml
4
5import (
6	"errors"
7	"fmt"
8	"math"
9	"reflect"
10	"strconv"
11	"strings"
12	"time"
13)
14
15type tomlParser struct {
16	flowIdx       int
17	flow          []token
18	tree          *Tree
19	currentTable  []string
20	seenTableKeys []string
21}
22
23type tomlParserStateFn func() tomlParserStateFn
24
25// Formats and panics an error message based on a token
26func (p *tomlParser) raiseError(tok *token, msg string, args ...interface{}) {
27	panic(tok.Position.String() + ": " + fmt.Sprintf(msg, args...))
28}
29
30func (p *tomlParser) run() {
31	for state := p.parseStart; state != nil; {
32		state = state()
33	}
34}
35
36func (p *tomlParser) peek() *token {
37	if p.flowIdx >= len(p.flow) {
38		return nil
39	}
40	return &p.flow[p.flowIdx]
41}
42
43func (p *tomlParser) assume(typ tokenType) {
44	tok := p.getToken()
45	if tok == nil {
46		p.raiseError(tok, "was expecting token %s, but token stream is empty", tok)
47	}
48	if tok.typ != typ {
49		p.raiseError(tok, "was expecting token %s, but got %s instead", typ, tok)
50	}
51}
52
53func (p *tomlParser) getToken() *token {
54	tok := p.peek()
55	if tok == nil {
56		return nil
57	}
58	p.flowIdx++
59	return tok
60}
61
62func (p *tomlParser) parseStart() tomlParserStateFn {
63	tok := p.peek()
64
65	// end of stream, parsing is finished
66	if tok == nil {
67		return nil
68	}
69
70	switch tok.typ {
71	case tokenDoubleLeftBracket:
72		return p.parseGroupArray
73	case tokenLeftBracket:
74		return p.parseGroup
75	case tokenKey:
76		return p.parseAssign
77	case tokenEOF:
78		return nil
79	case tokenError:
80		p.raiseError(tok, "parsing error: %s", tok.String())
81	default:
82		p.raiseError(tok, "unexpected token %s", tok.typ)
83	}
84	return nil
85}
86
87func (p *tomlParser) parseGroupArray() tomlParserStateFn {
88	startToken := p.getToken() // discard the [[
89	key := p.getToken()
90	if key.typ != tokenKeyGroupArray {
91		p.raiseError(key, "unexpected token %s, was expecting a table array key", key)
92	}
93
94	// get or create table array element at the indicated part in the path
95	keys, err := parseKey(key.val)
96	if err != nil {
97		p.raiseError(key, "invalid table array key: %s", err)
98	}
99	p.tree.createSubTree(keys[:len(keys)-1], startToken.Position) // create parent entries
100	destTree := p.tree.GetPath(keys)
101	var array []*Tree
102	if destTree == nil {
103		array = make([]*Tree, 0)
104	} else if target, ok := destTree.([]*Tree); ok && target != nil {
105		array = destTree.([]*Tree)
106	} else {
107		p.raiseError(key, "key %s is already assigned and not of type table array", key)
108	}
109	p.currentTable = keys
110
111	// add a new tree to the end of the table array
112	newTree := newTree()
113	newTree.position = startToken.Position
114	array = append(array, newTree)
115	p.tree.SetPath(p.currentTable, array)
116
117	// remove all keys that were children of this table array
118	prefix := key.val + "."
119	found := false
120	for ii := 0; ii < len(p.seenTableKeys); {
121		tableKey := p.seenTableKeys[ii]
122		if strings.HasPrefix(tableKey, prefix) {
123			p.seenTableKeys = append(p.seenTableKeys[:ii], p.seenTableKeys[ii+1:]...)
124		} else {
125			found = (tableKey == key.val)
126			ii++
127		}
128	}
129
130	// keep this key name from use by other kinds of assignments
131	if !found {
132		p.seenTableKeys = append(p.seenTableKeys, key.val)
133	}
134
135	// move to next parser state
136	p.assume(tokenDoubleRightBracket)
137	return p.parseStart
138}
139
140func (p *tomlParser) parseGroup() tomlParserStateFn {
141	startToken := p.getToken() // discard the [
142	key := p.getToken()
143	if key.typ != tokenKeyGroup {
144		p.raiseError(key, "unexpected token %s, was expecting a table key", key)
145	}
146	for _, item := range p.seenTableKeys {
147		if item == key.val {
148			p.raiseError(key, "duplicated tables")
149		}
150	}
151
152	p.seenTableKeys = append(p.seenTableKeys, key.val)
153	keys, err := parseKey(key.val)
154	if err != nil {
155		p.raiseError(key, "invalid table array key: %s", err)
156	}
157	if err := p.tree.createSubTree(keys, startToken.Position); err != nil {
158		p.raiseError(key, "%s", err)
159	}
160	destTree := p.tree.GetPath(keys)
161	if target, ok := destTree.(*Tree); ok && target != nil && target.inline {
162		p.raiseError(key, "could not re-define exist inline table or its sub-table : %s",
163			strings.Join(keys, "."))
164	}
165	p.assume(tokenRightBracket)
166	p.currentTable = keys
167	return p.parseStart
168}
169
170func (p *tomlParser) parseAssign() tomlParserStateFn {
171	key := p.getToken()
172	p.assume(tokenEqual)
173
174	parsedKey, err := parseKey(key.val)
175	if err != nil {
176		p.raiseError(key, "invalid key: %s", err.Error())
177	}
178
179	value := p.parseRvalue()
180	var tableKey []string
181	if len(p.currentTable) > 0 {
182		tableKey = p.currentTable
183	} else {
184		tableKey = []string{}
185	}
186
187	prefixKey := parsedKey[0 : len(parsedKey)-1]
188	tableKey = append(tableKey, prefixKey...)
189
190	// find the table to assign, looking out for arrays of tables
191	var targetNode *Tree
192	switch node := p.tree.GetPath(tableKey).(type) {
193	case []*Tree:
194		targetNode = node[len(node)-1]
195	case *Tree:
196		targetNode = node
197	case nil:
198		// create intermediate
199		if err := p.tree.createSubTree(tableKey, key.Position); err != nil {
200			p.raiseError(key, "could not create intermediate group: %s", err)
201		}
202		targetNode = p.tree.GetPath(tableKey).(*Tree)
203	default:
204		p.raiseError(key, "Unknown table type for path: %s",
205			strings.Join(tableKey, "."))
206	}
207
208	if targetNode.inline {
209		p.raiseError(key, "could not add key or sub-table to exist inline table or its sub-table : %s",
210			strings.Join(tableKey, "."))
211	}
212
213	// assign value to the found table
214	keyVal := parsedKey[len(parsedKey)-1]
215	localKey := []string{keyVal}
216	finalKey := append(tableKey, keyVal)
217	if targetNode.GetPath(localKey) != nil {
218		p.raiseError(key, "The following key was defined twice: %s",
219			strings.Join(finalKey, "."))
220	}
221	var toInsert interface{}
222
223	switch value.(type) {
224	case *Tree, []*Tree:
225		toInsert = value
226	default:
227		toInsert = &tomlValue{value: value, position: key.Position}
228	}
229	targetNode.values[keyVal] = toInsert
230	return p.parseStart
231}
232
233var errInvalidUnderscore = errors.New("invalid use of _ in number")
234
235func numberContainsInvalidUnderscore(value string) error {
236	// For large numbers, you may use underscores between digits to enhance
237	// readability. Each underscore must be surrounded by at least one digit on
238	// each side.
239
240	hasBefore := false
241	for idx, r := range value {
242		if r == '_' {
243			if !hasBefore || idx+1 >= len(value) {
244				// can't end with an underscore
245				return errInvalidUnderscore
246			}
247		}
248		hasBefore = isDigit(r)
249	}
250	return nil
251}
252
253var errInvalidUnderscoreHex = errors.New("invalid use of _ in hex number")
254
255func hexNumberContainsInvalidUnderscore(value string) error {
256	hasBefore := false
257	for idx, r := range value {
258		if r == '_' {
259			if !hasBefore || idx+1 >= len(value) {
260				// can't end with an underscore
261				return errInvalidUnderscoreHex
262			}
263		}
264		hasBefore = isHexDigit(r)
265	}
266	return nil
267}
268
269func cleanupNumberToken(value string) string {
270	cleanedVal := strings.Replace(value, "_", "", -1)
271	return cleanedVal
272}
273
274func (p *tomlParser) parseRvalue() interface{} {
275	tok := p.getToken()
276	if tok == nil || tok.typ == tokenEOF {
277		p.raiseError(tok, "expecting a value")
278	}
279
280	switch tok.typ {
281	case tokenString:
282		return tok.val
283	case tokenTrue:
284		return true
285	case tokenFalse:
286		return false
287	case tokenInf:
288		if tok.val[0] == '-' {
289			return math.Inf(-1)
290		}
291		return math.Inf(1)
292	case tokenNan:
293		return math.NaN()
294	case tokenInteger:
295		cleanedVal := cleanupNumberToken(tok.val)
296		var err error
297		var val int64
298		if len(cleanedVal) >= 3 && cleanedVal[0] == '0' {
299			switch cleanedVal[1] {
300			case 'x':
301				err = hexNumberContainsInvalidUnderscore(tok.val)
302				if err != nil {
303					p.raiseError(tok, "%s", err)
304				}
305				val, err = strconv.ParseInt(cleanedVal[2:], 16, 64)
306			case 'o':
307				err = numberContainsInvalidUnderscore(tok.val)
308				if err != nil {
309					p.raiseError(tok, "%s", err)
310				}
311				val, err = strconv.ParseInt(cleanedVal[2:], 8, 64)
312			case 'b':
313				err = numberContainsInvalidUnderscore(tok.val)
314				if err != nil {
315					p.raiseError(tok, "%s", err)
316				}
317				val, err = strconv.ParseInt(cleanedVal[2:], 2, 64)
318			default:
319				panic("invalid base") // the lexer should catch this first
320			}
321		} else {
322			err = numberContainsInvalidUnderscore(tok.val)
323			if err != nil {
324				p.raiseError(tok, "%s", err)
325			}
326			val, err = strconv.ParseInt(cleanedVal, 10, 64)
327		}
328		if err != nil {
329			p.raiseError(tok, "%s", err)
330		}
331		return val
332	case tokenFloat:
333		err := numberContainsInvalidUnderscore(tok.val)
334		if err != nil {
335			p.raiseError(tok, "%s", err)
336		}
337		cleanedVal := cleanupNumberToken(tok.val)
338		val, err := strconv.ParseFloat(cleanedVal, 64)
339		if err != nil {
340			p.raiseError(tok, "%s", err)
341		}
342		return val
343	case tokenLocalTime:
344		val, err := ParseLocalTime(tok.val)
345		if err != nil {
346			p.raiseError(tok, "%s", err)
347		}
348		return val
349	case tokenLocalDate:
350		// a local date may be followed by:
351		// * nothing: this is a local date
352		// * a local time: this is a local date-time
353
354		next := p.peek()
355		if next == nil || next.typ != tokenLocalTime {
356			val, err := ParseLocalDate(tok.val)
357			if err != nil {
358				p.raiseError(tok, "%s", err)
359			}
360			return val
361		}
362
363		localDate := tok
364		localTime := p.getToken()
365
366		next = p.peek()
367		if next == nil || next.typ != tokenTimeOffset {
368			v := localDate.val + "T" + localTime.val
369			val, err := ParseLocalDateTime(v)
370			if err != nil {
371				p.raiseError(tok, "%s", err)
372			}
373			return val
374		}
375
376		offset := p.getToken()
377
378		layout := time.RFC3339Nano
379		v := localDate.val + "T" + localTime.val + offset.val
380		val, err := time.ParseInLocation(layout, v, time.UTC)
381		if err != nil {
382			p.raiseError(tok, "%s", err)
383		}
384		return val
385	case tokenLeftBracket:
386		return p.parseArray()
387	case tokenLeftCurlyBrace:
388		return p.parseInlineTable()
389	case tokenEqual:
390		p.raiseError(tok, "cannot have multiple equals for the same key")
391	case tokenError:
392		p.raiseError(tok, "%s", tok)
393	default:
394		panic(fmt.Errorf("unhandled token: %v", tok))
395	}
396
397	return nil
398}
399
400func tokenIsComma(t *token) bool {
401	return t != nil && t.typ == tokenComma
402}
403
404func (p *tomlParser) parseInlineTable() *Tree {
405	tree := newTree()
406	var previous *token
407Loop:
408	for {
409		follow := p.peek()
410		if follow == nil || follow.typ == tokenEOF {
411			p.raiseError(follow, "unterminated inline table")
412		}
413		switch follow.typ {
414		case tokenRightCurlyBrace:
415			p.getToken()
416			break Loop
417		case tokenKey, tokenInteger, tokenString:
418			if !tokenIsComma(previous) && previous != nil {
419				p.raiseError(follow, "comma expected between fields in inline table")
420			}
421			key := p.getToken()
422			p.assume(tokenEqual)
423
424			parsedKey, err := parseKey(key.val)
425			if err != nil {
426				p.raiseError(key, "invalid key: %s", err)
427			}
428
429			value := p.parseRvalue()
430			tree.SetPath(parsedKey, value)
431		case tokenComma:
432			if tokenIsComma(previous) {
433				p.raiseError(follow, "need field between two commas in inline table")
434			}
435			p.getToken()
436		default:
437			p.raiseError(follow, "unexpected token type in inline table: %s", follow.String())
438		}
439		previous = follow
440	}
441	if tokenIsComma(previous) {
442		p.raiseError(previous, "trailing comma at the end of inline table")
443	}
444	tree.inline = true
445	return tree
446}
447
448func (p *tomlParser) parseArray() interface{} {
449	var array []interface{}
450	arrayType := reflect.TypeOf(newTree())
451	for {
452		follow := p.peek()
453		if follow == nil || follow.typ == tokenEOF {
454			p.raiseError(follow, "unterminated array")
455		}
456		if follow.typ == tokenRightBracket {
457			p.getToken()
458			break
459		}
460		val := p.parseRvalue()
461		if reflect.TypeOf(val) != arrayType {
462			arrayType = nil
463		}
464		array = append(array, val)
465		follow = p.peek()
466		if follow == nil || follow.typ == tokenEOF {
467			p.raiseError(follow, "unterminated array")
468		}
469		if follow.typ != tokenRightBracket && follow.typ != tokenComma {
470			p.raiseError(follow, "missing comma")
471		}
472		if follow.typ == tokenComma {
473			p.getToken()
474		}
475	}
476
477	// if the array is a mixed-type array or its length is 0,
478	// don't convert it to a table array
479	if len(array) <= 0 {
480		arrayType = nil
481	}
482	// An array of Trees is actually an array of inline
483	// tables, which is a shorthand for a table array. If the
484	// array was not converted from []interface{} to []*Tree,
485	// the two notations would not be equivalent.
486	if arrayType == reflect.TypeOf(newTree()) {
487		tomlArray := make([]*Tree, len(array))
488		for i, v := range array {
489			tomlArray[i] = v.(*Tree)
490		}
491		return tomlArray
492	}
493	return array
494}
495
496func parseToml(flow []token) *Tree {
497	result := newTree()
498	result.position = Position{1, 1}
499	parser := &tomlParser{
500		flowIdx:       0,
501		flow:          flow,
502		tree:          result,
503		currentTable:  make([]string, 0),
504		seenTableKeys: make([]string, 0),
505	}
506	parser.run()
507	return result
508}
509