1// TOML Parser.
2
3package toml
4
5import (
6	"errors"
7	"fmt"
8	"math"
9	"reflect"
10	"regexp"
11	"strconv"
12	"strings"
13	"time"
14)
15
16type tomlParser struct {
17	flowIdx       int
18	flow          []token
19	tree          *Tree
20	currentTable  []string
21	seenTableKeys []string
22}
23
24type tomlParserStateFn func() tomlParserStateFn
25
26// Formats and panics an error message based on a token
27func (p *tomlParser) raiseError(tok *token, msg string, args ...interface{}) {
28	panic(tok.Position.String() + ": " + fmt.Sprintf(msg, args...))
29}
30
31func (p *tomlParser) run() {
32	for state := p.parseStart; state != nil; {
33		state = state()
34	}
35}
36
37func (p *tomlParser) peek() *token {
38	if p.flowIdx >= len(p.flow) {
39		return nil
40	}
41	return &p.flow[p.flowIdx]
42}
43
44func (p *tomlParser) assume(typ tokenType) {
45	tok := p.getToken()
46	if tok == nil {
47		p.raiseError(tok, "was expecting token %s, but token stream is empty", tok)
48	}
49	if tok.typ != typ {
50		p.raiseError(tok, "was expecting token %s, but got %s instead", typ, tok)
51	}
52}
53
54func (p *tomlParser) getToken() *token {
55	tok := p.peek()
56	if tok == nil {
57		return nil
58	}
59	p.flowIdx++
60	return tok
61}
62
63func (p *tomlParser) parseStart() tomlParserStateFn {
64	tok := p.peek()
65
66	// end of stream, parsing is finished
67	if tok == nil {
68		return nil
69	}
70
71	switch tok.typ {
72	case tokenDoubleLeftBracket:
73		return p.parseGroupArray
74	case tokenLeftBracket:
75		return p.parseGroup
76	case tokenKey:
77		return p.parseAssign
78	case tokenEOF:
79		return nil
80	case tokenError:
81		p.raiseError(tok, "parsing error: %s", tok.String())
82	default:
83		p.raiseError(tok, "unexpected token %s", tok.typ)
84	}
85	return nil
86}
87
88func (p *tomlParser) parseGroupArray() tomlParserStateFn {
89	startToken := p.getToken() // discard the [[
90	key := p.getToken()
91	if key.typ != tokenKeyGroupArray {
92		p.raiseError(key, "unexpected token %s, was expecting a table array key", key)
93	}
94
95	// get or create table array element at the indicated part in the path
96	keys, err := parseKey(key.val)
97	if err != nil {
98		p.raiseError(key, "invalid table array key: %s", err)
99	}
100	p.tree.createSubTree(keys[:len(keys)-1], startToken.Position) // create parent entries
101	destTree := p.tree.GetPath(keys)
102	var array []*Tree
103	if destTree == nil {
104		array = make([]*Tree, 0)
105	} else if target, ok := destTree.([]*Tree); ok && target != nil {
106		array = destTree.([]*Tree)
107	} else {
108		p.raiseError(key, "key %s is already assigned and not of type table array", key)
109	}
110	p.currentTable = keys
111
112	// add a new tree to the end of the table array
113	newTree := newTree()
114	newTree.position = startToken.Position
115	array = append(array, newTree)
116	p.tree.SetPath(p.currentTable, array)
117
118	// remove all keys that were children of this table array
119	prefix := key.val + "."
120	found := false
121	for ii := 0; ii < len(p.seenTableKeys); {
122		tableKey := p.seenTableKeys[ii]
123		if strings.HasPrefix(tableKey, prefix) {
124			p.seenTableKeys = append(p.seenTableKeys[:ii], p.seenTableKeys[ii+1:]...)
125		} else {
126			found = (tableKey == key.val)
127			ii++
128		}
129	}
130
131	// keep this key name from use by other kinds of assignments
132	if !found {
133		p.seenTableKeys = append(p.seenTableKeys, key.val)
134	}
135
136	// move to next parser state
137	p.assume(tokenDoubleRightBracket)
138	return p.parseStart
139}
140
141func (p *tomlParser) parseGroup() tomlParserStateFn {
142	startToken := p.getToken() // discard the [
143	key := p.getToken()
144	if key.typ != tokenKeyGroup {
145		p.raiseError(key, "unexpected token %s, was expecting a table key", key)
146	}
147	for _, item := range p.seenTableKeys {
148		if item == key.val {
149			p.raiseError(key, "duplicated tables")
150		}
151	}
152
153	p.seenTableKeys = append(p.seenTableKeys, key.val)
154	keys, err := parseKey(key.val)
155	if err != nil {
156		p.raiseError(key, "invalid table array key: %s", err)
157	}
158	if err := p.tree.createSubTree(keys, startToken.Position); err != nil {
159		p.raiseError(key, "%s", err)
160	}
161	destTree := p.tree.GetPath(keys)
162	if target, ok := destTree.(*Tree); ok && target != nil && target.inline {
163		p.raiseError(key, "could not re-define exist inline table or its sub-table : %s",
164			strings.Join(keys, "."))
165	}
166	p.assume(tokenRightBracket)
167	p.currentTable = keys
168	return p.parseStart
169}
170
171func (p *tomlParser) parseAssign() tomlParserStateFn {
172	key := p.getToken()
173	p.assume(tokenEqual)
174
175	parsedKey, err := parseKey(key.val)
176	if err != nil {
177		p.raiseError(key, "invalid key: %s", err.Error())
178	}
179
180	value := p.parseRvalue()
181	var tableKey []string
182	if len(p.currentTable) > 0 {
183		tableKey = p.currentTable
184	} else {
185		tableKey = []string{}
186	}
187
188	prefixKey := parsedKey[0 : len(parsedKey)-1]
189	tableKey = append(tableKey, prefixKey...)
190
191	// find the table to assign, looking out for arrays of tables
192	var targetNode *Tree
193	switch node := p.tree.GetPath(tableKey).(type) {
194	case []*Tree:
195		targetNode = node[len(node)-1]
196	case *Tree:
197		targetNode = node
198	case nil:
199		// create intermediate
200		if err := p.tree.createSubTree(tableKey, key.Position); err != nil {
201			p.raiseError(key, "could not create intermediate group: %s", err)
202		}
203		targetNode = p.tree.GetPath(tableKey).(*Tree)
204	default:
205		p.raiseError(key, "Unknown table type for path: %s",
206			strings.Join(tableKey, "."))
207	}
208
209	if targetNode.inline {
210		p.raiseError(key, "could not add key or sub-table to exist inline table or its sub-table : %s",
211			strings.Join(tableKey, "."))
212	}
213
214	// assign value to the found table
215	keyVal := parsedKey[len(parsedKey)-1]
216	localKey := []string{keyVal}
217	finalKey := append(tableKey, keyVal)
218	if targetNode.GetPath(localKey) != nil {
219		p.raiseError(key, "The following key was defined twice: %s",
220			strings.Join(finalKey, "."))
221	}
222	var toInsert interface{}
223
224	switch value.(type) {
225	case *Tree, []*Tree:
226		toInsert = value
227	default:
228		toInsert = &tomlValue{value: value, position: key.Position}
229	}
230	targetNode.values[keyVal] = toInsert
231	return p.parseStart
232}
233
234var numberUnderscoreInvalidRegexp *regexp.Regexp
235var hexNumberUnderscoreInvalidRegexp *regexp.Regexp
236
237func numberContainsInvalidUnderscore(value string) error {
238	if numberUnderscoreInvalidRegexp.MatchString(value) {
239		return errors.New("invalid use of _ in number")
240	}
241	return nil
242}
243
244func hexNumberContainsInvalidUnderscore(value string) error {
245	if hexNumberUnderscoreInvalidRegexp.MatchString(value) {
246		return errors.New("invalid use of _ in hex number")
247	}
248	return nil
249}
250
251func cleanupNumberToken(value string) string {
252	cleanedVal := strings.Replace(value, "_", "", -1)
253	return cleanedVal
254}
255
256func (p *tomlParser) parseRvalue() interface{} {
257	tok := p.getToken()
258	if tok == nil || tok.typ == tokenEOF {
259		p.raiseError(tok, "expecting a value")
260	}
261
262	switch tok.typ {
263	case tokenString:
264		return tok.val
265	case tokenTrue:
266		return true
267	case tokenFalse:
268		return false
269	case tokenInf:
270		if tok.val[0] == '-' {
271			return math.Inf(-1)
272		}
273		return math.Inf(1)
274	case tokenNan:
275		return math.NaN()
276	case tokenInteger:
277		cleanedVal := cleanupNumberToken(tok.val)
278		var err error
279		var val int64
280		if len(cleanedVal) >= 3 && cleanedVal[0] == '0' {
281			switch cleanedVal[1] {
282			case 'x':
283				err = hexNumberContainsInvalidUnderscore(tok.val)
284				if err != nil {
285					p.raiseError(tok, "%s", err)
286				}
287				val, err = strconv.ParseInt(cleanedVal[2:], 16, 64)
288			case 'o':
289				err = numberContainsInvalidUnderscore(tok.val)
290				if err != nil {
291					p.raiseError(tok, "%s", err)
292				}
293				val, err = strconv.ParseInt(cleanedVal[2:], 8, 64)
294			case 'b':
295				err = numberContainsInvalidUnderscore(tok.val)
296				if err != nil {
297					p.raiseError(tok, "%s", err)
298				}
299				val, err = strconv.ParseInt(cleanedVal[2:], 2, 64)
300			default:
301				panic("invalid base") // the lexer should catch this first
302			}
303		} else {
304			err = numberContainsInvalidUnderscore(tok.val)
305			if err != nil {
306				p.raiseError(tok, "%s", err)
307			}
308			val, err = strconv.ParseInt(cleanedVal, 10, 64)
309		}
310		if err != nil {
311			p.raiseError(tok, "%s", err)
312		}
313		return val
314	case tokenFloat:
315		err := numberContainsInvalidUnderscore(tok.val)
316		if err != nil {
317			p.raiseError(tok, "%s", err)
318		}
319		cleanedVal := cleanupNumberToken(tok.val)
320		val, err := strconv.ParseFloat(cleanedVal, 64)
321		if err != nil {
322			p.raiseError(tok, "%s", err)
323		}
324		return val
325	case tokenDate:
326		layout := time.RFC3339Nano
327		if !strings.Contains(tok.val, "T") {
328			layout = strings.Replace(layout, "T", " ", 1)
329		}
330		val, err := time.ParseInLocation(layout, tok.val, time.UTC)
331		if err != nil {
332			p.raiseError(tok, "%s", err)
333		}
334		return val
335	case tokenLocalDate:
336		v := strings.Replace(tok.val, " ", "T", -1)
337		isDateTime := false
338		isTime := false
339		for _, c := range v {
340			if c == 'T' || c == 't' {
341				isDateTime = true
342				break
343			}
344			if c == ':' {
345				isTime = true
346				break
347			}
348		}
349
350		var val interface{}
351		var err error
352
353		if isDateTime {
354			val, err = ParseLocalDateTime(v)
355		} else if isTime {
356			val, err = ParseLocalTime(v)
357		} else {
358			val, err = ParseLocalDate(v)
359		}
360
361		if err != nil {
362			p.raiseError(tok, "%s", err)
363		}
364		return val
365	case tokenLeftBracket:
366		return p.parseArray()
367	case tokenLeftCurlyBrace:
368		return p.parseInlineTable()
369	case tokenEqual:
370		p.raiseError(tok, "cannot have multiple equals for the same key")
371	case tokenError:
372		p.raiseError(tok, "%s", tok)
373	}
374
375	p.raiseError(tok, "never reached")
376
377	return nil
378}
379
380func tokenIsComma(t *token) bool {
381	return t != nil && t.typ == tokenComma
382}
383
384func (p *tomlParser) parseInlineTable() *Tree {
385	tree := newTree()
386	var previous *token
387Loop:
388	for {
389		follow := p.peek()
390		if follow == nil || follow.typ == tokenEOF {
391			p.raiseError(follow, "unterminated inline table")
392		}
393		switch follow.typ {
394		case tokenRightCurlyBrace:
395			p.getToken()
396			break Loop
397		case tokenKey, tokenInteger, tokenString:
398			if !tokenIsComma(previous) && previous != nil {
399				p.raiseError(follow, "comma expected between fields in inline table")
400			}
401			key := p.getToken()
402			p.assume(tokenEqual)
403
404			parsedKey, err := parseKey(key.val)
405			if err != nil {
406				p.raiseError(key, "invalid key: %s", err)
407			}
408
409			value := p.parseRvalue()
410			tree.SetPath(parsedKey, value)
411		case tokenComma:
412			if tokenIsComma(previous) {
413				p.raiseError(follow, "need field between two commas in inline table")
414			}
415			p.getToken()
416		default:
417			p.raiseError(follow, "unexpected token type in inline table: %s", follow.String())
418		}
419		previous = follow
420	}
421	if tokenIsComma(previous) {
422		p.raiseError(previous, "trailing comma at the end of inline table")
423	}
424	tree.inline = true
425	return tree
426}
427
428func (p *tomlParser) parseArray() interface{} {
429	var array []interface{}
430	arrayType := reflect.TypeOf(newTree())
431	for {
432		follow := p.peek()
433		if follow == nil || follow.typ == tokenEOF {
434			p.raiseError(follow, "unterminated array")
435		}
436		if follow.typ == tokenRightBracket {
437			p.getToken()
438			break
439		}
440		val := p.parseRvalue()
441		if reflect.TypeOf(val) != arrayType {
442			arrayType = nil
443		}
444		array = append(array, val)
445		follow = p.peek()
446		if follow == nil || follow.typ == tokenEOF {
447			p.raiseError(follow, "unterminated array")
448		}
449		if follow.typ != tokenRightBracket && follow.typ != tokenComma {
450			p.raiseError(follow, "missing comma")
451		}
452		if follow.typ == tokenComma {
453			p.getToken()
454		}
455	}
456
457	// if the array is a mixed-type array or its length is 0,
458	// don't convert it to a table array
459	if len(array) <= 0 {
460		arrayType = nil
461	}
462	// An array of Trees is actually an array of inline
463	// tables, which is a shorthand for a table array. If the
464	// array was not converted from []interface{} to []*Tree,
465	// the two notations would not be equivalent.
466	if arrayType == reflect.TypeOf(newTree()) {
467		tomlArray := make([]*Tree, len(array))
468		for i, v := range array {
469			tomlArray[i] = v.(*Tree)
470		}
471		return tomlArray
472	}
473	return array
474}
475
476func parseToml(flow []token) *Tree {
477	result := newTree()
478	result.position = Position{1, 1}
479	parser := &tomlParser{
480		flowIdx:       0,
481		flow:          flow,
482		tree:          result,
483		currentTable:  make([]string, 0),
484		seenTableKeys: make([]string, 0),
485	}
486	parser.run()
487	return result
488}
489
490func init() {
491	numberUnderscoreInvalidRegexp = regexp.MustCompile(`([^\d]_|_[^\d])|_$|^_`)
492	hexNumberUnderscoreInvalidRegexp = regexp.MustCompile(`(^0x_)|([^\da-f]_|_[^\da-f])|_$|^_`)
493}
494