1// TOML Parser.
2
3package toml
4
5import (
6	"errors"
7	"fmt"
8	"math"
9	"reflect"
10	"regexp"
11	"strconv"
12	"strings"
13	"time"
14)
15
// tomlParser carries the state of one parse over an already lexed token
// stream.
type tomlParser struct {
	// flowIdx is the index in flow of the next token to be read.
	flowIdx       int
	// flow is the token stream being parsed.
	flow          []token
	// tree is the root tree that parsed tables and values are written into.
	tree          *Tree
	// currentTable is the key path of the most recently parsed table or
	// table array header; key assignments are resolved relative to it.
	currentTable  []string
	// seenTableKeys lists the raw header keys already declared; parseGroup
	// uses it to reject duplicated tables.
	seenTableKeys []string
}
23
// tomlParserStateFn is one step of the parser state machine. A step returns
// the step to execute next, or nil to stop the machine (see run).
type tomlParserStateFn func() tomlParserStateFn
25
26// Formats and panics an error message based on a token
27func (p *tomlParser) raiseError(tok *token, msg string, args ...interface{}) {
28	panic(tok.Position.String() + ": " + fmt.Sprintf(msg, args...))
29}
30
31func (p *tomlParser) run() {
32	for state := p.parseStart; state != nil; {
33		state = state()
34	}
35}
36
37func (p *tomlParser) peek() *token {
38	if p.flowIdx >= len(p.flow) {
39		return nil
40	}
41	return &p.flow[p.flowIdx]
42}
43
44func (p *tomlParser) assume(typ tokenType) {
45	tok := p.getToken()
46	if tok == nil {
47		p.raiseError(tok, "was expecting token %s, but token stream is empty", tok)
48	}
49	if tok.typ != typ {
50		p.raiseError(tok, "was expecting token %s, but got %s instead", typ, tok)
51	}
52}
53
54func (p *tomlParser) getToken() *token {
55	tok := p.peek()
56	if tok == nil {
57		return nil
58	}
59	p.flowIdx++
60	return tok
61}
62
63func (p *tomlParser) parseStart() tomlParserStateFn {
64	tok := p.peek()
65
66	// end of stream, parsing is finished
67	if tok == nil {
68		return nil
69	}
70
71	switch tok.typ {
72	case tokenDoubleLeftBracket:
73		return p.parseGroupArray
74	case tokenLeftBracket:
75		return p.parseGroup
76	case tokenKey:
77		return p.parseAssign
78	case tokenEOF:
79		return nil
80	case tokenError:
81		p.raiseError(tok, "parsing error: %s", tok.String())
82	default:
83		p.raiseError(tok, "unexpected token %s", tok.typ)
84	}
85	return nil
86}
87
88func (p *tomlParser) parseGroupArray() tomlParserStateFn {
89	startToken := p.getToken() // discard the [[
90	key := p.getToken()
91	if key.typ != tokenKeyGroupArray {
92		p.raiseError(key, "unexpected token %s, was expecting a table array key", key)
93	}
94
95	// get or create table array element at the indicated part in the path
96	keys, err := parseKey(key.val)
97	if err != nil {
98		p.raiseError(key, "invalid table array key: %s", err)
99	}
100	p.tree.createSubTree(keys[:len(keys)-1], startToken.Position) // create parent entries
101	destTree := p.tree.GetPath(keys)
102	var array []*Tree
103	if destTree == nil {
104		array = make([]*Tree, 0)
105	} else if target, ok := destTree.([]*Tree); ok && target != nil {
106		array = destTree.([]*Tree)
107	} else {
108		p.raiseError(key, "key %s is already assigned and not of type table array", key)
109	}
110	p.currentTable = keys
111
112	// add a new tree to the end of the table array
113	newTree := newTree()
114	newTree.position = startToken.Position
115	array = append(array, newTree)
116	p.tree.SetPath(p.currentTable, array)
117
118	// remove all keys that were children of this table array
119	prefix := key.val + "."
120	found := false
121	for ii := 0; ii < len(p.seenTableKeys); {
122		tableKey := p.seenTableKeys[ii]
123		if strings.HasPrefix(tableKey, prefix) {
124			p.seenTableKeys = append(p.seenTableKeys[:ii], p.seenTableKeys[ii+1:]...)
125		} else {
126			found = (tableKey == key.val)
127			ii++
128		}
129	}
130
131	// keep this key name from use by other kinds of assignments
132	if !found {
133		p.seenTableKeys = append(p.seenTableKeys, key.val)
134	}
135
136	// move to next parser state
137	p.assume(tokenDoubleRightBracket)
138	return p.parseStart
139}
140
141func (p *tomlParser) parseGroup() tomlParserStateFn {
142	startToken := p.getToken() // discard the [
143	key := p.getToken()
144	if key.typ != tokenKeyGroup {
145		p.raiseError(key, "unexpected token %s, was expecting a table key", key)
146	}
147	for _, item := range p.seenTableKeys {
148		if item == key.val {
149			p.raiseError(key, "duplicated tables")
150		}
151	}
152
153	p.seenTableKeys = append(p.seenTableKeys, key.val)
154	keys, err := parseKey(key.val)
155	if err != nil {
156		p.raiseError(key, "invalid table array key: %s", err)
157	}
158	if err := p.tree.createSubTree(keys, startToken.Position); err != nil {
159		p.raiseError(key, "%s", err)
160	}
161	p.assume(tokenRightBracket)
162	p.currentTable = keys
163	return p.parseStart
164}
165
166func (p *tomlParser) parseAssign() tomlParserStateFn {
167	key := p.getToken()
168	p.assume(tokenEqual)
169
170	parsedKey, err := parseKey(key.val)
171	if err != nil {
172		p.raiseError(key, "invalid key: %s", err.Error())
173	}
174
175	value := p.parseRvalue()
176	var tableKey []string
177	if len(p.currentTable) > 0 {
178		tableKey = p.currentTable
179	} else {
180		tableKey = []string{}
181	}
182
183	prefixKey := parsedKey[0 : len(parsedKey)-1]
184	tableKey = append(tableKey, prefixKey...)
185
186	// find the table to assign, looking out for arrays of tables
187	var targetNode *Tree
188	switch node := p.tree.GetPath(tableKey).(type) {
189	case []*Tree:
190		targetNode = node[len(node)-1]
191	case *Tree:
192		targetNode = node
193	case nil:
194		// create intermediate
195		if err := p.tree.createSubTree(tableKey, key.Position); err != nil {
196			p.raiseError(key, "could not create intermediate group: %s", err)
197		}
198		targetNode = p.tree.GetPath(tableKey).(*Tree)
199	default:
200		p.raiseError(key, "Unknown table type for path: %s",
201			strings.Join(tableKey, "."))
202	}
203
204	// assign value to the found table
205	keyVal := parsedKey[len(parsedKey)-1]
206	localKey := []string{keyVal}
207	finalKey := append(tableKey, keyVal)
208	if targetNode.GetPath(localKey) != nil {
209		p.raiseError(key, "The following key was defined twice: %s",
210			strings.Join(finalKey, "."))
211	}
212	var toInsert interface{}
213
214	switch value.(type) {
215	case *Tree, []*Tree:
216		toInsert = value
217	default:
218		toInsert = &tomlValue{value: value, position: key.Position}
219	}
220	targetNode.values[keyVal] = toInsert
221	return p.parseStart
222}
223
// Patterns that match a misplaced underscore in a number token. Both are
// assigned in init and consulted by the *ContainsInvalidUnderscore helpers.
var (
	numberUnderscoreInvalidRegexp    *regexp.Regexp
	hexNumberUnderscoreInvalidRegexp *regexp.Regexp
)
226
227func numberContainsInvalidUnderscore(value string) error {
228	if numberUnderscoreInvalidRegexp.MatchString(value) {
229		return errors.New("invalid use of _ in number")
230	}
231	return nil
232}
233
234func hexNumberContainsInvalidUnderscore(value string) error {
235	if hexNumberUnderscoreInvalidRegexp.MatchString(value) {
236		return errors.New("invalid use of _ in hex number")
237	}
238	return nil
239}
240
// cleanupNumberToken returns value with every underscore removed, leaving
// the remaining characters of the number token untouched.
func cleanupNumberToken(value string) string {
	return strings.ReplaceAll(value, "_", "")
}
245
246func (p *tomlParser) parseRvalue() interface{} {
247	tok := p.getToken()
248	if tok == nil || tok.typ == tokenEOF {
249		p.raiseError(tok, "expecting a value")
250	}
251
252	switch tok.typ {
253	case tokenString:
254		return tok.val
255	case tokenTrue:
256		return true
257	case tokenFalse:
258		return false
259	case tokenInf:
260		if tok.val[0] == '-' {
261			return math.Inf(-1)
262		}
263		return math.Inf(1)
264	case tokenNan:
265		return math.NaN()
266	case tokenInteger:
267		cleanedVal := cleanupNumberToken(tok.val)
268		var err error
269		var val int64
270		if len(cleanedVal) >= 3 && cleanedVal[0] == '0' {
271			switch cleanedVal[1] {
272			case 'x':
273				err = hexNumberContainsInvalidUnderscore(tok.val)
274				if err != nil {
275					p.raiseError(tok, "%s", err)
276				}
277				val, err = strconv.ParseInt(cleanedVal[2:], 16, 64)
278			case 'o':
279				err = numberContainsInvalidUnderscore(tok.val)
280				if err != nil {
281					p.raiseError(tok, "%s", err)
282				}
283				val, err = strconv.ParseInt(cleanedVal[2:], 8, 64)
284			case 'b':
285				err = numberContainsInvalidUnderscore(tok.val)
286				if err != nil {
287					p.raiseError(tok, "%s", err)
288				}
289				val, err = strconv.ParseInt(cleanedVal[2:], 2, 64)
290			default:
291				panic("invalid base") // the lexer should catch this first
292			}
293		} else {
294			err = numberContainsInvalidUnderscore(tok.val)
295			if err != nil {
296				p.raiseError(tok, "%s", err)
297			}
298			val, err = strconv.ParseInt(cleanedVal, 10, 64)
299		}
300		if err != nil {
301			p.raiseError(tok, "%s", err)
302		}
303		return val
304	case tokenFloat:
305		err := numberContainsInvalidUnderscore(tok.val)
306		if err != nil {
307			p.raiseError(tok, "%s", err)
308		}
309		cleanedVal := cleanupNumberToken(tok.val)
310		val, err := strconv.ParseFloat(cleanedVal, 64)
311		if err != nil {
312			p.raiseError(tok, "%s", err)
313		}
314		return val
315	case tokenDate:
316		layout := time.RFC3339Nano
317		if !strings.Contains(tok.val, "T") {
318			layout = strings.Replace(layout, "T", " ", 1)
319		}
320		val, err := time.ParseInLocation(layout, tok.val, time.UTC)
321		if err != nil {
322			p.raiseError(tok, "%s", err)
323		}
324		return val
325	case tokenLocalDate:
326		v := strings.Replace(tok.val, " ", "T", -1)
327		isDateTime := false
328		isTime := false
329		for _, c := range v {
330			if c == 'T' || c == 't' {
331				isDateTime = true
332				break
333			}
334			if c == ':' {
335				isTime = true
336				break
337			}
338		}
339
340		var val interface{}
341		var err error
342
343		if isDateTime {
344			val, err = ParseLocalDateTime(v)
345		} else if isTime {
346			val, err = ParseLocalTime(v)
347		} else {
348			val, err = ParseLocalDate(v)
349		}
350
351		if err != nil {
352			p.raiseError(tok, "%s", err)
353		}
354		return val
355	case tokenLeftBracket:
356		return p.parseArray()
357	case tokenLeftCurlyBrace:
358		return p.parseInlineTable()
359	case tokenEqual:
360		p.raiseError(tok, "cannot have multiple equals for the same key")
361	case tokenError:
362		p.raiseError(tok, "%s", tok)
363	}
364
365	p.raiseError(tok, "never reached")
366
367	return nil
368}
369
370func tokenIsComma(t *token) bool {
371	return t != nil && t.typ == tokenComma
372}
373
374func (p *tomlParser) parseInlineTable() *Tree {
375	tree := newTree()
376	var previous *token
377Loop:
378	for {
379		follow := p.peek()
380		if follow == nil || follow.typ == tokenEOF {
381			p.raiseError(follow, "unterminated inline table")
382		}
383		switch follow.typ {
384		case tokenRightCurlyBrace:
385			p.getToken()
386			break Loop
387		case tokenKey, tokenInteger, tokenString:
388			if !tokenIsComma(previous) && previous != nil {
389				p.raiseError(follow, "comma expected between fields in inline table")
390			}
391			key := p.getToken()
392			p.assume(tokenEqual)
393
394			parsedKey, err := parseKey(key.val)
395			if err != nil {
396				p.raiseError(key, "invalid key: %s", err)
397			}
398
399			value := p.parseRvalue()
400			tree.SetPath(parsedKey, value)
401		case tokenComma:
402			if tokenIsComma(previous) {
403				p.raiseError(follow, "need field between two commas in inline table")
404			}
405			p.getToken()
406		default:
407			p.raiseError(follow, "unexpected token type in inline table: %s", follow.String())
408		}
409		previous = follow
410	}
411	if tokenIsComma(previous) {
412		p.raiseError(previous, "trailing comma at the end of inline table")
413	}
414	return tree
415}
416
417func (p *tomlParser) parseArray() interface{} {
418	var array []interface{}
419	arrayType := reflect.TypeOf(nil)
420	for {
421		follow := p.peek()
422		if follow == nil || follow.typ == tokenEOF {
423			p.raiseError(follow, "unterminated array")
424		}
425		if follow.typ == tokenRightBracket {
426			p.getToken()
427			break
428		}
429		val := p.parseRvalue()
430		if arrayType == nil {
431			arrayType = reflect.TypeOf(val)
432		}
433		if reflect.TypeOf(val) != arrayType {
434			p.raiseError(follow, "mixed types in array")
435		}
436		array = append(array, val)
437		follow = p.peek()
438		if follow == nil || follow.typ == tokenEOF {
439			p.raiseError(follow, "unterminated array")
440		}
441		if follow.typ != tokenRightBracket && follow.typ != tokenComma {
442			p.raiseError(follow, "missing comma")
443		}
444		if follow.typ == tokenComma {
445			p.getToken()
446		}
447	}
448	// An array of Trees is actually an array of inline
449	// tables, which is a shorthand for a table array. If the
450	// array was not converted from []interface{} to []*Tree,
451	// the two notations would not be equivalent.
452	if arrayType == reflect.TypeOf(newTree()) {
453		tomlArray := make([]*Tree, len(array))
454		for i, v := range array {
455			tomlArray[i] = v.(*Tree)
456		}
457		return tomlArray
458	}
459	return array
460}
461
462func parseToml(flow []token) *Tree {
463	result := newTree()
464	result.position = Position{1, 1}
465	parser := &tomlParser{
466		flowIdx:       0,
467		flow:          flow,
468		tree:          result,
469		currentTable:  make([]string, 0),
470		seenTableKeys: make([]string, 0),
471	}
472	parser.run()
473	return result
474}
475
476func init() {
477	numberUnderscoreInvalidRegexp = regexp.MustCompile(`([^\d]_|_[^\d])|_$|^_`)
478	hexNumberUnderscoreInvalidRegexp = regexp.MustCompile(`(^0x_)|([^\da-f]_|_[^\da-f])|_$|^_`)
479}
480