1// TOML Parser.
2
3package toml
4
5import (
6	"errors"
7	"fmt"
8	"math"
9	"reflect"
10	"regexp"
11	"strconv"
12	"strings"
13	"time"
14)
15
16type tomlParser struct {
17	flowIdx       int
18	flow          []token
19	tree          *Tree
20	currentTable  []string
21	seenTableKeys []string
22}
23
24type tomlParserStateFn func() tomlParserStateFn
25
26// Formats and panics an error message based on a token
27func (p *tomlParser) raiseError(tok *token, msg string, args ...interface{}) {
28	panic(tok.Position.String() + ": " + fmt.Sprintf(msg, args...))
29}
30
31func (p *tomlParser) run() {
32	for state := p.parseStart; state != nil; {
33		state = state()
34	}
35}
36
37func (p *tomlParser) peek() *token {
38	if p.flowIdx >= len(p.flow) {
39		return nil
40	}
41	return &p.flow[p.flowIdx]
42}
43
44func (p *tomlParser) assume(typ tokenType) {
45	tok := p.getToken()
46	if tok == nil {
47		p.raiseError(tok, "was expecting token %s, but token stream is empty", tok)
48	}
49	if tok.typ != typ {
50		p.raiseError(tok, "was expecting token %s, but got %s instead", typ, tok)
51	}
52}
53
54func (p *tomlParser) getToken() *token {
55	tok := p.peek()
56	if tok == nil {
57		return nil
58	}
59	p.flowIdx++
60	return tok
61}
62
63func (p *tomlParser) parseStart() tomlParserStateFn {
64	tok := p.peek()
65
66	// end of stream, parsing is finished
67	if tok == nil {
68		return nil
69	}
70
71	switch tok.typ {
72	case tokenDoubleLeftBracket:
73		return p.parseGroupArray
74	case tokenLeftBracket:
75		return p.parseGroup
76	case tokenKey:
77		return p.parseAssign
78	case tokenEOF:
79		return nil
80	default:
81		p.raiseError(tok, "unexpected token")
82	}
83	return nil
84}
85
86func (p *tomlParser) parseGroupArray() tomlParserStateFn {
87	startToken := p.getToken() // discard the [[
88	key := p.getToken()
89	if key.typ != tokenKeyGroupArray {
90		p.raiseError(key, "unexpected token %s, was expecting a table array key", key)
91	}
92
93	// get or create table array element at the indicated part in the path
94	keys, err := parseKey(key.val)
95	if err != nil {
96		p.raiseError(key, "invalid table array key: %s", err)
97	}
98	p.tree.createSubTree(keys[:len(keys)-1], startToken.Position) // create parent entries
99	destTree := p.tree.GetPath(keys)
100	var array []*Tree
101	if destTree == nil {
102		array = make([]*Tree, 0)
103	} else if target, ok := destTree.([]*Tree); ok && target != nil {
104		array = destTree.([]*Tree)
105	} else {
106		p.raiseError(key, "key %s is already assigned and not of type table array", key)
107	}
108	p.currentTable = keys
109
110	// add a new tree to the end of the table array
111	newTree := newTree()
112	newTree.position = startToken.Position
113	array = append(array, newTree)
114	p.tree.SetPath(p.currentTable, array)
115
116	// remove all keys that were children of this table array
117	prefix := key.val + "."
118	found := false
119	for ii := 0; ii < len(p.seenTableKeys); {
120		tableKey := p.seenTableKeys[ii]
121		if strings.HasPrefix(tableKey, prefix) {
122			p.seenTableKeys = append(p.seenTableKeys[:ii], p.seenTableKeys[ii+1:]...)
123		} else {
124			found = (tableKey == key.val)
125			ii++
126		}
127	}
128
129	// keep this key name from use by other kinds of assignments
130	if !found {
131		p.seenTableKeys = append(p.seenTableKeys, key.val)
132	}
133
134	// move to next parser state
135	p.assume(tokenDoubleRightBracket)
136	return p.parseStart
137}
138
139func (p *tomlParser) parseGroup() tomlParserStateFn {
140	startToken := p.getToken() // discard the [
141	key := p.getToken()
142	if key.typ != tokenKeyGroup {
143		p.raiseError(key, "unexpected token %s, was expecting a table key", key)
144	}
145	for _, item := range p.seenTableKeys {
146		if item == key.val {
147			p.raiseError(key, "duplicated tables")
148		}
149	}
150
151	p.seenTableKeys = append(p.seenTableKeys, key.val)
152	keys, err := parseKey(key.val)
153	if err != nil {
154		p.raiseError(key, "invalid table array key: %s", err)
155	}
156	if err := p.tree.createSubTree(keys, startToken.Position); err != nil {
157		p.raiseError(key, "%s", err)
158	}
159	p.assume(tokenRightBracket)
160	p.currentTable = keys
161	return p.parseStart
162}
163
164func (p *tomlParser) parseAssign() tomlParserStateFn {
165	key := p.getToken()
166	p.assume(tokenEqual)
167
168	value := p.parseRvalue()
169	var tableKey []string
170	if len(p.currentTable) > 0 {
171		tableKey = p.currentTable
172	} else {
173		tableKey = []string{}
174	}
175
176	// find the table to assign, looking out for arrays of tables
177	var targetNode *Tree
178	switch node := p.tree.GetPath(tableKey).(type) {
179	case []*Tree:
180		targetNode = node[len(node)-1]
181	case *Tree:
182		targetNode = node
183	default:
184		p.raiseError(key, "Unknown table type for path: %s",
185			strings.Join(tableKey, "."))
186	}
187
188	// assign value to the found table
189	keyVals := []string{key.val}
190	if len(keyVals) != 1 {
191		p.raiseError(key, "Invalid key")
192	}
193	keyVal := keyVals[0]
194	localKey := []string{keyVal}
195	finalKey := append(tableKey, keyVal)
196	if targetNode.GetPath(localKey) != nil {
197		p.raiseError(key, "The following key was defined twice: %s",
198			strings.Join(finalKey, "."))
199	}
200	var toInsert interface{}
201
202	switch value.(type) {
203	case *Tree, []*Tree:
204		toInsert = value
205	default:
206		toInsert = &tomlValue{value: value, position: key.Position}
207	}
208	targetNode.values[keyVal] = toInsert
209	return p.parseStart
210}
211
212var numberUnderscoreInvalidRegexp *regexp.Regexp
213var hexNumberUnderscoreInvalidRegexp *regexp.Regexp
214
215func numberContainsInvalidUnderscore(value string) error {
216	if numberUnderscoreInvalidRegexp.MatchString(value) {
217		return errors.New("invalid use of _ in number")
218	}
219	return nil
220}
221
222func hexNumberContainsInvalidUnderscore(value string) error {
223	if hexNumberUnderscoreInvalidRegexp.MatchString(value) {
224		return errors.New("invalid use of _ in hex number")
225	}
226	return nil
227}
228
229func cleanupNumberToken(value string) string {
230	cleanedVal := strings.Replace(value, "_", "", -1)
231	return cleanedVal
232}
233
234func (p *tomlParser) parseRvalue() interface{} {
235	tok := p.getToken()
236	if tok == nil || tok.typ == tokenEOF {
237		p.raiseError(tok, "expecting a value")
238	}
239
240	switch tok.typ {
241	case tokenString:
242		return tok.val
243	case tokenTrue:
244		return true
245	case tokenFalse:
246		return false
247	case tokenInf:
248		if tok.val[0] == '-' {
249			return math.Inf(-1)
250		}
251		return math.Inf(1)
252	case tokenNan:
253		return math.NaN()
254	case tokenInteger:
255		cleanedVal := cleanupNumberToken(tok.val)
256		var err error
257		var val int64
258		if len(cleanedVal) >= 3 && cleanedVal[0] == '0' {
259			switch cleanedVal[1] {
260			case 'x':
261				err = hexNumberContainsInvalidUnderscore(tok.val)
262				if err != nil {
263					p.raiseError(tok, "%s", err)
264				}
265				val, err = strconv.ParseInt(cleanedVal[2:], 16, 64)
266			case 'o':
267				err = numberContainsInvalidUnderscore(tok.val)
268				if err != nil {
269					p.raiseError(tok, "%s", err)
270				}
271				val, err = strconv.ParseInt(cleanedVal[2:], 8, 64)
272			case 'b':
273				err = numberContainsInvalidUnderscore(tok.val)
274				if err != nil {
275					p.raiseError(tok, "%s", err)
276				}
277				val, err = strconv.ParseInt(cleanedVal[2:], 2, 64)
278			default:
279				panic("invalid base") // the lexer should catch this first
280			}
281		} else {
282			err = numberContainsInvalidUnderscore(tok.val)
283			if err != nil {
284				p.raiseError(tok, "%s", err)
285			}
286			val, err = strconv.ParseInt(cleanedVal, 10, 64)
287		}
288		if err != nil {
289			p.raiseError(tok, "%s", err)
290		}
291		return val
292	case tokenFloat:
293		err := numberContainsInvalidUnderscore(tok.val)
294		if err != nil {
295			p.raiseError(tok, "%s", err)
296		}
297		cleanedVal := cleanupNumberToken(tok.val)
298		val, err := strconv.ParseFloat(cleanedVal, 64)
299		if err != nil {
300			p.raiseError(tok, "%s", err)
301		}
302		return val
303	case tokenDate:
304		val, err := time.ParseInLocation(time.RFC3339Nano, tok.val, time.UTC)
305		if err != nil {
306			p.raiseError(tok, "%s", err)
307		}
308		return val
309	case tokenLeftBracket:
310		return p.parseArray()
311	case tokenLeftCurlyBrace:
312		return p.parseInlineTable()
313	case tokenEqual:
314		p.raiseError(tok, "cannot have multiple equals for the same key")
315	case tokenError:
316		p.raiseError(tok, "%s", tok)
317	}
318
319	p.raiseError(tok, "never reached")
320
321	return nil
322}
323
324func tokenIsComma(t *token) bool {
325	return t != nil && t.typ == tokenComma
326}
327
328func (p *tomlParser) parseInlineTable() *Tree {
329	tree := newTree()
330	var previous *token
331Loop:
332	for {
333		follow := p.peek()
334		if follow == nil || follow.typ == tokenEOF {
335			p.raiseError(follow, "unterminated inline table")
336		}
337		switch follow.typ {
338		case tokenRightCurlyBrace:
339			p.getToken()
340			break Loop
341		case tokenKey:
342			if !tokenIsComma(previous) && previous != nil {
343				p.raiseError(follow, "comma expected between fields in inline table")
344			}
345			key := p.getToken()
346			p.assume(tokenEqual)
347			value := p.parseRvalue()
348			tree.Set(key.val, value)
349		case tokenComma:
350			if previous == nil {
351				p.raiseError(follow, "inline table cannot start with a comma")
352			}
353			if tokenIsComma(previous) {
354				p.raiseError(follow, "need field between two commas in inline table")
355			}
356			p.getToken()
357		default:
358			p.raiseError(follow, "unexpected token type in inline table: %s", follow.String())
359		}
360		previous = follow
361	}
362	if tokenIsComma(previous) {
363		p.raiseError(previous, "trailing comma at the end of inline table")
364	}
365	return tree
366}
367
368func (p *tomlParser) parseArray() interface{} {
369	var array []interface{}
370	arrayType := reflect.TypeOf(nil)
371	for {
372		follow := p.peek()
373		if follow == nil || follow.typ == tokenEOF {
374			p.raiseError(follow, "unterminated array")
375		}
376		if follow.typ == tokenRightBracket {
377			p.getToken()
378			break
379		}
380		val := p.parseRvalue()
381		if arrayType == nil {
382			arrayType = reflect.TypeOf(val)
383		}
384		if reflect.TypeOf(val) != arrayType {
385			p.raiseError(follow, "mixed types in array")
386		}
387		array = append(array, val)
388		follow = p.peek()
389		if follow == nil || follow.typ == tokenEOF {
390			p.raiseError(follow, "unterminated array")
391		}
392		if follow.typ != tokenRightBracket && follow.typ != tokenComma {
393			p.raiseError(follow, "missing comma")
394		}
395		if follow.typ == tokenComma {
396			p.getToken()
397		}
398	}
399	// An array of Trees is actually an array of inline
400	// tables, which is a shorthand for a table array. If the
401	// array was not converted from []interface{} to []*Tree,
402	// the two notations would not be equivalent.
403	if arrayType == reflect.TypeOf(newTree()) {
404		tomlArray := make([]*Tree, len(array))
405		for i, v := range array {
406			tomlArray[i] = v.(*Tree)
407		}
408		return tomlArray
409	}
410	return array
411}
412
413func parseToml(flow []token) *Tree {
414	result := newTree()
415	result.position = Position{1, 1}
416	parser := &tomlParser{
417		flowIdx:       0,
418		flow:          flow,
419		tree:          result,
420		currentTable:  make([]string, 0),
421		seenTableKeys: make([]string, 0),
422	}
423	parser.run()
424	return result
425}
426
427func init() {
428	numberUnderscoreInvalidRegexp = regexp.MustCompile(`([^\d]_|_[^\d])|_$|^_`)
429	hexNumberUnderscoreInvalidRegexp = regexp.MustCompile(`(^0x_)|([^\da-f]_|_[^\da-f])|_$|^_`)
430}
431