// TOML Parser.
2
3package toml
4
5import (
6	"errors"
7	"fmt"
8	"reflect"
9	"regexp"
10	"strconv"
11	"strings"
12	"time"
13)
14
15type tomlParser struct {
16	flow          chan token
17	tree          *TomlTree
18	tokensBuffer  []token
19	currentTable  []string
20	seenTableKeys []string
21}
22
23type tomlParserStateFn func() tomlParserStateFn
24
25// Formats and panics an error message based on a token
26func (p *tomlParser) raiseError(tok *token, msg string, args ...interface{}) {
27	panic(tok.Position.String() + ": " + fmt.Sprintf(msg, args...))
28}
29
30func (p *tomlParser) run() {
31	for state := p.parseStart; state != nil; {
32		state = state()
33	}
34}
35
36func (p *tomlParser) peek() *token {
37	if len(p.tokensBuffer) != 0 {
38		return &(p.tokensBuffer[0])
39	}
40
41	tok, ok := <-p.flow
42	if !ok {
43		return nil
44	}
45	p.tokensBuffer = append(p.tokensBuffer, tok)
46	return &tok
47}
48
49func (p *tomlParser) assume(typ tokenType) {
50	tok := p.getToken()
51	if tok == nil {
52		p.raiseError(tok, "was expecting token %s, but token stream is empty", tok)
53	}
54	if tok.typ != typ {
55		p.raiseError(tok, "was expecting token %s, but got %s instead", typ, tok)
56	}
57}
58
59func (p *tomlParser) getToken() *token {
60	if len(p.tokensBuffer) != 0 {
61		tok := p.tokensBuffer[0]
62		p.tokensBuffer = p.tokensBuffer[1:]
63		return &tok
64	}
65	tok, ok := <-p.flow
66	if !ok {
67		return nil
68	}
69	return &tok
70}
71
72func (p *tomlParser) parseStart() tomlParserStateFn {
73	tok := p.peek()
74
75	// end of stream, parsing is finished
76	if tok == nil {
77		return nil
78	}
79
80	switch tok.typ {
81	case tokenDoubleLeftBracket:
82		return p.parseGroupArray
83	case tokenLeftBracket:
84		return p.parseGroup
85	case tokenKey:
86		return p.parseAssign
87	case tokenEOF:
88		return nil
89	default:
90		p.raiseError(tok, "unexpected token")
91	}
92	return nil
93}
94
95func (p *tomlParser) parseGroupArray() tomlParserStateFn {
96	startToken := p.getToken() // discard the [[
97	key := p.getToken()
98	if key.typ != tokenKeyGroupArray {
99		p.raiseError(key, "unexpected token %s, was expecting a table array key", key)
100	}
101
102	// get or create table array element at the indicated part in the path
103	keys, err := parseKey(key.val)
104	if err != nil {
105		p.raiseError(key, "invalid table array key: %s", err)
106	}
107	p.tree.createSubTree(keys[:len(keys)-1], startToken.Position) // create parent entries
108	destTree := p.tree.GetPath(keys)
109	var array []*TomlTree
110	if destTree == nil {
111		array = make([]*TomlTree, 0)
112	} else if target, ok := destTree.([]*TomlTree); ok && target != nil {
113		array = destTree.([]*TomlTree)
114	} else {
115		p.raiseError(key, "key %s is already assigned and not of type table array", key)
116	}
117	p.currentTable = keys
118
119	// add a new tree to the end of the table array
120	newTree := newTomlTree()
121	newTree.position = startToken.Position
122	array = append(array, newTree)
123	p.tree.SetPath(p.currentTable, array)
124
125	// remove all keys that were children of this table array
126	prefix := key.val + "."
127	found := false
128	for ii := 0; ii < len(p.seenTableKeys); {
129		tableKey := p.seenTableKeys[ii]
130		if strings.HasPrefix(tableKey, prefix) {
131			p.seenTableKeys = append(p.seenTableKeys[:ii], p.seenTableKeys[ii+1:]...)
132		} else {
133			found = (tableKey == key.val)
134			ii++
135		}
136	}
137
138	// keep this key name from use by other kinds of assignments
139	if !found {
140		p.seenTableKeys = append(p.seenTableKeys, key.val)
141	}
142
143	// move to next parser state
144	p.assume(tokenDoubleRightBracket)
145	return p.parseStart
146}
147
148func (p *tomlParser) parseGroup() tomlParserStateFn {
149	startToken := p.getToken() // discard the [
150	key := p.getToken()
151	if key.typ != tokenKeyGroup {
152		p.raiseError(key, "unexpected token %s, was expecting a table key", key)
153	}
154	for _, item := range p.seenTableKeys {
155		if item == key.val {
156			p.raiseError(key, "duplicated tables")
157		}
158	}
159
160	p.seenTableKeys = append(p.seenTableKeys, key.val)
161	keys, err := parseKey(key.val)
162	if err != nil {
163		p.raiseError(key, "invalid table array key: %s", err)
164	}
165	if err := p.tree.createSubTree(keys, startToken.Position); err != nil {
166		p.raiseError(key, "%s", err)
167	}
168	p.assume(tokenRightBracket)
169	p.currentTable = keys
170	return p.parseStart
171}
172
173func (p *tomlParser) parseAssign() tomlParserStateFn {
174	key := p.getToken()
175	p.assume(tokenEqual)
176
177	value := p.parseRvalue()
178	var tableKey []string
179	if len(p.currentTable) > 0 {
180		tableKey = p.currentTable
181	} else {
182		tableKey = []string{}
183	}
184
185	// find the table to assign, looking out for arrays of tables
186	var targetNode *TomlTree
187	switch node := p.tree.GetPath(tableKey).(type) {
188	case []*TomlTree:
189		targetNode = node[len(node)-1]
190	case *TomlTree:
191		targetNode = node
192	default:
193		p.raiseError(key, "Unknown table type for path: %s",
194			strings.Join(tableKey, "."))
195	}
196
197	// assign value to the found table
198	keyVals, err := parseKey(key.val)
199	if err != nil {
200		p.raiseError(key, "%s", err)
201	}
202	if len(keyVals) != 1 {
203		p.raiseError(key, "Invalid key")
204	}
205	keyVal := keyVals[0]
206	localKey := []string{keyVal}
207	finalKey := append(tableKey, keyVal)
208	if targetNode.GetPath(localKey) != nil {
209		p.raiseError(key, "The following key was defined twice: %s",
210			strings.Join(finalKey, "."))
211	}
212	var toInsert interface{}
213
214	switch value.(type) {
215	case *TomlTree, []*TomlTree:
216		toInsert = value
217	default:
218		toInsert = &tomlValue{value, key.Position}
219	}
220	targetNode.values[keyVal] = toInsert
221	return p.parseStart
222}
223
// numberUnderscoreInvalidRegexp matches a misplaced underscore in a number
// token: one that is first or last in the token, or that has a non-digit
// character right before or right after it.
//
// It is initialised at its declaration so that it is never nil when
// cleanupNumberToken runs, whatever the initialisation order. The init
// function of this file assigns the same pattern again, which is harmless.
var numberUnderscoreInvalidRegexp = regexp.MustCompile(`([^\d]_|_[^\d]|_$|^_)`)

// cleanupNumberToken validates the underscores used as digit separators in
// value and returns value with every underscore removed.
//
// It returns an empty string and a non-nil error when an underscore is
// misplaced (see numberUnderscoreInvalidRegexp).
func cleanupNumberToken(value string) (string, error) {
	if numberUnderscoreInvalidRegexp.MatchString(value) {
		return "", errors.New("invalid use of _ in number")
	}
	cleanedVal := strings.Replace(value, "_", "", -1)
	return cleanedVal, nil
}
233
234func (p *tomlParser) parseRvalue() interface{} {
235	tok := p.getToken()
236	if tok == nil || tok.typ == tokenEOF {
237		p.raiseError(tok, "expecting a value")
238	}
239
240	switch tok.typ {
241	case tokenString:
242		return tok.val
243	case tokenTrue:
244		return true
245	case tokenFalse:
246		return false
247	case tokenInteger:
248		cleanedVal, err := cleanupNumberToken(tok.val)
249		if err != nil {
250			p.raiseError(tok, "%s", err)
251		}
252		val, err := strconv.ParseInt(cleanedVal, 10, 64)
253		if err != nil {
254			p.raiseError(tok, "%s", err)
255		}
256		return val
257	case tokenFloat:
258		cleanedVal, err := cleanupNumberToken(tok.val)
259		if err != nil {
260			p.raiseError(tok, "%s", err)
261		}
262		val, err := strconv.ParseFloat(cleanedVal, 64)
263		if err != nil {
264			p.raiseError(tok, "%s", err)
265		}
266		return val
267	case tokenDate:
268		val, err := time.ParseInLocation(time.RFC3339Nano, tok.val, time.UTC)
269		if err != nil {
270			p.raiseError(tok, "%s", err)
271		}
272		return val
273	case tokenLeftBracket:
274		return p.parseArray()
275	case tokenLeftCurlyBrace:
276		return p.parseInlineTable()
277	case tokenEqual:
278		p.raiseError(tok, "cannot have multiple equals for the same key")
279	case tokenError:
280		p.raiseError(tok, "%s", tok)
281	}
282
283	p.raiseError(tok, "never reached")
284
285	return nil
286}
287
288func tokenIsComma(t *token) bool {
289	return t != nil && t.typ == tokenComma
290}
291
292func (p *tomlParser) parseInlineTable() *TomlTree {
293	tree := newTomlTree()
294	var previous *token
295Loop:
296	for {
297		follow := p.peek()
298		if follow == nil || follow.typ == tokenEOF {
299			p.raiseError(follow, "unterminated inline table")
300		}
301		switch follow.typ {
302		case tokenRightCurlyBrace:
303			p.getToken()
304			break Loop
305		case tokenKey:
306			if !tokenIsComma(previous) && previous != nil {
307				p.raiseError(follow, "comma expected between fields in inline table")
308			}
309			key := p.getToken()
310			p.assume(tokenEqual)
311			value := p.parseRvalue()
312			tree.Set(key.val, value)
313		case tokenComma:
314			if previous == nil {
315				p.raiseError(follow, "inline table cannot start with a comma")
316			}
317			if tokenIsComma(previous) {
318				p.raiseError(follow, "need field between two commas in inline table")
319			}
320			p.getToken()
321		default:
322			p.raiseError(follow, "unexpected token type in inline table: %s", follow.typ.String())
323		}
324		previous = follow
325	}
326	if tokenIsComma(previous) {
327		p.raiseError(previous, "trailing comma at the end of inline table")
328	}
329	return tree
330}
331
332func (p *tomlParser) parseArray() interface{} {
333	var array []interface{}
334	arrayType := reflect.TypeOf(nil)
335	for {
336		follow := p.peek()
337		if follow == nil || follow.typ == tokenEOF {
338			p.raiseError(follow, "unterminated array")
339		}
340		if follow.typ == tokenRightBracket {
341			p.getToken()
342			break
343		}
344		val := p.parseRvalue()
345		if arrayType == nil {
346			arrayType = reflect.TypeOf(val)
347		}
348		if reflect.TypeOf(val) != arrayType {
349			p.raiseError(follow, "mixed types in array")
350		}
351		array = append(array, val)
352		follow = p.peek()
353		if follow == nil || follow.typ == tokenEOF {
354			p.raiseError(follow, "unterminated array")
355		}
356		if follow.typ != tokenRightBracket && follow.typ != tokenComma {
357			p.raiseError(follow, "missing comma")
358		}
359		if follow.typ == tokenComma {
360			p.getToken()
361		}
362	}
363	// An array of TomlTrees is actually an array of inline
364	// tables, which is a shorthand for a table array. If the
365	// array was not converted from []interface{} to []*TomlTree,
366	// the two notations would not be equivalent.
367	if arrayType == reflect.TypeOf(newTomlTree()) {
368		tomlArray := make([]*TomlTree, len(array))
369		for i, v := range array {
370			tomlArray[i] = v.(*TomlTree)
371		}
372		return tomlArray
373	}
374	return array
375}
376
377func parseToml(flow chan token) *TomlTree {
378	result := newTomlTree()
379	result.position = Position{1, 1}
380	parser := &tomlParser{
381		flow:          flow,
382		tree:          result,
383		tokensBuffer:  make([]token, 0),
384		currentTable:  make([]string, 0),
385		seenTableKeys: make([]string, 0),
386	}
387	parser.run()
388	return result
389}
390
391func init() {
392	numberUnderscoreInvalidRegexp = regexp.MustCompile(`([^\d]_|_[^\d]|_$|^_)`)
393}
394