1// Ungronning is the reverse of gronning: turn statements
2// back into JSON. The expected input grammar is:
3//
4//   Input ::= '--'* Statement (Statement | '--')*
5//   Statement ::= Path Space* "=" Space* Value ";" "\n"
6//   Path ::= (BareWord) ("." BareWord | ("[" Key "]"))*
7//   Value ::= String | Number | "true" | "false" | "null" | "[]" | "{}"
8//   BareWord ::= (UnicodeLu | UnicodeLl | UnicodeLm | UnicodeLo | UnicodeNl | '$' | '_') (UnicodeLu | UnicodeLl | UnicodeLm | UnicodeLo | UnicodeNl | UnicodeMn | UnicodeMc | UnicodeNd | UnicodePc | '$' | '_')*
9//   Key ::= [0-9]+ | String
10//   String ::= '"' (UnescapedRune | ("\" (["\/bfnrt] | ('u' Hex))))* '"'
11//   UnescapedRune ::= [^#x0-#x1f"\]
12
13package main
14
15import (
16	"encoding/json"
17	"fmt"
18	"strconv"
19	"strings"
20	"unicode"
21	"unicode/utf8"
22
23	"github.com/pkg/errors"
24)
25
// errRecoverable represents a non-fatal problem with a single
// input statement (for example an empty line): the caller may
// note it and carry on with the rest of the input.
type errRecoverable struct {
	msg string
}

// Error implements the error interface.
func (e errRecoverable) Error() string {
	return e.msg
}
35
// A lexer holds the state for lexing statements; it scans the
// input one rune at a time, accumulating runs of runes into
// tokens that are appended to the tokens slice
type lexer struct {
	text       string  // The raw input text
	pos        int     // The current byte offset in the text
	width      int     // The width of the current rune in bytes (needed by backup)
	cur        rune    // The rune at the current position
	prev       rune    // The rune at the previous position
	tokens     []token // The tokens that have been emitted
	tokenStart int     // The starting byte offset of the current token
}
46
47// newLexer returns a new lexer for the provided input string
48func newLexer(text string) *lexer {
49	return &lexer{
50		text:       text,
51		pos:        0,
52		tokenStart: 0,
53		tokens:     make([]token, 0),
54	}
55}
56
57// lex runs the lexer and returns the lexed statement
58func (l *lexer) lex() statement {
59
60	for lexfn := lexStatement; lexfn != nil; {
61		lexfn = lexfn(l)
62	}
63	return l.tokens
64}
65
66// next gets the next rune in the input and updates the lexer state
67func (l *lexer) next() rune {
68	r, w := utf8.DecodeRuneInString(l.text[l.pos:])
69
70	l.pos += w
71	l.width = w
72
73	l.prev = l.cur
74	l.cur = r
75
76	return r
77}
78
// backup moves the lexer back one rune; because only the width
// of the most recently read rune is stored, it can only be used
// once per call of next()
func (l *lexer) backup() {
	l.pos -= l.width
}
84
85// peek returns the next rune in the input
86// without moving the internal pointer
87func (l *lexer) peek() rune {
88	r := l.next()
89	l.backup()
90	return r
91}
92
// ignore discards the text scanned since the last token, so it
// won't be included in the next emitted token
func (l *lexer) ignore() {
	l.tokenStart = l.pos
}
97
98// emit adds the current token to the token slice and
99// moves the tokenStart pointer to the current position
100func (l *lexer) emit(typ tokenTyp) {
101	t := token{
102		text: l.text[l.tokenStart:l.pos],
103		typ:  typ,
104	}
105	l.tokenStart = l.pos
106
107	l.tokens = append(l.tokens, t)
108}
109
110// accept moves the pointer if the next rune is in
111// the set of valid runes
112func (l *lexer) accept(valid string) bool {
113	if strings.ContainsRune(valid, l.next()) {
114		return true
115	}
116	l.backup()
117	return false
118}
119
120// acceptRun continually accepts runes from the
121// set of valid runes
122func (l *lexer) acceptRun(valid string) {
123	for strings.ContainsRune(valid, l.next()) {
124	}
125	l.backup()
126}
127
// a runeCheck is a function that determines if a rune is valid
// or not, so that checks too complex for a simple character set
// string (e.g. unicode.IsNumber) can be run against runes
type runeCheck func(rune) bool
131
132// acceptFunc accepts a rune if the provided runeCheck
133// function returns true
134func (l *lexer) acceptFunc(fn runeCheck) bool {
135	if fn(l.next()) {
136		return true
137	}
138	l.backup()
139	return false
140}
141
142// acceptRunFunc continually accepts runes for as long
143// as the runeCheck function returns true
144func (l *lexer) acceptRunFunc(fn runeCheck) {
145	for fn(l.next()) {
146	}
147	l.backup()
148}
149
150// acceptUntil accepts runes until it hits a delimiter
151// rune contained in the provided string
152func (l *lexer) acceptUntil(delims string) {
153	for !strings.ContainsRune(delims, l.next()) {
154		if l.cur == utf8.RuneError {
155			return
156		}
157	}
158	l.backup()
159}
160
161// acceptUntilUnescaped accepts runes until it hits a delimiter
162// rune contained in the provided string, unless that rune was
163// escaped with a backslash
164func (l *lexer) acceptUntilUnescaped(delims string) {
165
166	// Read until we hit an unescaped rune or the end of the input
167	inEscape := false
168	for {
169		r := l.next()
170		if r == '\\' && !inEscape {
171			inEscape = true
172			continue
173		}
174		if strings.ContainsRune(delims, r) && !inEscape {
175			l.backup()
176			return
177		}
178		if l.cur == utf8.RuneError {
179			return
180		}
181		inEscape = false
182	}
183}
184
// a lexFn accepts a lexer, performs some action on it and
// then returns an appropriate lexFn for the next stage;
// returning nil ends the lexing loop
type lexFn func(*lexer) lexFn
188
189// lexStatement is the highest level lexFn. Its only job
190// is to determine which more specific lexFn to use
191func lexStatement(l *lexer) lexFn {
192	r := l.peek()
193
194	switch {
195	case r == '.' || validFirstRune(r):
196		return lexBareWord
197	case r == '[':
198		return lexBraces
199	case r == ' ', r == '=':
200		return lexValue
201	case r == '-':
202		// grep -A etc can add '--' lines to output
203		// we'll save the text but not actually do
204		// anything with them
205		return lexIgnore
206	case r == utf8.RuneError:
207		return nil
208	default:
209		l.emit(typError)
210		return nil
211	}
212
213}
214
215// lexBareWord lexes for bare identifiers.
216// E.g: the 'foo' in 'foo.bar' or 'foo[0]' is a bare identifier
217func lexBareWord(l *lexer) lexFn {
218	if l.accept(".") {
219		l.emit(typDot)
220	}
221
222	if !l.acceptFunc(validFirstRune) {
223		l.emit(typError)
224		return nil
225	}
226	l.acceptRunFunc(validSecondaryRune)
227	l.emit(typBare)
228
229	return lexStatement
230}
231
232// lexBraces lexes keys contained within square braces
233func lexBraces(l *lexer) lexFn {
234	l.accept("[")
235	l.emit(typLBrace)
236
237	switch {
238	case unicode.IsNumber(l.peek()):
239		return lexNumericKey
240	case l.peek() == '"':
241		return lexQuotedKey
242	default:
243		l.emit(typError)
244		return nil
245	}
246}
247
248// lexNumericKey lexes numeric keys between square braces
249func lexNumericKey(l *lexer) lexFn {
250	l.accept("[")
251	l.ignore()
252
253	l.acceptRunFunc(unicode.IsNumber)
254	l.emit(typNumericKey)
255
256	if l.accept("]") {
257		l.emit(typRBrace)
258	} else {
259		l.emit(typError)
260		return nil
261	}
262	l.ignore()
263	return lexStatement
264}
265
266// lexQuotedKey lexes quoted keys between square braces
267func lexQuotedKey(l *lexer) lexFn {
268	l.accept("[")
269	l.ignore()
270
271	l.accept(`"`)
272
273	l.acceptUntilUnescaped(`"`)
274	l.accept(`"`)
275	l.emit(typQuotedKey)
276
277	if l.accept("]") {
278		l.emit(typRBrace)
279	} else {
280		l.emit(typError)
281		return nil
282	}
283	l.ignore()
284	return lexStatement
285}
286
// lexValue lexes the '= value;' part at the end of a statement.
// The first rune after the equals sign decides the value's type
func lexValue(l *lexer) lexFn {
	// Discard any space before the equals sign
	l.acceptRun(" ")
	l.ignore()

	if l.accept("=") {
		l.emit(typEquals)
	} else {
		// No equals sign means no value to lex
		return nil
	}
	// Discard any space after the equals sign
	l.acceptRun(" ")
	l.ignore()

	switch {

	// Strings include their surrounding quotes in the token text
	case l.accept(`"`):
		l.acceptUntilUnescaped(`"`)
		l.accept(`"`)
		l.emit(typString)

	// The keyword cases are lenient: acceptRun takes runes from a
	// set, so e.g. 'treu' would also be emitted as typTrue; the
	// token text is kept verbatim, so presumably such malformed
	// keywords fail later when the value is JSON-decoded
	case l.accept("t"):
		l.acceptRun("rue")
		l.emit(typTrue)

	case l.accept("f"):
		l.acceptRun("alse")
		l.emit(typFalse)

	case l.accept("n"):
		l.acceptRun("ul")
		l.emit(typNull)

	case l.accept("["):
		l.accept("]")
		l.emit(typEmptyArray)

	case l.accept("{"):
		l.accept("}")
		l.emit(typEmptyObject)

	default:
		// Assume number: take everything up to the semicolon
		l.acceptUntil(";")
		l.emit(typNumber)
	}

	// Discard any space before the semicolon
	l.acceptRun(" ")
	l.ignore()

	if l.accept(";") {
		l.emit(typSemi)
	}

	// The value should always be the last thing
	// in the statement
	return nil
}
344
345// lexIgnore accepts runes until the end of the input
346// and emits them as a typIgnored token
347func lexIgnore(l *lexer) lexFn {
348	l.acceptRunFunc(func(r rune) bool {
349		return r != utf8.RuneError
350	})
351	l.emit(typIgnored)
352	return nil
353}
354
// ungronTokens turns a slice of tokens into an actual datastructure.
// It works recursively from left to right: each key token wraps the
// value built from the remaining tokens in a one-entry map or slice
func ungronTokens(ts []token) (interface{}, error) {
	if len(ts) == 0 {
		return nil, errRecoverable{"empty input"}
	}

	// Ignored lines (e.g. '--' separators from grep output) are
	// recoverable: skip the statement rather than abort
	if ts[0].typ == typIgnored {
		return nil, errRecoverable{"ignored token"}
	}

	// The lexer emits typError as its final token when it fails
	if ts[len(ts)-1].typ == typError {
		return nil, errors.New("invalid statement")
	}

	// The last token should be typSemi so we need to check
	// the second to last token is a value rather than the
	// last one
	if len(ts) > 1 && !ts[len(ts)-2].isValue() {
		return nil, errors.New("statement has no value")
	}

	t := ts[0]
	switch {
	case t.isPunct():
		// Skip the token; punctuation carries no structure of its own
		val, err := ungronTokens(ts[1:])
		if err != nil {
			return nil, err
		}
		return val, nil

	case t.isValue():
		// Base case: decode the value text as JSON. UseNumber keeps
		// numbers as json.Number rather than float64 so they
		// round-trip without losing precision
		var val interface{}
		d := json.NewDecoder(strings.NewReader(t.text))
		d.UseNumber()
		err := d.Decode(&val)
		if err != nil {
			return nil, fmt.Errorf("invalid value `%s`", t.text)
		}
		return val, nil

	case t.typ == typBare:
		// A bare key wraps the rest of the statement in a
		// single-entry map
		val, err := ungronTokens(ts[1:])
		if err != nil {
			return nil, err
		}
		out := make(map[string]interface{})
		out[t.text] = val
		return out, nil

	case t.typ == typQuotedKey:
		// As typBare, except the key text is a JSON string
		// that must be unescaped first
		val, err := ungronTokens(ts[1:])
		if err != nil {
			return nil, err
		}
		key := ""
		err = json.Unmarshal([]byte(t.text), &key)
		if err != nil {
			return nil, fmt.Errorf("invalid quoted key `%s`", t.text)
		}

		out := make(map[string]interface{})
		out[key] = val
		return out, nil

	case t.typ == typNumericKey:
		key, err := strconv.Atoi(t.text)
		if err != nil {
			return nil, fmt.Errorf("invalid integer key `%s`", t.text)
		}

		val, err := ungronTokens(ts[1:])
		if err != nil {
			return nil, err
		}

		// There needs to be at least key + 1 space in the array;
		// earlier indices are left nil and filled in when separate
		// statements are merged together
		out := make([]interface{}, key+1)
		out[key] = val
		return out, nil

	default:
		return nil, fmt.Errorf("unexpected token `%s`", t.text)
	}
}
440
441// recursiveMerge merges maps and slices, or returns b for scalars
442func recursiveMerge(a, b interface{}) (interface{}, error) {
443	switch a.(type) {
444
445	case map[string]interface{}:
446		bMap, ok := b.(map[string]interface{})
447		if !ok {
448			return nil, fmt.Errorf("cannot merge object with non-object")
449		}
450		return recursiveMapMerge(a.(map[string]interface{}), bMap)
451
452	case []interface{}:
453		bSlice, ok := b.([]interface{})
454		if !ok {
455			return nil, fmt.Errorf("cannot merge array with non-array")
456		}
457		return recursiveSliceMerge(a.([]interface{}), bSlice)
458
459	case string, int, float64, bool, nil:
460		// Can't merge them, second one wins
461		return b, nil
462
463	default:
464		return nil, fmt.Errorf("unexpected data type for merge")
465	}
466}
467
468// recursiveMapMerge recursively merges map[string]interface{} values
469func recursiveMapMerge(a, b map[string]interface{}) (map[string]interface{}, error) {
470	// Merge keys from b into a
471	for k, v := range b {
472		_, exists := a[k]
473		if !exists {
474			// Doesn't exist in a, just add it in
475			a[k] = v
476		} else {
477			// Does exist, merge the values
478			merged, err := recursiveMerge(a[k], b[k])
479			if err != nil {
480				return nil, err
481			}
482
483			a[k] = merged
484		}
485	}
486	return a, nil
487}
488
489// recursiveSliceMerge recursively merged []interface{} values
490func recursiveSliceMerge(a, b []interface{}) ([]interface{}, error) {
491	// We need a new slice with the capacity of whichever
492	// slive is biggest
493	outLen := len(a)
494	if len(b) > outLen {
495		outLen = len(b)
496	}
497	out := make([]interface{}, outLen)
498
499	// Copy the values from 'a' into the output slice
500	copy(out, a)
501
502	// Add the values from 'b'; merging existing keys
503	for k, v := range b {
504		if out[k] == nil {
505			out[k] = v
506		} else if v != nil {
507			merged, err := recursiveMerge(out[k], b[k])
508			if err != nil {
509				return nil, err
510			}
511			out[k] = merged
512		}
513	}
514	return out, nil
515}
516