1// Ungronning is the reverse of gronning: turn statements 2// back into JSON. The expected input grammar is: 3// 4// Input ::= '--'* Statement (Statement | '--')* 5// Statement ::= Path Space* "=" Space* Value ";" "\n" 6// Path ::= (BareWord) ("." BareWord | ("[" Key "]"))* 7// Value ::= String | Number | "true" | "false" | "null" | "[]" | "{}" 8// BareWord ::= (UnicodeLu | UnicodeLl | UnicodeLm | UnicodeLo | UnicodeNl | '$' | '_') (UnicodeLu | UnicodeLl | UnicodeLm | UnicodeLo | UnicodeNl | UnicodeMn | UnicodeMc | UnicodeNd | UnicodePc | '$' | '_')* 9// Key ::= [0-9]+ | String 10// String ::= '"' (UnescapedRune | ("\" (["\/bfnrt] | ('u' Hex))))* '"' 11// UnescapedRune ::= [^#x0-#x1f"\] 12 13package main 14 15import ( 16 "encoding/json" 17 "fmt" 18 "strconv" 19 "strings" 20 "unicode" 21 "unicode/utf8" 22 23 "github.com/pkg/errors" 24) 25 26// errRecoverable is an error type to represent errors that 27// can be recovered from; e.g. an empty line in the input 28type errRecoverable struct { 29 msg string 30} 31 32func (e errRecoverable) Error() string { 33 return e.msg 34} 35 36// A lexer holds the state for lexing statements 37type lexer struct { 38 text string // The raw input text 39 pos int // The current byte offset in the text 40 width int // The width of the current rune in bytes 41 cur rune // The rune at the current position 42 prev rune // The rune at the previous position 43 tokens []token // The tokens that have been emitted 44 tokenStart int // The starting position of the current token 45} 46 47// newLexer returns a new lexer for the provided input string 48func newLexer(text string) *lexer { 49 return &lexer{ 50 text: text, 51 pos: 0, 52 tokenStart: 0, 53 tokens: make([]token, 0), 54 } 55} 56 57// lex runs the lexer and returns the lexed statement 58func (l *lexer) lex() statement { 59 60 for lexfn := lexStatement; lexfn != nil; { 61 lexfn = lexfn(l) 62 } 63 return l.tokens 64} 65 66// next gets the next rune in the input and updates the lexer state 67func (l *lexer) next() rune { 68 r, w := utf8.DecodeRuneInString(l.text[l.pos:]) 69 70 l.pos += w 71 l.width = w 72 73 l.prev = l.cur 74 l.cur = r 75 76 return r 77} 78 79// backup moves the lexer back one rune 80// can only be used once per call of next() 81func (l *lexer) backup() { 82 l.pos -= l.width 83} 84 85// peek returns the next rune in the input 86// without moving the internal pointer 87func (l *lexer) peek() rune { 88 r := l.next() 89 l.backup() 90 return r 91} 92 93// ignore skips the current token 94func (l *lexer) ignore() { 95 l.tokenStart = l.pos 96} 97 98// emit adds the current token to the token slice and 99// moves the tokenStart pointer to the current position 100func (l *lexer) emit(typ tokenTyp) { 101 t := token{ 102 text: l.text[l.tokenStart:l.pos], 103 typ: typ, 104 } 105 l.tokenStart = l.pos 106 107 l.tokens = append(l.tokens, t) 108} 109 110// accept moves the pointer if the next rune is in 111// the set of valid runes 112func (l *lexer) accept(valid string) bool { 113 if strings.ContainsRune(valid, l.next()) { 114 return true 115 } 116 l.backup() 117 return false 118} 119 120// acceptRun continually accepts runes from the 121// set of valid runes 122func (l *lexer) acceptRun(valid string) { 123 for strings.ContainsRune(valid, l.next()) { 124 } 125 l.backup() 126} 127 128// a runeCheck is a function that determines if a rune is valid 129// or not so that we can do complex checks against runes 130type runeCheck func(rune) bool 131 132// acceptFunc accepts a rune if the provided runeCheck 133// function returns true 134func (l *lexer) acceptFunc(fn runeCheck) bool { 135 if fn(l.next()) { 136 return true 137 } 138 l.backup() 139 return false 140} 141 142// acceptRunFunc continually accepts runes for as long 143// as the runeCheck function returns true 144func (l *lexer) acceptRunFunc(fn runeCheck) { 145 for fn(l.next()) { 146 } 147 l.backup() 148} 149 150// acceptUntil accepts runes until it hits a delimiter 151// rune contained in the provided string 152func (l *lexer) acceptUntil(delims string) { 153 for !strings.ContainsRune(delims, l.next()) { 154 if l.cur == utf8.RuneError { 155 return 156 } 157 } 158 l.backup() 159} 160 161// acceptUntilUnescaped accepts runes until it hits a delimiter 162// rune contained in the provided string, unless that rune was 163// escaped with a backslash 164func (l *lexer) acceptUntilUnescaped(delims string) { 165 166 // Read until we hit an unescaped rune or the end of the input 167 inEscape := false 168 for { 169 r := l.next() 170 if r == '\\' && !inEscape { 171 inEscape = true 172 continue 173 } 174 if strings.ContainsRune(delims, r) && !inEscape { 175 l.backup() 176 return 177 } 178 if l.cur == utf8.RuneError { 179 return 180 } 181 inEscape = false 182 } 183} 184 185// a lexFn accepts a lexer, performs some action on it and 186// then returns an appropriate lexFn for the next stage 187type lexFn func(*lexer) lexFn 188 189// lexStatement is the highest level lexFn. Its only job 190// is to determine which more specific lexFn to use 191func lexStatement(l *lexer) lexFn { 192 r := l.peek() 193 194 switch { 195 case r == '.' || validFirstRune(r): 196 return lexBareWord 197 case r == '[': 198 return lexBraces 199 case r == ' ', r == '=': 200 return lexValue 201 case r == '-': 202 // grep -A etc can add '--' lines to output 203 // we'll save the text but not actually do 204 // anything with them 205 return lexIgnore 206 case r == utf8.RuneError: 207 return nil 208 default: 209 l.emit(typError) 210 return nil 211 } 212 213} 214 215// lexBareWord lexes for bare identifiers. 216// E.g: the 'foo' in 'foo.bar' or 'foo[0]' is a bare identifier 217func lexBareWord(l *lexer) lexFn { 218 if l.accept(".") { 219 l.emit(typDot) 220 } 221 222 if !l.acceptFunc(validFirstRune) { 223 l.emit(typError) 224 return nil 225 } 226 l.acceptRunFunc(validSecondaryRune) 227 l.emit(typBare) 228 229 return lexStatement 230} 231 232// lexBraces lexes keys contained within square braces 233func lexBraces(l *lexer) lexFn { 234 l.accept("[") 235 l.emit(typLBrace) 236 237 switch { 238 case unicode.IsNumber(l.peek()): 239 return lexNumericKey 240 case l.peek() == '"': 241 return lexQuotedKey 242 default: 243 l.emit(typError) 244 return nil 245 } 246} 247 248// lexNumericKey lexes numeric keys between square braces 249func lexNumericKey(l *lexer) lexFn { 250 l.accept("[") 251 l.ignore() 252 253 l.acceptRunFunc(unicode.IsNumber) 254 l.emit(typNumericKey) 255 256 if l.accept("]") { 257 l.emit(typRBrace) 258 } else { 259 l.emit(typError) 260 return nil 261 } 262 l.ignore() 263 return lexStatement 264} 265 266// lexQuotedKey lexes quoted keys between square braces 267func lexQuotedKey(l *lexer) lexFn { 268 l.accept("[") 269 l.ignore() 270 271 l.accept(`"`) 272 273 l.acceptUntilUnescaped(`"`) 274 l.accept(`"`) 275 l.emit(typQuotedKey) 276 277 if l.accept("]") { 278 l.emit(typRBrace) 279 } else { 280 l.emit(typError) 281 return nil 282 } 283 l.ignore() 284 return lexStatement 285} 286 287// lexValue lexes a value at the end of a statement 288func lexValue(l *lexer) lexFn { 289 l.acceptRun(" ") 290 l.ignore() 291 292 if l.accept("=") { 293 l.emit(typEquals) 294 } else { 295 return nil 296 } 297 l.acceptRun(" ") 298 l.ignore() 299 300 switch { 301 302 case l.accept(`"`): 303 l.acceptUntilUnescaped(`"`) 304 l.accept(`"`) 305 l.emit(typString) 306 307 case l.accept("t"): 308 l.acceptRun("rue") 309 l.emit(typTrue) 310 311 case l.accept("f"): 312 l.acceptRun("alse") 313 l.emit(typFalse) 314 315 case l.accept("n"): 316 l.acceptRun("ul") 317 l.emit(typNull) 318 319 case l.accept("["): 320 l.accept("]") 321 l.emit(typEmptyArray) 322 323 case l.accept("{"): 324 l.accept("}") 325 l.emit(typEmptyObject) 326 327 default: 328 // Assume number 329 l.acceptUntil(";") 330 l.emit(typNumber) 331 } 332 333 l.acceptRun(" ") 334 l.ignore() 335 336 if l.accept(";") { 337 l.emit(typSemi) 338 } 339 340 // The value should always be the last thing 341 // in the statement 342 return nil 343} 344 345// lexIgnore accepts runes until the end of the input 346// and emits them as a typIgnored token 347func lexIgnore(l *lexer) lexFn { 348 l.acceptRunFunc(func(r rune) bool { 349 return r != utf8.RuneError 350 }) 351 l.emit(typIgnored) 352 return nil 353} 354 355// ungronTokens turns a slice of tokens into an actual datastructure 356func ungronTokens(ts []token) (interface{}, error) { 357 if len(ts) == 0 { 358 return nil, errRecoverable{"empty input"} 359 } 360 361 if ts[0].typ == typIgnored { 362 return nil, errRecoverable{"ignored token"} 363 } 364 365 if ts[len(ts)-1].typ == typError { 366 return nil, errors.New("invalid statement") 367 } 368 369 // The last token should be typSemi so we need to check 370 // the second to last token is a value rather than the 371 // last one 372 if len(ts) > 1 && !ts[len(ts)-2].isValue() { 373 return nil, errors.New("statement has no value") 374 } 375 376 t := ts[0] 377 switch { 378 case t.isPunct(): 379 // Skip the token 380 val, err := ungronTokens(ts[1:]) 381 if err != nil { 382 return nil, err 383 } 384 return val, nil 385 386 case t.isValue(): 387 var val interface{} 388 d := json.NewDecoder(strings.NewReader(t.text)) 389 d.UseNumber() 390 err := d.Decode(&val) 391 if err != nil { 392 return nil, fmt.Errorf("invalid value `%s`", t.text) 393 } 394 return val, nil 395 396 case t.typ == typBare: 397 val, err := ungronTokens(ts[1:]) 398 if err != nil { 399 return nil, err 400 } 401 out := make(map[string]interface{}) 402 out[t.text] = val 403 return out, nil 404 405 case t.typ == typQuotedKey: 406 val, err := ungronTokens(ts[1:]) 407 if err != nil { 408 return nil, err 409 } 410 key := "" 411 err = json.Unmarshal([]byte(t.text), &key) 412 if err != nil { 413 return nil, fmt.Errorf("invalid quoted key `%s`", t.text) 414 } 415 416 out := make(map[string]interface{}) 417 out[key] = val 418 return out, nil 419 420 case t.typ == typNumericKey: 421 key, err := strconv.Atoi(t.text) 422 if err != nil { 423 return nil, fmt.Errorf("invalid integer key `%s`", t.text) 424 } 425 426 val, err := ungronTokens(ts[1:]) 427 if err != nil { 428 return nil, err 429 } 430 431 // There needs to be at least key + 1 space in the array 432 out := make([]interface{}, key+1) 433 out[key] = val 434 return out, nil 435 436 default: 437 return nil, fmt.Errorf("unexpected token `%s`", t.text) 438 } 439} 440 441// recursiveMerge merges maps and slices, or returns b for scalars 442func recursiveMerge(a, b interface{}) (interface{}, error) { 443 switch a.(type) { 444 445 case map[string]interface{}: 446 bMap, ok := b.(map[string]interface{}) 447 if !ok { 448 return nil, fmt.Errorf("cannot merge object with non-object") 449 } 450 return recursiveMapMerge(a.(map[string]interface{}), bMap) 451 452 case []interface{}: 453 bSlice, ok := b.([]interface{}) 454 if !ok { 455 return nil, fmt.Errorf("cannot merge array with non-array") 456 } 457 return recursiveSliceMerge(a.([]interface{}), bSlice) 458 459 case string, int, float64, bool, nil: 460 // Can't merge them, second one wins 461 return b, nil 462 463 default: 464 return nil, fmt.Errorf("unexpected data type for merge") 465 } 466} 467 468// recursiveMapMerge recursively merges map[string]interface{} values 469func recursiveMapMerge(a, b map[string]interface{}) (map[string]interface{}, error) { 470 // Merge keys from b into a 471 for k, v := range b { 472 _, exists := a[k] 473 if !exists { 474 // Doesn't exist in a, just add it in 475 a[k] = v 476 } else { 477 // Does exist, merge the values 478 merged, err := recursiveMerge(a[k], b[k]) 479 if err != nil { 480 return nil, err 481 } 482 483 a[k] = merged 484 } 485 } 486 return a, nil 487} 488 489// recursiveSliceMerge recursively merged []interface{} values 490func recursiveSliceMerge(a, b []interface{}) ([]interface{}, error) { 491 // We need a new slice with the capacity of whichever 492 // slive is biggest 493 outLen := len(a) 494 if len(b) > outLen { 495 outLen = len(b) 496 } 497 out := make([]interface{}, outLen) 498 499 // Copy the values from 'a' into the output slice 500 copy(out, a) 501 502 // Add the values from 'b'; merging existing keys 503 for k, v := range b { 504 if out[k] == nil { 505 out[k] = v 506 } else if v != nil { 507 merged, err := recursiveMerge(out[k], b[k]) 508 if err != nil { 509 return nil, err 510 } 511 out[k] = merged 512 } 513 } 514 return out, nil 515} 516