1// Copyright 2012-2014 Charles Banning. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file 4 5// keyvalues.go: Extract values from an arbitrary XML doc. Tag path can include wildcard characters. 6 7package mxj 8 9import ( 10 "errors" 11 "fmt" 12 "strconv" 13 "strings" 14) 15 16// ----------------------------- get everything FOR a single key ------------------------- 17 18const ( 19 minArraySize = 32 20) 21 22var defaultArraySize int = minArraySize 23 24// Adjust the buffers for expected number of values to return from ValuesForKey() and ValuesForPath(). 25// This can have the effect of significantly reducing memory allocation-copy functions for large data sets. 26// Returns the initial buffer size. 27func SetArraySize(size int) int { 28 if size > minArraySize { 29 defaultArraySize = size 30 } else { 31 defaultArraySize = minArraySize 32 } 33 return defaultArraySize 34} 35 36// Return all values in Map, 'mv', associated with a 'key'. If len(returned_values) == 0, then no match. 37// On error, the returned slice is 'nil'. NOTE: 'key' can be wildcard, "*". 38// 'subkeys' (optional) are "key:val[:type]" strings representing attributes or elements in a list. 39// - By default 'val' is of type string. "key:val:bool" and "key:val:float" to coerce them. 40// - For attributes prefix the label with a hyphen, '-', e.g., "-seq:3". 41// - If the 'key' refers to a list, then "key:value" could select a list member of the list. 42// - The subkey can be wildcarded - "key:*" - to require that it's there with some value. 43// - If a subkey is preceeded with the '!' character, the key:value[:type] entry is treated as an 44// exclusion critera - e.g., "!author:William T. Gaddis". 45// - If val contains ":" symbol, use SetFieldSeparator to a unused symbol, perhaps "|". 46func (mv Map) ValuesForKey(key string, subkeys ...string) ([]interface{}, error) { 47 m := map[string]interface{}(mv) 48 var subKeyMap map[string]interface{} 49 if len(subkeys) > 0 { 50 var err error 51 subKeyMap, err = getSubKeyMap(subkeys...) 52 if err != nil { 53 return nil, err 54 } 55 } 56 57 ret := make([]interface{}, 0, defaultArraySize) 58 var cnt int 59 hasKey(m, key, &ret, &cnt, subKeyMap) 60 return ret[:cnt], nil 61} 62 63var KeyNotExistError = errors.New("Key does not exist") 64 65// ValueForKey is a wrapper on ValuesForKey. It returns the first member of []interface{}, if any. 66// If there is no value, "nil, nil" is returned. 67func (mv Map) ValueForKey(key string, subkeys ...string) (interface{}, error) { 68 vals, err := mv.ValuesForKey(key, subkeys...) 69 if err != nil { 70 return nil, err 71 } 72 if len(vals) == 0 { 73 return nil, KeyNotExistError 74 } 75 return vals[0], nil 76} 77 78// hasKey - if the map 'key' exists append it to array 79// if it doesn't do nothing except scan array and map values 80func hasKey(iv interface{}, key string, ret *[]interface{}, cnt *int, subkeys map[string]interface{}) { 81 // func hasKey(iv interface{}, key string, ret *[]interface{}, subkeys map[string]interface{}) { 82 switch iv.(type) { 83 case map[string]interface{}: 84 vv := iv.(map[string]interface{}) 85 // see if the current value is of interest 86 if v, ok := vv[key]; ok { 87 switch v.(type) { 88 case map[string]interface{}: 89 if hasSubKeys(v, subkeys) { 90 *ret = append(*ret, v) 91 *cnt++ 92 } 93 case []interface{}: 94 for _, av := range v.([]interface{}) { 95 if hasSubKeys(av, subkeys) { 96 *ret = append(*ret, av) 97 *cnt++ 98 } 99 } 100 default: 101 if len(subkeys) == 0 { 102 *ret = append(*ret, v) 103 *cnt++ 104 } 105 } 106 } 107 108 // wildcard case 109 if key == "*" { 110 for _, v := range vv { 111 switch v.(type) { 112 case map[string]interface{}: 113 if hasSubKeys(v, subkeys) { 114 *ret = append(*ret, v) 115 *cnt++ 116 } 117 case []interface{}: 118 for _, av := range v.([]interface{}) { 119 if hasSubKeys(av, subkeys) { 120 *ret = append(*ret, av) 121 *cnt++ 122 } 123 } 124 default: 125 if len(subkeys) == 0 { 126 *ret = append(*ret, v) 127 *cnt++ 128 } 129 } 130 } 131 } 132 133 // scan the rest 134 for _, v := range vv { 135 hasKey(v, key, ret, cnt, subkeys) 136 } 137 case []interface{}: 138 for _, v := range iv.([]interface{}) { 139 hasKey(v, key, ret, cnt, subkeys) 140 } 141 } 142} 143 144// ----------------------- get everything for a node in the Map --------------------------- 145 146// Allow indexed arrays in "path" specification. (Request from Abhijit Kadam - abhijitk100@gmail.com.) 147// 2014.04.28 - implementation note. 148// Implemented as a wrapper of (old)ValuesForPath() because we need look-ahead logic to handle expansion 149// of wildcards and unindexed arrays. Embedding such logic into valuesForKeyPath() would have made the 150// code much more complicated; this wrapper is straightforward, easy to debug, and doesn't add significant overhead. 151 152// Retrieve all values for a path from the Map. If len(returned_values) == 0, then no match. 153// On error, the returned array is 'nil'. 154// 'path' is a dot-separated path of key values. 155// - If a node in the path is '*', then everything beyond is walked. 156// - 'path' can contain indexed array references, such as, "*.data[1]" and "msgs[2].data[0].field" - 157// even "*[2].*[0].field". 158// 'subkeys' (optional) are "key:val[:type]" strings representing attributes or elements in a list. 159// - By default 'val' is of type string. "key:val:bool" and "key:val:float" to coerce them. 160// - For attributes prefix the label with a hyphen, '-', e.g., "-seq:3". 161// - If the 'path' refers to a list, then "tag:value" would return member of the list. 162// - The subkey can be wildcarded - "key:*" - to require that it's there with some value. 163// - If a subkey is preceeded with the '!' character, the key:value[:type] entry is treated as an 164// exclusion critera - e.g., "!author:William T. Gaddis". 165// - If val contains ":" symbol, use SetFieldSeparator to a unused symbol, perhaps "|". 166func (mv Map) ValuesForPath(path string, subkeys ...string) ([]interface{}, error) { 167 // If there are no array indexes in path, use legacy ValuesForPath() logic. 168 if strings.Index(path, "[") < 0 { 169 return mv.oldValuesForPath(path, subkeys...) 170 } 171 172 var subKeyMap map[string]interface{} 173 if len(subkeys) > 0 { 174 var err error 175 subKeyMap, err = getSubKeyMap(subkeys...) 176 if err != nil { 177 return nil, err 178 } 179 } 180 181 keys, kerr := parsePath(path) 182 if kerr != nil { 183 return nil, kerr 184 } 185 186 vals, verr := valuesForArray(keys, mv) 187 if verr != nil { 188 return nil, verr // Vals may be nil, but return empty array. 189 } 190 191 // Need to handle subkeys ... only return members of vals that satisfy conditions. 192 retvals := make([]interface{}, 0) 193 for _, v := range vals { 194 if hasSubKeys(v, subKeyMap) { 195 retvals = append(retvals, v) 196 } 197 } 198 return retvals, nil 199} 200 201func valuesForArray(keys []*key, m Map) ([]interface{}, error) { 202 var tmppath string 203 var haveFirst bool 204 var vals []interface{} 205 var verr error 206 207 lastkey := len(keys) - 1 208 for i := 0; i <= lastkey; i++ { 209 if !haveFirst { 210 tmppath = keys[i].name 211 haveFirst = true 212 } else { 213 tmppath += "." + keys[i].name 214 } 215 216 // Look-ahead: explode wildcards and unindexed arrays. 217 // Need to handle un-indexed list recursively: 218 // e.g., path is "stuff.data[0]" rather than "stuff[0].data[0]". 219 // Need to treat it as "stuff[0].data[0]", "stuff[1].data[0]", ... 220 if !keys[i].isArray && i < lastkey && keys[i+1].isArray { 221 // Can't pass subkeys because we may not be at literal end of path. 222 vv, vverr := m.oldValuesForPath(tmppath) 223 if vverr != nil { 224 return nil, vverr 225 } 226 for _, v := range vv { 227 // See if we can walk the value. 228 am, ok := v.(map[string]interface{}) 229 if !ok { 230 continue 231 } 232 // Work the backend. 233 nvals, nvalserr := valuesForArray(keys[i+1:], Map(am)) 234 if nvalserr != nil { 235 return nil, nvalserr 236 } 237 vals = append(vals, nvals...) 238 } 239 break // have recursed the whole path - return 240 } 241 242 if keys[i].isArray || i == lastkey { 243 // Don't pass subkeys because may not be at literal end of path. 244 vals, verr = m.oldValuesForPath(tmppath) 245 } else { 246 continue 247 } 248 if verr != nil { 249 return nil, verr 250 } 251 252 if i == lastkey && !keys[i].isArray { 253 break 254 } 255 256 // Now we're looking at an array - supposedly. 257 // Is index in range of vals? 258 if len(vals) <= keys[i].position { 259 vals = nil 260 break 261 } 262 263 // Return the array member of interest, if at end of path. 264 if i == lastkey { 265 vals = vals[keys[i].position:(keys[i].position + 1)] 266 break 267 } 268 269 // Extract the array member of interest. 270 am := vals[keys[i].position:(keys[i].position + 1)] 271 272 // must be a map[string]interface{} value so we can keep walking the path 273 amm, ok := am[0].(map[string]interface{}) 274 if !ok { 275 vals = nil 276 break 277 } 278 279 m = Map(amm) 280 haveFirst = false 281 } 282 283 return vals, nil 284} 285 286type key struct { 287 name string 288 isArray bool 289 position int 290} 291 292func parsePath(s string) ([]*key, error) { 293 keys := strings.Split(s, ".") 294 295 ret := make([]*key, 0) 296 297 for i := 0; i < len(keys); i++ { 298 if keys[i] == "" { 299 continue 300 } 301 302 newkey := new(key) 303 if strings.Index(keys[i], "[") < 0 { 304 newkey.name = keys[i] 305 ret = append(ret, newkey) 306 continue 307 } 308 309 p := strings.Split(keys[i], "[") 310 newkey.name = p[0] 311 p = strings.Split(p[1], "]") 312 if p[0] == "" { // no right bracket 313 return nil, fmt.Errorf("no right bracket on key index: %s", keys[i]) 314 } 315 // convert p[0] to a int value 316 pos, nerr := strconv.ParseInt(p[0], 10, 32) 317 if nerr != nil { 318 return nil, fmt.Errorf("cannot convert index to int value: %s", p[0]) 319 } 320 newkey.position = int(pos) 321 newkey.isArray = true 322 ret = append(ret, newkey) 323 } 324 325 return ret, nil 326} 327 328// legacy ValuesForPath() - now wrapped to handle special case of indexed arrays in 'path'. 329func (mv Map) oldValuesForPath(path string, subkeys ...string) ([]interface{}, error) { 330 m := map[string]interface{}(mv) 331 var subKeyMap map[string]interface{} 332 if len(subkeys) > 0 { 333 var err error 334 subKeyMap, err = getSubKeyMap(subkeys...) 335 if err != nil { 336 return nil, err 337 } 338 } 339 340 keys := strings.Split(path, ".") 341 if keys[len(keys)-1] == "" { 342 keys = keys[:len(keys)-1] 343 } 344 ivals := make([]interface{}, 0, defaultArraySize) 345 var cnt int 346 valuesForKeyPath(&ivals, &cnt, m, keys, subKeyMap) 347 return ivals[:cnt], nil 348} 349 350func valuesForKeyPath(ret *[]interface{}, cnt *int, m interface{}, keys []string, subkeys map[string]interface{}) { 351 lenKeys := len(keys) 352 353 // load 'm' values into 'ret' 354 // expand any lists 355 if lenKeys == 0 { 356 switch m.(type) { 357 case map[string]interface{}: 358 if subkeys != nil { 359 if ok := hasSubKeys(m, subkeys); !ok { 360 return 361 } 362 } 363 *ret = append(*ret, m) 364 *cnt++ 365 case []interface{}: 366 for i, v := range m.([]interface{}) { 367 if subkeys != nil { 368 if ok := hasSubKeys(v, subkeys); !ok { 369 continue // only load list members with subkeys 370 } 371 } 372 *ret = append(*ret, (m.([]interface{}))[i]) 373 *cnt++ 374 } 375 default: 376 if subkeys != nil { 377 return // must be map[string]interface{} if there are subkeys 378 } 379 *ret = append(*ret, m) 380 *cnt++ 381 } 382 return 383 } 384 385 // key of interest 386 key := keys[0] 387 switch key { 388 case "*": // wildcard - scan all values 389 switch m.(type) { 390 case map[string]interface{}: 391 for _, v := range m.(map[string]interface{}) { 392 // valuesForKeyPath(ret, v, keys[1:], subkeys) 393 valuesForKeyPath(ret, cnt, v, keys[1:], subkeys) 394 } 395 case []interface{}: 396 for _, v := range m.([]interface{}) { 397 switch v.(type) { 398 // flatten out a list of maps - keys are processed 399 case map[string]interface{}: 400 for _, vv := range v.(map[string]interface{}) { 401 // valuesForKeyPath(ret, vv, keys[1:], subkeys) 402 valuesForKeyPath(ret, cnt, vv, keys[1:], subkeys) 403 } 404 default: 405 // valuesForKeyPath(ret, v, keys[1:], subkeys) 406 valuesForKeyPath(ret, cnt, v, keys[1:], subkeys) 407 } 408 } 409 } 410 default: // key - must be map[string]interface{} 411 switch m.(type) { 412 case map[string]interface{}: 413 if v, ok := m.(map[string]interface{})[key]; ok { 414 // valuesForKeyPath(ret, v, keys[1:], subkeys) 415 valuesForKeyPath(ret, cnt, v, keys[1:], subkeys) 416 } 417 case []interface{}: // may be buried in list 418 for _, v := range m.([]interface{}) { 419 switch v.(type) { 420 case map[string]interface{}: 421 if vv, ok := v.(map[string]interface{})[key]; ok { 422 // valuesForKeyPath(ret, vv, keys[1:], subkeys) 423 valuesForKeyPath(ret, cnt, vv, keys[1:], subkeys) 424 } 425 } 426 } 427 } 428 } 429} 430 431// hasSubKeys() - interface{} equality works for string, float64, bool 432// 'v' must be a map[string]interface{} value to have subkeys 433// 'a' can have k:v pairs with v.(string) == "*", which is treated like a wildcard. 434func hasSubKeys(v interface{}, subkeys map[string]interface{}) bool { 435 if len(subkeys) == 0 { 436 return true 437 } 438 439 switch v.(type) { 440 case map[string]interface{}: 441 // do all subKey name:value pairs match? 442 mv := v.(map[string]interface{}) 443 for skey, sval := range subkeys { 444 isNotKey := false 445 if skey[:1] == "!" { // a NOT-key 446 skey = skey[1:] 447 isNotKey = true 448 } 449 vv, ok := mv[skey] 450 if !ok { // key doesn't exist 451 if isNotKey { // key not there, but that's what we want 452 if kv, ok := sval.(string); ok && kv == "*" { 453 continue 454 } 455 } 456 return false 457 } 458 // wildcard check 459 if kv, ok := sval.(string); ok && kv == "*" { 460 if isNotKey { // key is there, and we don't want it 461 return false 462 } 463 continue 464 } 465 switch sval.(type) { 466 case string: 467 if s, ok := vv.(string); ok && s == sval.(string) { 468 if isNotKey { 469 return false 470 } 471 continue 472 } 473 case bool: 474 if b, ok := vv.(bool); ok && b == sval.(bool) { 475 if isNotKey { 476 return false 477 } 478 continue 479 } 480 case float64: 481 if f, ok := vv.(float64); ok && f == sval.(float64) { 482 if isNotKey { 483 return false 484 } 485 continue 486 } 487 } 488 // key there but didn't match subkey value 489 if isNotKey { // that's what we want 490 continue 491 } 492 return false 493 } 494 // all subkeys matched 495 return true 496 } 497 498 // not a map[string]interface{} value, can't have subkeys 499 return false 500} 501 502// Generate map of key:value entries as map[string]string. 503// 'kv' arguments are "name:value" pairs: attribute keys are designated with prepended hyphen, '-'. 504// If len(kv) == 0, the return is (nil, nil). 505func getSubKeyMap(kv ...string) (map[string]interface{}, error) { 506 if len(kv) == 0 { 507 return nil, nil 508 } 509 m := make(map[string]interface{}, 0) 510 for _, v := range kv { 511 vv := strings.Split(v, fieldSep) 512 switch len(vv) { 513 case 2: 514 m[vv[0]] = interface{}(vv[1]) 515 case 3: 516 switch vv[2] { 517 case "string", "char", "text": 518 m[vv[0]] = interface{}(vv[1]) 519 case "bool", "boolean": 520 // ParseBool treats "1"==true & "0"==false 521 b, err := strconv.ParseBool(vv[1]) 522 if err != nil { 523 return nil, fmt.Errorf("can't convert subkey value to bool: %s", vv[1]) 524 } 525 m[vv[0]] = interface{}(b) 526 case "float", "float64", "num", "number", "numeric": 527 f, err := strconv.ParseFloat(vv[1], 64) 528 if err != nil { 529 return nil, fmt.Errorf("can't convert subkey value to float: %s", vv[1]) 530 } 531 m[vv[0]] = interface{}(f) 532 default: 533 return nil, fmt.Errorf("unknown subkey conversion spec: %s", v) 534 } 535 default: 536 return nil, fmt.Errorf("unknown subkey spec: %s", v) 537 } 538 } 539 return m, nil 540} 541 542// ------------------------------- END of valuesFor ... ---------------------------- 543 544// ----------------------- locate where a key value is in the tree ------------------- 545 546//----------------------------- find all paths to a key -------------------------------- 547 548// Get all paths through Map, 'mv', (in dot-notation) that terminate with the specified key. 549// Results can be used with ValuesForPath. 550func (mv Map) PathsForKey(key string) []string { 551 m := map[string]interface{}(mv) 552 breadbasket := make(map[string]bool, 0) 553 breadcrumbs := "" 554 555 hasKeyPath(breadcrumbs, m, key, breadbasket) 556 if len(breadbasket) == 0 { 557 return nil 558 } 559 560 // unpack map keys to return 561 res := make([]string, len(breadbasket)) 562 var i int 563 for k := range breadbasket { 564 res[i] = k 565 i++ 566 } 567 568 return res 569} 570 571// Extract the shortest path from all possible paths - from PathsForKey() - in Map, 'mv'.. 572// Paths are strings using dot-notation. 573func (mv Map) PathForKeyShortest(key string) string { 574 paths := mv.PathsForKey(key) 575 576 lp := len(paths) 577 if lp == 0 { 578 return "" 579 } 580 if lp == 1 { 581 return paths[0] 582 } 583 584 shortest := paths[0] 585 shortestLen := len(strings.Split(shortest, ".")) 586 587 for i := 1; i < len(paths); i++ { 588 vlen := len(strings.Split(paths[i], ".")) 589 if vlen < shortestLen { 590 shortest = paths[i] 591 shortestLen = vlen 592 } 593 } 594 595 return shortest 596} 597 598// hasKeyPath - if the map 'key' exists append it to KeyPath.path and increment KeyPath.depth 599// This is really just a breadcrumber that saves all trails that hit the prescribed 'key'. 600func hasKeyPath(crumbs string, iv interface{}, key string, basket map[string]bool) { 601 switch iv.(type) { 602 case map[string]interface{}: 603 vv := iv.(map[string]interface{}) 604 if _, ok := vv[key]; ok { 605 // create a new breadcrumb, intialized with the one we have 606 var nbc string 607 if crumbs == "" { 608 nbc = key 609 } else { 610 nbc = crumbs + "." + key 611 } 612 basket[nbc] = true 613 } 614 // walk on down the path, key could occur again at deeper node 615 for k, v := range vv { 616 // create a new breadcrumb, intialized with the one we have 617 var nbc string 618 if crumbs == "" { 619 nbc = k 620 } else { 621 nbc = crumbs + "." + k 622 } 623 hasKeyPath(nbc, v, key, basket) 624 } 625 case []interface{}: 626 // crumb-trail doesn't change, pass it on 627 for _, v := range iv.([]interface{}) { 628 hasKeyPath(crumbs, v, key, basket) 629 } 630 } 631} 632 633var PathNotExistError = errors.New("Path does not exist") 634 635// ValueForPath wrap ValuesFor Path and returns the first value returned. 636// If no value is found it returns 'nil' and PathNotExistError. 637func (mv Map) ValueForPath(path string) (interface{}, error) { 638 vals, err := mv.ValuesForPath(path) 639 if err != nil { 640 return nil, err 641 } 642 if len(vals) == 0 { 643 return nil, PathNotExistError 644 } 645 return vals[0], nil 646} 647 648// Returns the first found value for the path as a string. 649func (mv Map) ValueForPathString(path string) (string, error) { 650 vals, err := mv.ValuesForPath(path) 651 if err != nil { 652 return "", err 653 } 654 if len(vals) == 0 { 655 return "", errors.New("ValueForPath: path not found") 656 } 657 val := vals[0] 658 switch str := val.(type) { 659 case string: 660 return str, nil 661 default: 662 return "", fmt.Errorf("ValueForPath: unsupported type: %T", str) 663 } 664} 665 666// Returns the first found value for the path as a string. 667// If the path is not found then it returns an empty string. 668func (mv Map) ValueOrEmptyForPathString(path string) string { 669 str, _ := mv.ValueForPathString(path) 670 return str 671} 672