1// Copyright 2012-2014 Charles Banning. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file 4 5// keyvalues.go: Extract values from an arbitrary XML doc. Tag path can include wildcard characters. 6 7package mxj 8 9import ( 10 "errors" 11 "fmt" 12 "strconv" 13 "strings" 14) 15 16// ----------------------------- get everything FOR a single key ------------------------- 17 18const ( 19 minArraySize = 32 20) 21 22var defaultArraySize int = minArraySize 23 24// SetArraySize adjust the buffers for expected number of values to return from ValuesForKey() and ValuesForPath(). 25// This can have the effect of significantly reducing memory allocation-copy functions for large data sets. 26// Returns the initial buffer size. 27func SetArraySize(size int) int { 28 if size > minArraySize { 29 defaultArraySize = size 30 } else { 31 defaultArraySize = minArraySize 32 } 33 return defaultArraySize 34} 35 36// ValuesForKey return all values in Map, 'mv', associated with a 'key'. If len(returned_values) == 0, then no match. 37// On error, the returned slice is 'nil'. NOTE: 'key' can be wildcard, "*". 38// 'subkeys' (optional) are "key:val[:type]" strings representing attributes or elements in a list. 39// - By default 'val' is of type string. "key:val:bool" and "key:val:float" to coerce them. 40// - For attributes prefix the label with the attribute prefix character, by default a 41// hyphen, '-', e.g., "-seq:3". (See SetAttrPrefix function.) 42// - If the 'key' refers to a list, then "key:value" could select a list member of the list. 43// - The subkey can be wildcarded - "key:*" - to require that it's there with some value. 44// - If a subkey is preceeded with the '!' character, the key:value[:type] entry is treated as an 45// exclusion critera - e.g., "!author:William T. Gaddis". 46// - If val contains ":" symbol, use SetFieldSeparator to a unused symbol, perhaps "|". 47func (mv Map) ValuesForKey(key string, subkeys ...string) ([]interface{}, error) { 48 m := map[string]interface{}(mv) 49 var subKeyMap map[string]interface{} 50 if len(subkeys) > 0 { 51 var err error 52 subKeyMap, err = getSubKeyMap(subkeys...) 53 if err != nil { 54 return nil, err 55 } 56 } 57 58 ret := make([]interface{}, 0, defaultArraySize) 59 var cnt int 60 hasKey(m, key, &ret, &cnt, subKeyMap) 61 return ret[:cnt], nil 62} 63 64var KeyNotExistError = errors.New("Key does not exist") 65 66// ValueForKey is a wrapper on ValuesForKey. It returns the first member of []interface{}, if any. 67// If there is no value, "nil, nil" is returned. 68func (mv Map) ValueForKey(key string, subkeys ...string) (interface{}, error) { 69 vals, err := mv.ValuesForKey(key, subkeys...) 70 if err != nil { 71 return nil, err 72 } 73 if len(vals) == 0 { 74 return nil, KeyNotExistError 75 } 76 return vals[0], nil 77} 78 79// hasKey - if the map 'key' exists append it to array 80// if it doesn't do nothing except scan array and map values 81func hasKey(iv interface{}, key string, ret *[]interface{}, cnt *int, subkeys map[string]interface{}) { 82 // func hasKey(iv interface{}, key string, ret *[]interface{}, subkeys map[string]interface{}) { 83 switch iv.(type) { 84 case map[string]interface{}: 85 vv := iv.(map[string]interface{}) 86 // see if the current value is of interest 87 if v, ok := vv[key]; ok { 88 switch v.(type) { 89 case map[string]interface{}: 90 if hasSubKeys(v, subkeys) { 91 *ret = append(*ret, v) 92 *cnt++ 93 } 94 case []interface{}: 95 for _, av := range v.([]interface{}) { 96 if hasSubKeys(av, subkeys) { 97 *ret = append(*ret, av) 98 *cnt++ 99 } 100 } 101 default: 102 if len(subkeys) == 0 { 103 *ret = append(*ret, v) 104 *cnt++ 105 } 106 } 107 } 108 109 // wildcard case 110 if key == "*" { 111 for _, v := range vv { 112 switch v.(type) { 113 case map[string]interface{}: 114 if hasSubKeys(v, subkeys) { 115 *ret = append(*ret, v) 116 *cnt++ 117 } 118 case []interface{}: 119 for _, av := range v.([]interface{}) { 120 if hasSubKeys(av, subkeys) { 121 *ret = append(*ret, av) 122 *cnt++ 123 } 124 } 125 default: 126 if len(subkeys) == 0 { 127 *ret = append(*ret, v) 128 *cnt++ 129 } 130 } 131 } 132 } 133 134 // scan the rest 135 for _, v := range vv { 136 hasKey(v, key, ret, cnt, subkeys) 137 } 138 case []interface{}: 139 for _, v := range iv.([]interface{}) { 140 hasKey(v, key, ret, cnt, subkeys) 141 } 142 } 143} 144 145// ----------------------- get everything for a node in the Map --------------------------- 146 147// Allow indexed arrays in "path" specification. (Request from Abhijit Kadam - abhijitk100@gmail.com.) 148// 2014.04.28 - implementation note. 149// Implemented as a wrapper of (old)ValuesForPath() because we need look-ahead logic to handle expansion 150// of wildcards and unindexed arrays. Embedding such logic into valuesForKeyPath() would have made the 151// code much more complicated; this wrapper is straightforward, easy to debug, and doesn't add significant overhead. 152 153// ValuesForPatb retrieves all values for a path from the Map. If len(returned_values) == 0, then no match. 154// On error, the returned array is 'nil'. 155// 'path' is a dot-separated path of key values. 156// - If a node in the path is '*', then everything beyond is walked. 157// - 'path' can contain indexed array references, such as, "*.data[1]" and "msgs[2].data[0].field" - 158// even "*[2].*[0].field". 159// 'subkeys' (optional) are "key:val[:type]" strings representing attributes or elements in a list. 160// - By default 'val' is of type string. "key:val:bool" and "key:val:float" to coerce them. 161// - For attributes prefix the label with the attribute prefix character, by default a 162// hyphen, '-', e.g., "-seq:3". (See SetAttrPrefix function.) 163// - If the 'path' refers to a list, then "tag:value" would return member of the list. 164// - The subkey can be wildcarded - "key:*" - to require that it's there with some value. 165// - If a subkey is preceeded with the '!' character, the key:value[:type] entry is treated as an 166// exclusion critera - e.g., "!author:William T. Gaddis". 167// - If val contains ":" symbol, use SetFieldSeparator to a unused symbol, perhaps "|". 168func (mv Map) ValuesForPath(path string, subkeys ...string) ([]interface{}, error) { 169 // If there are no array indexes in path, use legacy ValuesForPath() logic. 170 if strings.Index(path, "[") < 0 { 171 return mv.oldValuesForPath(path, subkeys...) 172 } 173 174 var subKeyMap map[string]interface{} 175 if len(subkeys) > 0 { 176 var err error 177 subKeyMap, err = getSubKeyMap(subkeys...) 178 if err != nil { 179 return nil, err 180 } 181 } 182 183 keys, kerr := parsePath(path) 184 if kerr != nil { 185 return nil, kerr 186 } 187 188 vals, verr := valuesForArray(keys, mv) 189 if verr != nil { 190 return nil, verr // Vals may be nil, but return empty array. 191 } 192 193 // Need to handle subkeys ... only return members of vals that satisfy conditions. 194 retvals := make([]interface{}, 0) 195 for _, v := range vals { 196 if hasSubKeys(v, subKeyMap) { 197 retvals = append(retvals, v) 198 } 199 } 200 return retvals, nil 201} 202 203func valuesForArray(keys []*key, m Map) ([]interface{}, error) { 204 var tmppath string 205 var haveFirst bool 206 var vals []interface{} 207 var verr error 208 209 lastkey := len(keys) - 1 210 for i := 0; i <= lastkey; i++ { 211 if !haveFirst { 212 tmppath = keys[i].name 213 haveFirst = true 214 } else { 215 tmppath += "." + keys[i].name 216 } 217 218 // Look-ahead: explode wildcards and unindexed arrays. 219 // Need to handle un-indexed list recursively: 220 // e.g., path is "stuff.data[0]" rather than "stuff[0].data[0]". 221 // Need to treat it as "stuff[0].data[0]", "stuff[1].data[0]", ... 222 if !keys[i].isArray && i < lastkey && keys[i+1].isArray { 223 // Can't pass subkeys because we may not be at literal end of path. 224 vv, vverr := m.oldValuesForPath(tmppath) 225 if vverr != nil { 226 return nil, vverr 227 } 228 for _, v := range vv { 229 // See if we can walk the value. 230 am, ok := v.(map[string]interface{}) 231 if !ok { 232 continue 233 } 234 // Work the backend. 235 nvals, nvalserr := valuesForArray(keys[i+1:], Map(am)) 236 if nvalserr != nil { 237 return nil, nvalserr 238 } 239 vals = append(vals, nvals...) 240 } 241 break // have recursed the whole path - return 242 } 243 244 if keys[i].isArray || i == lastkey { 245 // Don't pass subkeys because may not be at literal end of path. 246 vals, verr = m.oldValuesForPath(tmppath) 247 } else { 248 continue 249 } 250 if verr != nil { 251 return nil, verr 252 } 253 254 if i == lastkey && !keys[i].isArray { 255 break 256 } 257 258 // Now we're looking at an array - supposedly. 259 // Is index in range of vals? 260 if len(vals) <= keys[i].position { 261 vals = nil 262 break 263 } 264 265 // Return the array member of interest, if at end of path. 266 if i == lastkey { 267 vals = vals[keys[i].position:(keys[i].position + 1)] 268 break 269 } 270 271 // Extract the array member of interest. 272 am := vals[keys[i].position:(keys[i].position + 1)] 273 274 // must be a map[string]interface{} value so we can keep walking the path 275 amm, ok := am[0].(map[string]interface{}) 276 if !ok { 277 vals = nil 278 break 279 } 280 281 m = Map(amm) 282 haveFirst = false 283 } 284 285 return vals, nil 286} 287 288type key struct { 289 name string 290 isArray bool 291 position int 292} 293 294func parsePath(s string) ([]*key, error) { 295 keys := strings.Split(s, ".") 296 297 ret := make([]*key, 0) 298 299 for i := 0; i < len(keys); i++ { 300 if keys[i] == "" { 301 continue 302 } 303 304 newkey := new(key) 305 if strings.Index(keys[i], "[") < 0 { 306 newkey.name = keys[i] 307 ret = append(ret, newkey) 308 continue 309 } 310 311 p := strings.Split(keys[i], "[") 312 newkey.name = p[0] 313 p = strings.Split(p[1], "]") 314 if p[0] == "" { // no right bracket 315 return nil, fmt.Errorf("no right bracket on key index: %s", keys[i]) 316 } 317 // convert p[0] to a int value 318 pos, nerr := strconv.ParseInt(p[0], 10, 32) 319 if nerr != nil { 320 return nil, fmt.Errorf("cannot convert index to int value: %s", p[0]) 321 } 322 newkey.position = int(pos) 323 newkey.isArray = true 324 ret = append(ret, newkey) 325 } 326 327 return ret, nil 328} 329 330// legacy ValuesForPath() - now wrapped to handle special case of indexed arrays in 'path'. 331func (mv Map) oldValuesForPath(path string, subkeys ...string) ([]interface{}, error) { 332 m := map[string]interface{}(mv) 333 var subKeyMap map[string]interface{} 334 if len(subkeys) > 0 { 335 var err error 336 subKeyMap, err = getSubKeyMap(subkeys...) 337 if err != nil { 338 return nil, err 339 } 340 } 341 342 keys := strings.Split(path, ".") 343 if keys[len(keys)-1] == "" { 344 keys = keys[:len(keys)-1] 345 } 346 ivals := make([]interface{}, 0, defaultArraySize) 347 var cnt int 348 valuesForKeyPath(&ivals, &cnt, m, keys, subKeyMap) 349 return ivals[:cnt], nil 350} 351 352func valuesForKeyPath(ret *[]interface{}, cnt *int, m interface{}, keys []string, subkeys map[string]interface{}) { 353 lenKeys := len(keys) 354 355 // load 'm' values into 'ret' 356 // expand any lists 357 if lenKeys == 0 { 358 switch m.(type) { 359 case map[string]interface{}: 360 if subkeys != nil { 361 if ok := hasSubKeys(m, subkeys); !ok { 362 return 363 } 364 } 365 *ret = append(*ret, m) 366 *cnt++ 367 case []interface{}: 368 for i, v := range m.([]interface{}) { 369 if subkeys != nil { 370 if ok := hasSubKeys(v, subkeys); !ok { 371 continue // only load list members with subkeys 372 } 373 } 374 *ret = append(*ret, (m.([]interface{}))[i]) 375 *cnt++ 376 } 377 default: 378 if subkeys != nil { 379 return // must be map[string]interface{} if there are subkeys 380 } 381 *ret = append(*ret, m) 382 *cnt++ 383 } 384 return 385 } 386 387 // key of interest 388 key := keys[0] 389 switch key { 390 case "*": // wildcard - scan all values 391 switch m.(type) { 392 case map[string]interface{}: 393 for _, v := range m.(map[string]interface{}) { 394 // valuesForKeyPath(ret, v, keys[1:], subkeys) 395 valuesForKeyPath(ret, cnt, v, keys[1:], subkeys) 396 } 397 case []interface{}: 398 for _, v := range m.([]interface{}) { 399 switch v.(type) { 400 // flatten out a list of maps - keys are processed 401 case map[string]interface{}: 402 for _, vv := range v.(map[string]interface{}) { 403 // valuesForKeyPath(ret, vv, keys[1:], subkeys) 404 valuesForKeyPath(ret, cnt, vv, keys[1:], subkeys) 405 } 406 default: 407 // valuesForKeyPath(ret, v, keys[1:], subkeys) 408 valuesForKeyPath(ret, cnt, v, keys[1:], subkeys) 409 } 410 } 411 } 412 default: // key - must be map[string]interface{} 413 switch m.(type) { 414 case map[string]interface{}: 415 if v, ok := m.(map[string]interface{})[key]; ok { 416 // valuesForKeyPath(ret, v, keys[1:], subkeys) 417 valuesForKeyPath(ret, cnt, v, keys[1:], subkeys) 418 } 419 case []interface{}: // may be buried in list 420 for _, v := range m.([]interface{}) { 421 switch v.(type) { 422 case map[string]interface{}: 423 if vv, ok := v.(map[string]interface{})[key]; ok { 424 // valuesForKeyPath(ret, vv, keys[1:], subkeys) 425 valuesForKeyPath(ret, cnt, vv, keys[1:], subkeys) 426 } 427 } 428 } 429 } 430 } 431} 432 433// hasSubKeys() - interface{} equality works for string, float64, bool 434// 'v' must be a map[string]interface{} value to have subkeys 435// 'a' can have k:v pairs with v.(string) == "*", which is treated like a wildcard. 436func hasSubKeys(v interface{}, subkeys map[string]interface{}) bool { 437 if len(subkeys) == 0 { 438 return true 439 } 440 441 switch v.(type) { 442 case map[string]interface{}: 443 // do all subKey name:value pairs match? 444 mv := v.(map[string]interface{}) 445 for skey, sval := range subkeys { 446 isNotKey := false 447 if skey[:1] == "!" { // a NOT-key 448 skey = skey[1:] 449 isNotKey = true 450 } 451 vv, ok := mv[skey] 452 if !ok { // key doesn't exist 453 if isNotKey { // key not there, but that's what we want 454 if kv, ok := sval.(string); ok && kv == "*" { 455 continue 456 } 457 } 458 return false 459 } 460 // wildcard check 461 if kv, ok := sval.(string); ok && kv == "*" { 462 if isNotKey { // key is there, and we don't want it 463 return false 464 } 465 continue 466 } 467 switch sval.(type) { 468 case string: 469 if s, ok := vv.(string); ok && s == sval.(string) { 470 if isNotKey { 471 return false 472 } 473 continue 474 } 475 case bool: 476 if b, ok := vv.(bool); ok && b == sval.(bool) { 477 if isNotKey { 478 return false 479 } 480 continue 481 } 482 case float64: 483 if f, ok := vv.(float64); ok && f == sval.(float64) { 484 if isNotKey { 485 return false 486 } 487 continue 488 } 489 } 490 // key there but didn't match subkey value 491 if isNotKey { // that's what we want 492 continue 493 } 494 return false 495 } 496 // all subkeys matched 497 return true 498 } 499 500 // not a map[string]interface{} value, can't have subkeys 501 return false 502} 503 504// Generate map of key:value entries as map[string]string. 505// 'kv' arguments are "name:value" pairs: attribute keys are designated with prepended hyphen, '-'. 506// If len(kv) == 0, the return is (nil, nil). 507func getSubKeyMap(kv ...string) (map[string]interface{}, error) { 508 if len(kv) == 0 { 509 return nil, nil 510 } 511 m := make(map[string]interface{}, 0) 512 for _, v := range kv { 513 vv := strings.Split(v, fieldSep) 514 switch len(vv) { 515 case 2: 516 m[vv[0]] = interface{}(vv[1]) 517 case 3: 518 switch vv[2] { 519 case "string", "char", "text": 520 m[vv[0]] = interface{}(vv[1]) 521 case "bool", "boolean": 522 // ParseBool treats "1"==true & "0"==false 523 b, err := strconv.ParseBool(vv[1]) 524 if err != nil { 525 return nil, fmt.Errorf("can't convert subkey value to bool: %s", vv[1]) 526 } 527 m[vv[0]] = interface{}(b) 528 case "float", "float64", "num", "number", "numeric": 529 f, err := strconv.ParseFloat(vv[1], 64) 530 if err != nil { 531 return nil, fmt.Errorf("can't convert subkey value to float: %s", vv[1]) 532 } 533 m[vv[0]] = interface{}(f) 534 default: 535 return nil, fmt.Errorf("unknown subkey conversion spec: %s", v) 536 } 537 default: 538 return nil, fmt.Errorf("unknown subkey spec: %s", v) 539 } 540 } 541 return m, nil 542} 543 544// ------------------------------- END of valuesFor ... ---------------------------- 545 546// ----------------------- locate where a key value is in the tree ------------------- 547 548//----------------------------- find all paths to a key -------------------------------- 549 550// PathsForKey returns all paths through Map, 'mv', (in dot-notation) that terminate with the specified key. 551// Results can be used with ValuesForPath. 552func (mv Map) PathsForKey(key string) []string { 553 m := map[string]interface{}(mv) 554 breadbasket := make(map[string]bool, 0) 555 breadcrumbs := "" 556 557 hasKeyPath(breadcrumbs, m, key, breadbasket) 558 if len(breadbasket) == 0 { 559 return nil 560 } 561 562 // unpack map keys to return 563 res := make([]string, len(breadbasket)) 564 var i int 565 for k := range breadbasket { 566 res[i] = k 567 i++ 568 } 569 570 return res 571} 572 573// PathForKeyShortest extracts the shortest path from all possible paths - from PathsForKey() - in Map, 'mv'.. 574// Paths are strings using dot-notation. 575func (mv Map) PathForKeyShortest(key string) string { 576 paths := mv.PathsForKey(key) 577 578 lp := len(paths) 579 if lp == 0 { 580 return "" 581 } 582 if lp == 1 { 583 return paths[0] 584 } 585 586 shortest := paths[0] 587 shortestLen := len(strings.Split(shortest, ".")) 588 589 for i := 1; i < len(paths); i++ { 590 vlen := len(strings.Split(paths[i], ".")) 591 if vlen < shortestLen { 592 shortest = paths[i] 593 shortestLen = vlen 594 } 595 } 596 597 return shortest 598} 599 600// hasKeyPath - if the map 'key' exists append it to KeyPath.path and increment KeyPath.depth 601// This is really just a breadcrumber that saves all trails that hit the prescribed 'key'. 602func hasKeyPath(crumbs string, iv interface{}, key string, basket map[string]bool) { 603 switch iv.(type) { 604 case map[string]interface{}: 605 vv := iv.(map[string]interface{}) 606 if _, ok := vv[key]; ok { 607 // create a new breadcrumb, intialized with the one we have 608 var nbc string 609 if crumbs == "" { 610 nbc = key 611 } else { 612 nbc = crumbs + "." + key 613 } 614 basket[nbc] = true 615 } 616 // walk on down the path, key could occur again at deeper node 617 for k, v := range vv { 618 // create a new breadcrumb, intialized with the one we have 619 var nbc string 620 if crumbs == "" { 621 nbc = k 622 } else { 623 nbc = crumbs + "." + k 624 } 625 hasKeyPath(nbc, v, key, basket) 626 } 627 case []interface{}: 628 // crumb-trail doesn't change, pass it on 629 for _, v := range iv.([]interface{}) { 630 hasKeyPath(crumbs, v, key, basket) 631 } 632 } 633} 634 635var PathNotExistError = errors.New("Path does not exist") 636 637// ValueForPath wraps ValuesFor Path and returns the first value returned. 638// If no value is found it returns 'nil' and PathNotExistError. 639func (mv Map) ValueForPath(path string) (interface{}, error) { 640 vals, err := mv.ValuesForPath(path) 641 if err != nil { 642 return nil, err 643 } 644 if len(vals) == 0 { 645 return nil, PathNotExistError 646 } 647 return vals[0], nil 648} 649 650// ValuesForPathString returns the first found value for the path as a string. 651func (mv Map) ValueForPathString(path string) (string, error) { 652 vals, err := mv.ValuesForPath(path) 653 if err != nil { 654 return "", err 655 } 656 if len(vals) == 0 { 657 return "", errors.New("ValueForPath: path not found") 658 } 659 val := vals[0] 660 return fmt.Sprintf("%v", val), nil 661} 662 663// ValueOrEmptyForPathString returns the first found value for the path as a string. 664// If the path is not found then it returns an empty string. 665func (mv Map) ValueOrEmptyForPathString(path string) string { 666 str, _ := mv.ValueForPathString(path) 667 return str 668} 669