1// Copyright 2012-2014 Charles Banning. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file
4
5//	keyvalues.go: Extract values from an arbitrary XML doc. Tag path can include wildcard characters.
6
7package mxj
8
9import (
10	"errors"
11	"fmt"
12	"strconv"
13	"strings"
14)
15
16// ----------------------------- get everything FOR a single key -------------------------
17
18const (
19	minArraySize = 32
20)
21
22var defaultArraySize int = minArraySize
23
24// SetArraySize adjust the buffers for expected number of values to return from ValuesForKey() and ValuesForPath().
25// This can have the effect of significantly reducing memory allocation-copy functions for large data sets.
26// Returns the initial buffer size.
27func SetArraySize(size int) int {
28	if size > minArraySize {
29		defaultArraySize = size
30	} else {
31		defaultArraySize = minArraySize
32	}
33	return defaultArraySize
34}
35
36// ValuesForKey return all values in Map, 'mv', associated with a 'key'. If len(returned_values) == 0, then no match.
37// On error, the returned slice is 'nil'. NOTE: 'key' can be wildcard, "*".
38//   'subkeys' (optional) are "key:val[:type]" strings representing attributes or elements in a list.
39//             - By default 'val' is of type string. "key:val:bool" and "key:val:float" to coerce them.
40//             - For attributes prefix the label with the attribute prefix character, by default a
41//               hyphen, '-', e.g., "-seq:3". (See SetAttrPrefix function.)
42//             - If the 'key' refers to a list, then "key:value" could select a list member of the list.
43//             - The subkey can be wildcarded - "key:*" - to require that it's there with some value.
44//             - If a subkey is preceeded with the '!' character, the key:value[:type] entry is treated as an
45//               exclusion critera - e.g., "!author:William T. Gaddis".
46//             - If val contains ":" symbol, use SetFieldSeparator to a unused symbol, perhaps "|".
47func (mv Map) ValuesForKey(key string, subkeys ...string) ([]interface{}, error) {
48	m := map[string]interface{}(mv)
49	var subKeyMap map[string]interface{}
50	if len(subkeys) > 0 {
51		var err error
52		subKeyMap, err = getSubKeyMap(subkeys...)
53		if err != nil {
54			return nil, err
55		}
56	}
57
58	ret := make([]interface{}, 0, defaultArraySize)
59	var cnt int
60	hasKey(m, key, &ret, &cnt, subKeyMap)
61	return ret[:cnt], nil
62}
63
64var KeyNotExistError = errors.New("Key does not exist")
65
66// ValueForKey is a wrapper on ValuesForKey.  It returns the first member of []interface{}, if any.
67// If there is no value, "nil, nil" is returned.
68func (mv Map) ValueForKey(key string, subkeys ...string) (interface{}, error) {
69	vals, err := mv.ValuesForKey(key, subkeys...)
70	if err != nil {
71		return nil, err
72	}
73	if len(vals) == 0 {
74		return nil, KeyNotExistError
75	}
76	return vals[0], nil
77}
78
79// hasKey - if the map 'key' exists append it to array
80//          if it doesn't do nothing except scan array and map values
81func hasKey(iv interface{}, key string, ret *[]interface{}, cnt *int, subkeys map[string]interface{}) {
82	// func hasKey(iv interface{}, key string, ret *[]interface{}, subkeys map[string]interface{}) {
83	switch iv.(type) {
84	case map[string]interface{}:
85		vv := iv.(map[string]interface{})
86		// see if the current value is of interest
87		if v, ok := vv[key]; ok {
88			switch v.(type) {
89			case map[string]interface{}:
90				if hasSubKeys(v, subkeys) {
91					*ret = append(*ret, v)
92					*cnt++
93				}
94			case []interface{}:
95				for _, av := range v.([]interface{}) {
96					if hasSubKeys(av, subkeys) {
97						*ret = append(*ret, av)
98						*cnt++
99					}
100				}
101			default:
102				if len(subkeys) == 0 {
103					*ret = append(*ret, v)
104					*cnt++
105				}
106			}
107		}
108
109		// wildcard case
110		if key == "*" {
111			for _, v := range vv {
112				switch v.(type) {
113				case map[string]interface{}:
114					if hasSubKeys(v, subkeys) {
115						*ret = append(*ret, v)
116						*cnt++
117					}
118				case []interface{}:
119					for _, av := range v.([]interface{}) {
120						if hasSubKeys(av, subkeys) {
121							*ret = append(*ret, av)
122							*cnt++
123						}
124					}
125				default:
126					if len(subkeys) == 0 {
127						*ret = append(*ret, v)
128						*cnt++
129					}
130				}
131			}
132		}
133
134		// scan the rest
135		for _, v := range vv {
136			hasKey(v, key, ret, cnt, subkeys)
137		}
138	case []interface{}:
139		for _, v := range iv.([]interface{}) {
140			hasKey(v, key, ret, cnt, subkeys)
141		}
142	}
143}
144
145// -----------------------  get everything for a node in the Map ---------------------------
146
147// Allow indexed arrays in "path" specification. (Request from Abhijit Kadam - abhijitk100@gmail.com.)
148// 2014.04.28 - implementation note.
149// Implemented as a wrapper of (old)ValuesForPath() because we need look-ahead logic to handle expansion
150// of wildcards and unindexed arrays.  Embedding such logic into valuesForKeyPath() would have made the
151// code much more complicated; this wrapper is straightforward, easy to debug, and doesn't add significant overhead.
152
153// ValuesForPatb retrieves all values for a path from the Map.  If len(returned_values) == 0, then no match.
154// On error, the returned array is 'nil'.
155//   'path' is a dot-separated path of key values.
156//          - If a node in the path is '*', then everything beyond is walked.
157//          - 'path' can contain indexed array references, such as, "*.data[1]" and "msgs[2].data[0].field" -
158//            even "*[2].*[0].field".
159//   'subkeys' (optional) are "key:val[:type]" strings representing attributes or elements in a list.
160//             - By default 'val' is of type string. "key:val:bool" and "key:val:float" to coerce them.
161//             - For attributes prefix the label with the attribute prefix character, by default a
162//               hyphen, '-', e.g., "-seq:3". (See SetAttrPrefix function.)
163//             - If the 'path' refers to a list, then "tag:value" would return member of the list.
164//             - The subkey can be wildcarded - "key:*" - to require that it's there with some value.
165//             - If a subkey is preceeded with the '!' character, the key:value[:type] entry is treated as an
166//               exclusion critera - e.g., "!author:William T. Gaddis".
167//             - If val contains ":" symbol, use SetFieldSeparator to a unused symbol, perhaps "|".
168func (mv Map) ValuesForPath(path string, subkeys ...string) ([]interface{}, error) {
169	// If there are no array indexes in path, use legacy ValuesForPath() logic.
170	if strings.Index(path, "[") < 0 {
171		return mv.oldValuesForPath(path, subkeys...)
172	}
173
174	var subKeyMap map[string]interface{}
175	if len(subkeys) > 0 {
176		var err error
177		subKeyMap, err = getSubKeyMap(subkeys...)
178		if err != nil {
179			return nil, err
180		}
181	}
182
183	keys, kerr := parsePath(path)
184	if kerr != nil {
185		return nil, kerr
186	}
187
188	vals, verr := valuesForArray(keys, mv)
189	if verr != nil {
190		return nil, verr // Vals may be nil, but return empty array.
191	}
192
193	// Need to handle subkeys ... only return members of vals that satisfy conditions.
194	retvals := make([]interface{}, 0)
195	for _, v := range vals {
196		if hasSubKeys(v, subKeyMap) {
197			retvals = append(retvals, v)
198		}
199	}
200	return retvals, nil
201}
202
203func valuesForArray(keys []*key, m Map) ([]interface{}, error) {
204	var tmppath string
205	var haveFirst bool
206	var vals []interface{}
207	var verr error
208
209	lastkey := len(keys) - 1
210	for i := 0; i <= lastkey; i++ {
211		if !haveFirst {
212			tmppath = keys[i].name
213			haveFirst = true
214		} else {
215			tmppath += "." + keys[i].name
216		}
217
218		// Look-ahead: explode wildcards and unindexed arrays.
219		// Need to handle un-indexed list recursively:
220		// e.g., path is "stuff.data[0]" rather than "stuff[0].data[0]".
221		// Need to treat it as "stuff[0].data[0]", "stuff[1].data[0]", ...
222		if !keys[i].isArray && i < lastkey && keys[i+1].isArray {
223			// Can't pass subkeys because we may not be at literal end of path.
224			vv, vverr := m.oldValuesForPath(tmppath)
225			if vverr != nil {
226				return nil, vverr
227			}
228			for _, v := range vv {
229				// See if we can walk the value.
230				am, ok := v.(map[string]interface{})
231				if !ok {
232					continue
233				}
234				// Work the backend.
235				nvals, nvalserr := valuesForArray(keys[i+1:], Map(am))
236				if nvalserr != nil {
237					return nil, nvalserr
238				}
239				vals = append(vals, nvals...)
240			}
241			break // have recursed the whole path - return
242		}
243
244		if keys[i].isArray || i == lastkey {
245			// Don't pass subkeys because may not be at literal end of path.
246			vals, verr = m.oldValuesForPath(tmppath)
247		} else {
248			continue
249		}
250		if verr != nil {
251			return nil, verr
252		}
253
254		if i == lastkey && !keys[i].isArray {
255			break
256		}
257
258		// Now we're looking at an array - supposedly.
259		// Is index in range of vals?
260		if len(vals) <= keys[i].position {
261			vals = nil
262			break
263		}
264
265		// Return the array member of interest, if at end of path.
266		if i == lastkey {
267			vals = vals[keys[i].position:(keys[i].position + 1)]
268			break
269		}
270
271		// Extract the array member of interest.
272		am := vals[keys[i].position:(keys[i].position + 1)]
273
274		// must be a map[string]interface{} value so we can keep walking the path
275		amm, ok := am[0].(map[string]interface{})
276		if !ok {
277			vals = nil
278			break
279		}
280
281		m = Map(amm)
282		haveFirst = false
283	}
284
285	return vals, nil
286}
287
288type key struct {
289	name     string
290	isArray  bool
291	position int
292}
293
294func parsePath(s string) ([]*key, error) {
295	keys := strings.Split(s, ".")
296
297	ret := make([]*key, 0)
298
299	for i := 0; i < len(keys); i++ {
300		if keys[i] == "" {
301			continue
302		}
303
304		newkey := new(key)
305		if strings.Index(keys[i], "[") < 0 {
306			newkey.name = keys[i]
307			ret = append(ret, newkey)
308			continue
309		}
310
311		p := strings.Split(keys[i], "[")
312		newkey.name = p[0]
313		p = strings.Split(p[1], "]")
314		if p[0] == "" { // no right bracket
315			return nil, fmt.Errorf("no right bracket on key index: %s", keys[i])
316		}
317		// convert p[0] to a int value
318		pos, nerr := strconv.ParseInt(p[0], 10, 32)
319		if nerr != nil {
320			return nil, fmt.Errorf("cannot convert index to int value: %s", p[0])
321		}
322		newkey.position = int(pos)
323		newkey.isArray = true
324		ret = append(ret, newkey)
325	}
326
327	return ret, nil
328}
329
330// legacy ValuesForPath() - now wrapped to handle special case of indexed arrays in 'path'.
331func (mv Map) oldValuesForPath(path string, subkeys ...string) ([]interface{}, error) {
332	m := map[string]interface{}(mv)
333	var subKeyMap map[string]interface{}
334	if len(subkeys) > 0 {
335		var err error
336		subKeyMap, err = getSubKeyMap(subkeys...)
337		if err != nil {
338			return nil, err
339		}
340	}
341
342	keys := strings.Split(path, ".")
343	if keys[len(keys)-1] == "" {
344		keys = keys[:len(keys)-1]
345	}
346	ivals := make([]interface{}, 0, defaultArraySize)
347	var cnt int
348	valuesForKeyPath(&ivals, &cnt, m, keys, subKeyMap)
349	return ivals[:cnt], nil
350}
351
352func valuesForKeyPath(ret *[]interface{}, cnt *int, m interface{}, keys []string, subkeys map[string]interface{}) {
353	lenKeys := len(keys)
354
355	// load 'm' values into 'ret'
356	// expand any lists
357	if lenKeys == 0 {
358		switch m.(type) {
359		case map[string]interface{}:
360			if subkeys != nil {
361				if ok := hasSubKeys(m, subkeys); !ok {
362					return
363				}
364			}
365			*ret = append(*ret, m)
366			*cnt++
367		case []interface{}:
368			for i, v := range m.([]interface{}) {
369				if subkeys != nil {
370					if ok := hasSubKeys(v, subkeys); !ok {
371						continue // only load list members with subkeys
372					}
373				}
374				*ret = append(*ret, (m.([]interface{}))[i])
375				*cnt++
376			}
377		default:
378			if subkeys != nil {
379				return // must be map[string]interface{} if there are subkeys
380			}
381			*ret = append(*ret, m)
382			*cnt++
383		}
384		return
385	}
386
387	// key of interest
388	key := keys[0]
389	switch key {
390	case "*": // wildcard - scan all values
391		switch m.(type) {
392		case map[string]interface{}:
393			for _, v := range m.(map[string]interface{}) {
394				// valuesForKeyPath(ret, v, keys[1:], subkeys)
395				valuesForKeyPath(ret, cnt, v, keys[1:], subkeys)
396			}
397		case []interface{}:
398			for _, v := range m.([]interface{}) {
399				switch v.(type) {
400				// flatten out a list of maps - keys are processed
401				case map[string]interface{}:
402					for _, vv := range v.(map[string]interface{}) {
403						// valuesForKeyPath(ret, vv, keys[1:], subkeys)
404						valuesForKeyPath(ret, cnt, vv, keys[1:], subkeys)
405					}
406				default:
407					// valuesForKeyPath(ret, v, keys[1:], subkeys)
408					valuesForKeyPath(ret, cnt, v, keys[1:], subkeys)
409				}
410			}
411		}
412	default: // key - must be map[string]interface{}
413		switch m.(type) {
414		case map[string]interface{}:
415			if v, ok := m.(map[string]interface{})[key]; ok {
416				// valuesForKeyPath(ret, v, keys[1:], subkeys)
417				valuesForKeyPath(ret, cnt, v, keys[1:], subkeys)
418			}
419		case []interface{}: // may be buried in list
420			for _, v := range m.([]interface{}) {
421				switch v.(type) {
422				case map[string]interface{}:
423					if vv, ok := v.(map[string]interface{})[key]; ok {
424						// valuesForKeyPath(ret, vv, keys[1:], subkeys)
425						valuesForKeyPath(ret, cnt, vv, keys[1:], subkeys)
426					}
427				}
428			}
429		}
430	}
431}
432
433// hasSubKeys() - interface{} equality works for string, float64, bool
434// 'v' must be a map[string]interface{} value to have subkeys
435// 'a' can have k:v pairs with v.(string) == "*", which is treated like a wildcard.
436func hasSubKeys(v interface{}, subkeys map[string]interface{}) bool {
437	if len(subkeys) == 0 {
438		return true
439	}
440
441	switch v.(type) {
442	case map[string]interface{}:
443		// do all subKey name:value pairs match?
444		mv := v.(map[string]interface{})
445		for skey, sval := range subkeys {
446			isNotKey := false
447			if skey[:1] == "!" { // a NOT-key
448				skey = skey[1:]
449				isNotKey = true
450			}
451			vv, ok := mv[skey]
452			if !ok { // key doesn't exist
453				if isNotKey { // key not there, but that's what we want
454					if kv, ok := sval.(string); ok && kv == "*" {
455						continue
456					}
457				}
458				return false
459			}
460			// wildcard check
461			if kv, ok := sval.(string); ok && kv == "*" {
462				if isNotKey { // key is there, and we don't want it
463					return false
464				}
465				continue
466			}
467			switch sval.(type) {
468			case string:
469				if s, ok := vv.(string); ok && s == sval.(string) {
470					if isNotKey {
471						return false
472					}
473					continue
474				}
475			case bool:
476				if b, ok := vv.(bool); ok && b == sval.(bool) {
477					if isNotKey {
478						return false
479					}
480					continue
481				}
482			case float64:
483				if f, ok := vv.(float64); ok && f == sval.(float64) {
484					if isNotKey {
485						return false
486					}
487					continue
488				}
489			}
490			// key there but didn't match subkey value
491			if isNotKey { // that's what we want
492				continue
493			}
494			return false
495		}
496		// all subkeys matched
497		return true
498	}
499
500	// not a map[string]interface{} value, can't have subkeys
501	return false
502}
503
504// Generate map of key:value entries as map[string]string.
505//	'kv' arguments are "name:value" pairs: attribute keys are designated with prepended hyphen, '-'.
506//	If len(kv) == 0, the return is (nil, nil).
507func getSubKeyMap(kv ...string) (map[string]interface{}, error) {
508	if len(kv) == 0 {
509		return nil, nil
510	}
511	m := make(map[string]interface{}, 0)
512	for _, v := range kv {
513		vv := strings.Split(v, fieldSep)
514		switch len(vv) {
515		case 2:
516			m[vv[0]] = interface{}(vv[1])
517		case 3:
518			switch vv[2] {
519			case "string", "char", "text":
520				m[vv[0]] = interface{}(vv[1])
521			case "bool", "boolean":
522				// ParseBool treats "1"==true & "0"==false
523				b, err := strconv.ParseBool(vv[1])
524				if err != nil {
525					return nil, fmt.Errorf("can't convert subkey value to bool: %s", vv[1])
526				}
527				m[vv[0]] = interface{}(b)
528			case "float", "float64", "num", "number", "numeric":
529				f, err := strconv.ParseFloat(vv[1], 64)
530				if err != nil {
531					return nil, fmt.Errorf("can't convert subkey value to float: %s", vv[1])
532				}
533				m[vv[0]] = interface{}(f)
534			default:
535				return nil, fmt.Errorf("unknown subkey conversion spec: %s", v)
536			}
537		default:
538			return nil, fmt.Errorf("unknown subkey spec: %s", v)
539		}
540	}
541	return m, nil
542}
543
544// -------------------------------  END of valuesFor ... ----------------------------
545
546// ----------------------- locate where a key value is in the tree -------------------
547
548//----------------------------- find all paths to a key --------------------------------
549
550// PathsForKey returns all paths through Map, 'mv', (in dot-notation) that terminate with the specified key.
551// Results can be used with ValuesForPath.
552func (mv Map) PathsForKey(key string) []string {
553	m := map[string]interface{}(mv)
554	breadbasket := make(map[string]bool, 0)
555	breadcrumbs := ""
556
557	hasKeyPath(breadcrumbs, m, key, breadbasket)
558	if len(breadbasket) == 0 {
559		return nil
560	}
561
562	// unpack map keys to return
563	res := make([]string, len(breadbasket))
564	var i int
565	for k := range breadbasket {
566		res[i] = k
567		i++
568	}
569
570	return res
571}
572
573// PathForKeyShortest extracts the shortest path from all possible paths - from PathsForKey() - in Map, 'mv'..
574// Paths are strings using dot-notation.
575func (mv Map) PathForKeyShortest(key string) string {
576	paths := mv.PathsForKey(key)
577
578	lp := len(paths)
579	if lp == 0 {
580		return ""
581	}
582	if lp == 1 {
583		return paths[0]
584	}
585
586	shortest := paths[0]
587	shortestLen := len(strings.Split(shortest, "."))
588
589	for i := 1; i < len(paths); i++ {
590		vlen := len(strings.Split(paths[i], "."))
591		if vlen < shortestLen {
592			shortest = paths[i]
593			shortestLen = vlen
594		}
595	}
596
597	return shortest
598}
599
600// hasKeyPath - if the map 'key' exists append it to KeyPath.path and increment KeyPath.depth
601// This is really just a breadcrumber that saves all trails that hit the prescribed 'key'.
602func hasKeyPath(crumbs string, iv interface{}, key string, basket map[string]bool) {
603	switch iv.(type) {
604	case map[string]interface{}:
605		vv := iv.(map[string]interface{})
606		if _, ok := vv[key]; ok {
607			// create a new breadcrumb, intialized with the one we have
608			var nbc string
609			if crumbs == "" {
610				nbc = key
611			} else {
612				nbc = crumbs + "." + key
613			}
614			basket[nbc] = true
615		}
616		// walk on down the path, key could occur again at deeper node
617		for k, v := range vv {
618			// create a new breadcrumb, intialized with the one we have
619			var nbc string
620			if crumbs == "" {
621				nbc = k
622			} else {
623				nbc = crumbs + "." + k
624			}
625			hasKeyPath(nbc, v, key, basket)
626		}
627	case []interface{}:
628		// crumb-trail doesn't change, pass it on
629		for _, v := range iv.([]interface{}) {
630			hasKeyPath(crumbs, v, key, basket)
631		}
632	}
633}
634
635var PathNotExistError = errors.New("Path does not exist")
636
637// ValueForPath wraps ValuesFor Path and returns the first value returned.
638// If no value is found it returns 'nil' and PathNotExistError.
639func (mv Map) ValueForPath(path string) (interface{}, error) {
640	vals, err := mv.ValuesForPath(path)
641	if err != nil {
642		return nil, err
643	}
644	if len(vals) == 0 {
645		return nil, PathNotExistError
646	}
647	return vals[0], nil
648}
649
650// ValuesForPathString returns the first found value for the path as a string.
651func (mv Map) ValueForPathString(path string) (string, error) {
652	vals, err := mv.ValuesForPath(path)
653	if err != nil {
654		return "", err
655	}
656	if len(vals) == 0 {
657		return "", errors.New("ValueForPath: path not found")
658	}
659	val := vals[0]
660	return fmt.Sprintf("%v", val), nil
661}
662
663// ValueOrEmptyForPathString returns the first found value for the path as a string.
664// If the path is not found then it returns an empty string.
665func (mv Map) ValueOrEmptyForPathString(path string) string {
666	str, _ := mv.ValueForPathString(path)
667	return str
668}
669