1// Copyright 2012-2014 Charles Banning. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file
4
5//	keyvalues.go: Extract values from an arbitrary XML doc. Tag path can include wildcard characters.
6
7package mxj
8
9import (
10	"errors"
11	"fmt"
12	"strconv"
13	"strings"
14)
15
16// ----------------------------- get everything FOR a single key -------------------------
17
18const (
19	minArraySize = 32
20)
21
22var defaultArraySize int = minArraySize
23
24// Adjust the buffers for expected number of values to return from ValuesForKey() and ValuesForPath().
25// This can have the effect of significantly reducing memory allocation-copy functions for large data sets.
26// Returns the initial buffer size.
27func SetArraySize(size int) int {
28	if size > minArraySize {
29		defaultArraySize = size
30	} else {
31		defaultArraySize = minArraySize
32	}
33	return defaultArraySize
34}
35
36// Return all values in Map, 'mv', associated with a 'key'. If len(returned_values) == 0, then no match.
37// On error, the returned slice is 'nil'. NOTE: 'key' can be wildcard, "*".
38//   'subkeys' (optional) are "key:val[:type]" strings representing attributes or elements in a list.
39//             - By default 'val' is of type string. "key:val:bool" and "key:val:float" to coerce them.
40//             - For attributes prefix the label with a hyphen, '-', e.g., "-seq:3".
41//             - If the 'key' refers to a list, then "key:value" could select a list member of the list.
42//             - The subkey can be wildcarded - "key:*" - to require that it's there with some value.
43//             - If a subkey is preceeded with the '!' character, the key:value[:type] entry is treated as an
44//               exclusion critera - e.g., "!author:William T. Gaddis".
45//             - If val contains ":" symbol, use SetFieldSeparator to a unused symbol, perhaps "|".
46func (mv Map) ValuesForKey(key string, subkeys ...string) ([]interface{}, error) {
47	m := map[string]interface{}(mv)
48	var subKeyMap map[string]interface{}
49	if len(subkeys) > 0 {
50		var err error
51		subKeyMap, err = getSubKeyMap(subkeys...)
52		if err != nil {
53			return nil, err
54		}
55	}
56
57	ret := make([]interface{}, 0, defaultArraySize)
58	var cnt int
59	hasKey(m, key, &ret, &cnt, subKeyMap)
60	return ret[:cnt], nil
61}
62
63var KeyNotExistError = errors.New("Key does not exist")
64
65// ValueForKey is a wrapper on ValuesForKey.  It returns the first member of []interface{}, if any.
66// If there is no value, "nil, nil" is returned.
67func (mv Map) ValueForKey(key string, subkeys ...string) (interface{}, error) {
68	vals, err := mv.ValuesForKey(key, subkeys...)
69	if err != nil {
70		return nil, err
71	}
72	if len(vals) == 0 {
73		return nil, KeyNotExistError
74	}
75	return vals[0], nil
76}
77
78// hasKey - if the map 'key' exists append it to array
79//          if it doesn't do nothing except scan array and map values
80func hasKey(iv interface{}, key string, ret *[]interface{}, cnt *int, subkeys map[string]interface{}) {
81	// func hasKey(iv interface{}, key string, ret *[]interface{}, subkeys map[string]interface{}) {
82	switch iv.(type) {
83	case map[string]interface{}:
84		vv := iv.(map[string]interface{})
85		// see if the current value is of interest
86		if v, ok := vv[key]; ok {
87			switch v.(type) {
88			case map[string]interface{}:
89				if hasSubKeys(v, subkeys) {
90					*ret = append(*ret, v)
91					*cnt++
92				}
93			case []interface{}:
94				for _, av := range v.([]interface{}) {
95					if hasSubKeys(av, subkeys) {
96						*ret = append(*ret, av)
97						*cnt++
98					}
99				}
100			default:
101				if len(subkeys) == 0 {
102					*ret = append(*ret, v)
103					*cnt++
104				}
105			}
106		}
107
108		// wildcard case
109		if key == "*" {
110			for _, v := range vv {
111				switch v.(type) {
112				case map[string]interface{}:
113					if hasSubKeys(v, subkeys) {
114						*ret = append(*ret, v)
115						*cnt++
116					}
117				case []interface{}:
118					for _, av := range v.([]interface{}) {
119						if hasSubKeys(av, subkeys) {
120							*ret = append(*ret, av)
121							*cnt++
122						}
123					}
124				default:
125					if len(subkeys) == 0 {
126						*ret = append(*ret, v)
127						*cnt++
128					}
129				}
130			}
131		}
132
133		// scan the rest
134		for _, v := range vv {
135			hasKey(v, key, ret, cnt, subkeys)
136		}
137	case []interface{}:
138		for _, v := range iv.([]interface{}) {
139			hasKey(v, key, ret, cnt, subkeys)
140		}
141	}
142}
143
144// -----------------------  get everything for a node in the Map ---------------------------
145
146// Allow indexed arrays in "path" specification. (Request from Abhijit Kadam - abhijitk100@gmail.com.)
147// 2014.04.28 - implementation note.
148// Implemented as a wrapper of (old)ValuesForPath() because we need look-ahead logic to handle expansion
149// of wildcards and unindexed arrays.  Embedding such logic into valuesForKeyPath() would have made the
150// code much more complicated; this wrapper is straightforward, easy to debug, and doesn't add significant overhead.
151
152// Retrieve all values for a path from the Map.  If len(returned_values) == 0, then no match.
153// On error, the returned array is 'nil'.
154//   'path' is a dot-separated path of key values.
155//          - If a node in the path is '*', then everything beyond is walked.
156//          - 'path' can contain indexed array references, such as, "*.data[1]" and "msgs[2].data[0].field" -
157//            even "*[2].*[0].field".
158//   'subkeys' (optional) are "key:val[:type]" strings representing attributes or elements in a list.
159//             - By default 'val' is of type string. "key:val:bool" and "key:val:float" to coerce them.
160//             - For attributes prefix the label with a hyphen, '-', e.g., "-seq:3".
161//             - If the 'path' refers to a list, then "tag:value" would return member of the list.
162//             - The subkey can be wildcarded - "key:*" - to require that it's there with some value.
163//             - If a subkey is preceeded with the '!' character, the key:value[:type] entry is treated as an
164//               exclusion critera - e.g., "!author:William T. Gaddis".
165//             - If val contains ":" symbol, use SetFieldSeparator to a unused symbol, perhaps "|".
166func (mv Map) ValuesForPath(path string, subkeys ...string) ([]interface{}, error) {
167	// If there are no array indexes in path, use legacy ValuesForPath() logic.
168	if strings.Index(path, "[") < 0 {
169		return mv.oldValuesForPath(path, subkeys...)
170	}
171
172	var subKeyMap map[string]interface{}
173	if len(subkeys) > 0 {
174		var err error
175		subKeyMap, err = getSubKeyMap(subkeys...)
176		if err != nil {
177			return nil, err
178		}
179	}
180
181	keys, kerr := parsePath(path)
182	if kerr != nil {
183		return nil, kerr
184	}
185
186	vals, verr := valuesForArray(keys, mv)
187	if verr != nil {
188		return nil, verr // Vals may be nil, but return empty array.
189	}
190
191	// Need to handle subkeys ... only return members of vals that satisfy conditions.
192	retvals := make([]interface{}, 0)
193	for _, v := range vals {
194		if hasSubKeys(v, subKeyMap) {
195			retvals = append(retvals, v)
196		}
197	}
198	return retvals, nil
199}
200
201func valuesForArray(keys []*key, m Map) ([]interface{}, error) {
202	var tmppath string
203	var haveFirst bool
204	var vals []interface{}
205	var verr error
206
207	lastkey := len(keys) - 1
208	for i := 0; i <= lastkey; i++ {
209		if !haveFirst {
210			tmppath = keys[i].name
211			haveFirst = true
212		} else {
213			tmppath += "." + keys[i].name
214		}
215
216		// Look-ahead: explode wildcards and unindexed arrays.
217		// Need to handle un-indexed list recursively:
218		// e.g., path is "stuff.data[0]" rather than "stuff[0].data[0]".
219		// Need to treat it as "stuff[0].data[0]", "stuff[1].data[0]", ...
220		if !keys[i].isArray && i < lastkey && keys[i+1].isArray {
221			// Can't pass subkeys because we may not be at literal end of path.
222			vv, vverr := m.oldValuesForPath(tmppath)
223			if vverr != nil {
224				return nil, vverr
225			}
226			for _, v := range vv {
227				// See if we can walk the value.
228				am, ok := v.(map[string]interface{})
229				if !ok {
230					continue
231				}
232				// Work the backend.
233				nvals, nvalserr := valuesForArray(keys[i+1:], Map(am))
234				if nvalserr != nil {
235					return nil, nvalserr
236				}
237				vals = append(vals, nvals...)
238			}
239			break // have recursed the whole path - return
240		}
241
242		if keys[i].isArray || i == lastkey {
243			// Don't pass subkeys because may not be at literal end of path.
244			vals, verr = m.oldValuesForPath(tmppath)
245		} else {
246			continue
247		}
248		if verr != nil {
249			return nil, verr
250		}
251
252		if i == lastkey && !keys[i].isArray {
253			break
254		}
255
256		// Now we're looking at an array - supposedly.
257		// Is index in range of vals?
258		if len(vals) <= keys[i].position {
259			vals = nil
260			break
261		}
262
263		// Return the array member of interest, if at end of path.
264		if i == lastkey {
265			vals = vals[keys[i].position:(keys[i].position + 1)]
266			break
267		}
268
269		// Extract the array member of interest.
270		am := vals[keys[i].position:(keys[i].position + 1)]
271
272		// must be a map[string]interface{} value so we can keep walking the path
273		amm, ok := am[0].(map[string]interface{})
274		if !ok {
275			vals = nil
276			break
277		}
278
279		m = Map(amm)
280		haveFirst = false
281	}
282
283	return vals, nil
284}
285
286type key struct {
287	name     string
288	isArray  bool
289	position int
290}
291
292func parsePath(s string) ([]*key, error) {
293	keys := strings.Split(s, ".")
294
295	ret := make([]*key, 0)
296
297	for i := 0; i < len(keys); i++ {
298		if keys[i] == "" {
299			continue
300		}
301
302		newkey := new(key)
303		if strings.Index(keys[i], "[") < 0 {
304			newkey.name = keys[i]
305			ret = append(ret, newkey)
306			continue
307		}
308
309		p := strings.Split(keys[i], "[")
310		newkey.name = p[0]
311		p = strings.Split(p[1], "]")
312		if p[0] == "" { // no right bracket
313			return nil, fmt.Errorf("no right bracket on key index: %s", keys[i])
314		}
315		// convert p[0] to a int value
316		pos, nerr := strconv.ParseInt(p[0], 10, 32)
317		if nerr != nil {
318			return nil, fmt.Errorf("cannot convert index to int value: %s", p[0])
319		}
320		newkey.position = int(pos)
321		newkey.isArray = true
322		ret = append(ret, newkey)
323	}
324
325	return ret, nil
326}
327
328// legacy ValuesForPath() - now wrapped to handle special case of indexed arrays in 'path'.
329func (mv Map) oldValuesForPath(path string, subkeys ...string) ([]interface{}, error) {
330	m := map[string]interface{}(mv)
331	var subKeyMap map[string]interface{}
332	if len(subkeys) > 0 {
333		var err error
334		subKeyMap, err = getSubKeyMap(subkeys...)
335		if err != nil {
336			return nil, err
337		}
338	}
339
340	keys := strings.Split(path, ".")
341	if keys[len(keys)-1] == "" {
342		keys = keys[:len(keys)-1]
343	}
344	ivals := make([]interface{}, 0, defaultArraySize)
345	var cnt int
346	valuesForKeyPath(&ivals, &cnt, m, keys, subKeyMap)
347	return ivals[:cnt], nil
348}
349
350func valuesForKeyPath(ret *[]interface{}, cnt *int, m interface{}, keys []string, subkeys map[string]interface{}) {
351	lenKeys := len(keys)
352
353	// load 'm' values into 'ret'
354	// expand any lists
355	if lenKeys == 0 {
356		switch m.(type) {
357		case map[string]interface{}:
358			if subkeys != nil {
359				if ok := hasSubKeys(m, subkeys); !ok {
360					return
361				}
362			}
363			*ret = append(*ret, m)
364			*cnt++
365		case []interface{}:
366			for i, v := range m.([]interface{}) {
367				if subkeys != nil {
368					if ok := hasSubKeys(v, subkeys); !ok {
369						continue // only load list members with subkeys
370					}
371				}
372				*ret = append(*ret, (m.([]interface{}))[i])
373				*cnt++
374			}
375		default:
376			if subkeys != nil {
377				return // must be map[string]interface{} if there are subkeys
378			}
379			*ret = append(*ret, m)
380			*cnt++
381		}
382		return
383	}
384
385	// key of interest
386	key := keys[0]
387	switch key {
388	case "*": // wildcard - scan all values
389		switch m.(type) {
390		case map[string]interface{}:
391			for _, v := range m.(map[string]interface{}) {
392				// valuesForKeyPath(ret, v, keys[1:], subkeys)
393				valuesForKeyPath(ret, cnt, v, keys[1:], subkeys)
394			}
395		case []interface{}:
396			for _, v := range m.([]interface{}) {
397				switch v.(type) {
398				// flatten out a list of maps - keys are processed
399				case map[string]interface{}:
400					for _, vv := range v.(map[string]interface{}) {
401						// valuesForKeyPath(ret, vv, keys[1:], subkeys)
402						valuesForKeyPath(ret, cnt, vv, keys[1:], subkeys)
403					}
404				default:
405					// valuesForKeyPath(ret, v, keys[1:], subkeys)
406					valuesForKeyPath(ret, cnt, v, keys[1:], subkeys)
407				}
408			}
409		}
410	default: // key - must be map[string]interface{}
411		switch m.(type) {
412		case map[string]interface{}:
413			if v, ok := m.(map[string]interface{})[key]; ok {
414				// valuesForKeyPath(ret, v, keys[1:], subkeys)
415				valuesForKeyPath(ret, cnt, v, keys[1:], subkeys)
416			}
417		case []interface{}: // may be buried in list
418			for _, v := range m.([]interface{}) {
419				switch v.(type) {
420				case map[string]interface{}:
421					if vv, ok := v.(map[string]interface{})[key]; ok {
422						// valuesForKeyPath(ret, vv, keys[1:], subkeys)
423						valuesForKeyPath(ret, cnt, vv, keys[1:], subkeys)
424					}
425				}
426			}
427		}
428	}
429}
430
431// hasSubKeys() - interface{} equality works for string, float64, bool
432// 'v' must be a map[string]interface{} value to have subkeys
433// 'a' can have k:v pairs with v.(string) == "*", which is treated like a wildcard.
434func hasSubKeys(v interface{}, subkeys map[string]interface{}) bool {
435	if len(subkeys) == 0 {
436		return true
437	}
438
439	switch v.(type) {
440	case map[string]interface{}:
441		// do all subKey name:value pairs match?
442		mv := v.(map[string]interface{})
443		for skey, sval := range subkeys {
444			isNotKey := false
445			if skey[:1] == "!" { // a NOT-key
446				skey = skey[1:]
447				isNotKey = true
448			}
449			vv, ok := mv[skey]
450			if !ok { // key doesn't exist
451				if isNotKey { // key not there, but that's what we want
452					if kv, ok := sval.(string); ok && kv == "*" {
453						continue
454					}
455				}
456				return false
457			}
458			// wildcard check
459			if kv, ok := sval.(string); ok && kv == "*" {
460				if isNotKey { // key is there, and we don't want it
461					return false
462				}
463				continue
464			}
465			switch sval.(type) {
466			case string:
467				if s, ok := vv.(string); ok && s == sval.(string) {
468					if isNotKey {
469						return false
470					}
471					continue
472				}
473			case bool:
474				if b, ok := vv.(bool); ok && b == sval.(bool) {
475					if isNotKey {
476						return false
477					}
478					continue
479				}
480			case float64:
481				if f, ok := vv.(float64); ok && f == sval.(float64) {
482					if isNotKey {
483						return false
484					}
485					continue
486				}
487			}
488			// key there but didn't match subkey value
489			if isNotKey { // that's what we want
490				continue
491			}
492			return false
493		}
494		// all subkeys matched
495		return true
496	}
497
498	// not a map[string]interface{} value, can't have subkeys
499	return false
500}
501
502// Generate map of key:value entries as map[string]string.
503//	'kv' arguments are "name:value" pairs: attribute keys are designated with prepended hyphen, '-'.
504//	If len(kv) == 0, the return is (nil, nil).
505func getSubKeyMap(kv ...string) (map[string]interface{}, error) {
506	if len(kv) == 0 {
507		return nil, nil
508	}
509	m := make(map[string]interface{}, 0)
510	for _, v := range kv {
511		vv := strings.Split(v, fieldSep)
512		switch len(vv) {
513		case 2:
514			m[vv[0]] = interface{}(vv[1])
515		case 3:
516			switch vv[2] {
517			case "string", "char", "text":
518				m[vv[0]] = interface{}(vv[1])
519			case "bool", "boolean":
520				// ParseBool treats "1"==true & "0"==false
521				b, err := strconv.ParseBool(vv[1])
522				if err != nil {
523					return nil, fmt.Errorf("can't convert subkey value to bool: %s", vv[1])
524				}
525				m[vv[0]] = interface{}(b)
526			case "float", "float64", "num", "number", "numeric":
527				f, err := strconv.ParseFloat(vv[1], 64)
528				if err != nil {
529					return nil, fmt.Errorf("can't convert subkey value to float: %s", vv[1])
530				}
531				m[vv[0]] = interface{}(f)
532			default:
533				return nil, fmt.Errorf("unknown subkey conversion spec: %s", v)
534			}
535		default:
536			return nil, fmt.Errorf("unknown subkey spec: %s", v)
537		}
538	}
539	return m, nil
540}
541
542// -------------------------------  END of valuesFor ... ----------------------------
543
544// ----------------------- locate where a key value is in the tree -------------------
545
546//----------------------------- find all paths to a key --------------------------------
547
548// Get all paths through Map, 'mv', (in dot-notation) that terminate with the specified key.
549// Results can be used with ValuesForPath.
550func (mv Map) PathsForKey(key string) []string {
551	m := map[string]interface{}(mv)
552	breadbasket := make(map[string]bool, 0)
553	breadcrumbs := ""
554
555	hasKeyPath(breadcrumbs, m, key, breadbasket)
556	if len(breadbasket) == 0 {
557		return nil
558	}
559
560	// unpack map keys to return
561	res := make([]string, len(breadbasket))
562	var i int
563	for k := range breadbasket {
564		res[i] = k
565		i++
566	}
567
568	return res
569}
570
571// Extract the shortest path from all possible paths - from PathsForKey() - in Map, 'mv'..
572// Paths are strings using dot-notation.
573func (mv Map) PathForKeyShortest(key string) string {
574	paths := mv.PathsForKey(key)
575
576	lp := len(paths)
577	if lp == 0 {
578		return ""
579	}
580	if lp == 1 {
581		return paths[0]
582	}
583
584	shortest := paths[0]
585	shortestLen := len(strings.Split(shortest, "."))
586
587	for i := 1; i < len(paths); i++ {
588		vlen := len(strings.Split(paths[i], "."))
589		if vlen < shortestLen {
590			shortest = paths[i]
591			shortestLen = vlen
592		}
593	}
594
595	return shortest
596}
597
598// hasKeyPath - if the map 'key' exists append it to KeyPath.path and increment KeyPath.depth
599// This is really just a breadcrumber that saves all trails that hit the prescribed 'key'.
600func hasKeyPath(crumbs string, iv interface{}, key string, basket map[string]bool) {
601	switch iv.(type) {
602	case map[string]interface{}:
603		vv := iv.(map[string]interface{})
604		if _, ok := vv[key]; ok {
605			// create a new breadcrumb, intialized with the one we have
606			var nbc string
607			if crumbs == "" {
608				nbc = key
609			} else {
610				nbc = crumbs + "." + key
611			}
612			basket[nbc] = true
613		}
614		// walk on down the path, key could occur again at deeper node
615		for k, v := range vv {
616			// create a new breadcrumb, intialized with the one we have
617			var nbc string
618			if crumbs == "" {
619				nbc = k
620			} else {
621				nbc = crumbs + "." + k
622			}
623			hasKeyPath(nbc, v, key, basket)
624		}
625	case []interface{}:
626		// crumb-trail doesn't change, pass it on
627		for _, v := range iv.([]interface{}) {
628			hasKeyPath(crumbs, v, key, basket)
629		}
630	}
631}
632
633var PathNotExistError = errors.New("Path does not exist")
634
635// ValueForPath wrap ValuesFor Path and returns the first value returned.
636// If no value is found it returns 'nil' and PathNotExistError.
637func (mv Map) ValueForPath(path string) (interface{}, error) {
638	vals, err := mv.ValuesForPath(path)
639	if err != nil {
640		return nil, err
641	}
642	if len(vals) == 0 {
643		return nil, PathNotExistError
644	}
645	return vals[0], nil
646}
647
648// Returns the first found value for the path as a string.
649func (mv Map) ValueForPathString(path string) (string, error) {
650	vals, err := mv.ValuesForPath(path)
651	if err != nil {
652		return "", err
653	}
654	if len(vals) == 0 {
655		return "", errors.New("ValueForPath: path not found")
656	}
657	val := vals[0]
658	switch str := val.(type) {
659	case string:
660		return str, nil
661	default:
662		return "", fmt.Errorf("ValueForPath: unsupported type: %T", str)
663	}
664}
665
666// Returns the first found value for the path as a string.
667// If the path is not found then it returns an empty string.
668func (mv Map) ValueOrEmptyForPathString(path string) string {
669	str, _ := mv.ValueForPathString(path)
670	return str
671}
672