1// Copyright 2012-2016, 2018-2019 Charles Banning. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file
4
5// xml.go - basically the core of X2j for map[string]interface{} values.
6//          NewMapXml, NewMapXmlReader, mv.Xml, mv.XmlWriter
7// see x2j and j2x for wrappers to provide end-to-end transformation of XML and JSON messages.
8
9package mxj
10
11import (
12	"bytes"
13	"encoding/json"
14	"encoding/xml"
15	"errors"
16	"fmt"
17	"io"
18	"reflect"
19	"sort"
20	"strconv"
21	"strings"
22	"time"
23)
24
25// ------------------- NewMapXml & NewMapXmlReader ... -------------------------
26
// If XmlCharsetReader != nil, it will be used to decode the XML, if required.
// Note: if CustomDecoder != nil, then XmlCharsetReader is ignored;
// configure the charset handling on CustomDecoder instead.
//   import (
//	     charset "code.google.com/p/go-charset/charset"
//	     "github.com/clbanning/mxj"
//	 )
//   ...
//   mxj.XmlCharsetReader = charset.NewReader
//   m, merr := mxj.NewMapXml(xmlValue)
37var XmlCharsetReader func(charset string, input io.Reader) (io.Reader, error)
38
39// NewMapXml - convert a XML doc into a Map
40// (This is analogous to unmarshalling a JSON string to map[string]interface{} using json.Unmarshal().)
41//	If the optional argument 'cast' is 'true', then values will be converted to boolean or float64 if possible.
42//
43//	Converting XML to JSON is a simple as:
44//		...
45//		mapVal, merr := mxj.NewMapXml(xmlVal)
46//		if merr != nil {
47//			// handle error
48//		}
49//		jsonVal, jerr := mapVal.Json()
50//		if jerr != nil {
51//			// handle error
52//		}
53//
54//	NOTES:
55//	   1. Declarations, directives, process instructions and comments are NOT parsed.
56//	   2. The 'xmlVal' will be parsed looking for an xml.StartElement, so BOM and other
57//	      extraneous xml.CharData will be ignored unless io.EOF is reached first.
58//	   3. If CoerceKeysToLower() has been called, then all key values will be lower case.
59//	   4. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
60//	   5. If DisableTrimWhiteSpace(b bool) has been called, then all values will be trimmed or not. 'true' by default.
61func NewMapXml(xmlVal []byte, cast ...bool) (Map, error) {
62	var r bool
63	if len(cast) == 1 {
64		r = cast[0]
65	}
66	return xmlToMap(xmlVal, r)
67}
68
69// Get next XML doc from an io.Reader as a Map value.  Returns Map value.
70//	NOTES:
71//	   1. Declarations, directives, process instructions and comments are NOT parsed.
72//	   2. The 'xmlReader' will be parsed looking for an xml.StartElement, so BOM and other
73//	      extraneous xml.CharData will be ignored unless io.EOF is reached first.
74//	   3. If CoerceKeysToLower() has been called, then all key values will be lower case.
75//	   4. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
76func NewMapXmlReader(xmlReader io.Reader, cast ...bool) (Map, error) {
77	var r bool
78	if len(cast) == 1 {
79		r = cast[0]
80	}
81
82	// We need to put an *os.File reader in a ByteReader or the xml.NewDecoder
83	// will wrap it in a bufio.Reader and seek on the file beyond where the
84	// xml.Decoder parses!
85	if _, ok := xmlReader.(io.ByteReader); !ok {
86		xmlReader = myByteReader(xmlReader) // see code at EOF
87	}
88
89	// build the map
90	return xmlReaderToMap(xmlReader, r)
91}
92
93// Get next XML doc from an io.Reader as a Map value.  Returns Map value and slice with the raw XML.
94//	NOTES:
95//	   1. Declarations, directives, process instructions and comments are NOT parsed.
96//	   2. Due to the implementation of xml.Decoder, the raw XML off the reader is buffered to []byte
97//	      using a ByteReader. If the io.Reader is an os.File, there may be significant performance impact.
98//	      See the examples - getmetrics1.go through getmetrics4.go - for comparative use cases on a large
99//	      data set. If the io.Reader is wrapping a []byte value in-memory, however, such as http.Request.Body
100//	      you CAN use it to efficiently unmarshal a XML doc and retrieve the raw XML in a single call.
101//	   3. The 'raw' return value may be larger than the XML text value.
102//	   4. The 'xmlReader' will be parsed looking for an xml.StartElement, so BOM and other
103//	      extraneous xml.CharData will be ignored unless io.EOF is reached first.
104//	   5. If CoerceKeysToLower() has been called, then all key values will be lower case.
105//	   6. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
106func NewMapXmlReaderRaw(xmlReader io.Reader, cast ...bool) (Map, []byte, error) {
107	var r bool
108	if len(cast) == 1 {
109		r = cast[0]
110	}
111	// create TeeReader so we can retrieve raw XML
112	buf := make([]byte, 0)
113	wb := bytes.NewBuffer(buf)
114	trdr := myTeeReader(xmlReader, wb) // see code at EOF
115
116	m, err := xmlReaderToMap(trdr, r)
117
118	// retrieve the raw XML that was decoded
119	b := wb.Bytes()
120
121	if err != nil {
122		return nil, b, err
123	}
124
125	return m, b, nil
126}
127
128// xmlReaderToMap() - parse a XML io.Reader to a map[string]interface{} value
129func xmlReaderToMap(rdr io.Reader, r bool) (map[string]interface{}, error) {
130	// parse the Reader
131	p := xml.NewDecoder(rdr)
132	if CustomDecoder != nil {
133		useCustomDecoder(p)
134	} else {
135		p.CharsetReader = XmlCharsetReader
136	}
137	return xmlToMapParser("", nil, p, r)
138}
139
140// xmlToMap - convert a XML doc into map[string]interface{} value
141func xmlToMap(doc []byte, r bool) (map[string]interface{}, error) {
142	b := bytes.NewReader(doc)
143	p := xml.NewDecoder(b)
144	if CustomDecoder != nil {
145		useCustomDecoder(p)
146	} else {
147		p.CharsetReader = XmlCharsetReader
148	}
149	return xmlToMapParser("", nil, p, r)
150}
151
152// ===================================== where the work happens =============================
153
154// PrependAttrWithHyphen. Prepend attribute tags with a hyphen.
155// Default is 'true'. (Not applicable to NewMapXmlSeq(), mv.XmlSeq(), etc.)
156//	Note:
157//		If 'false', unmarshaling and marshaling is not symmetric. Attributes will be
158//		marshal'd as <attr_tag>attr</attr_tag> and may be part of a list.
159func PrependAttrWithHyphen(v bool) {
160	if v {
161		attrPrefix = "-"
162		lenAttrPrefix = len(attrPrefix)
163		return
164	}
165	attrPrefix = ""
166	lenAttrPrefix = len(attrPrefix)
167}
168
// includeTagSeqNum - when set, each inner tag gets a sequence id.
// Per Sean Murphy, murphysean84@gmail.com.
var includeTagSeqNum bool

// IncludeTagSeqNum - include a "_seq":N key:value pair with each inner tag, denoting
// its position when parsed. This is of limited usefulness, since list values cannot
// be tagged with "_seq" without changing their depth in the Map.
// So THIS SHOULD BE USED WITH CAUTION - see the test cases.
//	IncludeTagSeqNum() will toggle the flag true|false - on|off
//	IncludeTagSeqNum(true|false) will set the flag on|off
//	Calls with more than one argument are ignored.
// Here's a sample of what you get.
/*
		<Obj c="la" x="dee" h="da">
			<IntObj id="3"/>
			<IntObj1 id="1"/>
			<IntObj id="2"/>
			<StrObj>hello</StrObj>
		</Obj>

	parses as:

		{
		"Obj":{
			"-c":"la",
			"-h":"da",
			"-x":"dee",
			"IntObj":[
				{
					"-id":"3",
					"_seq":0 // the sequence number is stored as an int
				},
				{
					"-id":"2",
					"_seq":2
				}],
			"IntObj1":{
				"-id":"1",
				"_seq":1
				},
			"StrObj":{
				"#text":"hello", // simple element value gets "#text" tag
				"_seq":3
				}
			}
		}
*/
func IncludeTagSeqNum(b ...bool) {
	switch len(b) {
	case 0:
		includeTagSeqNum = !includeTagSeqNum
	case 1:
		includeTagSeqNum = b[0]
	}
}
219
// lowerCase - when set, all keys are coerced to lower case.
var lowerCase bool

// CoerceKeysToLower coerces all tag values to lower-case keys. This is useful if you've got
// sources with variable tag capitalization, and you want to use m.ValuesForKeys(), etc.,
// with the key or path spec in lower case.
//	CoerceKeysToLower() will toggle the coercion flag true|false - on|off
//	CoerceKeysToLower(true|false) will set the coercion flag on|off
//	Calls with more than one argument are ignored.
//
//	NOTE: only recognized by NewMapXml, NewMapXmlReader, and NewMapXmlReaderRaw functions as well as
//	      the associated HandleXmlReader and HandleXmlReaderRaw.
func CoerceKeysToLower(b ...bool) {
	switch len(b) {
	case 0:
		lowerCase = !lowerCase
	case 1:
		lowerCase = b[0]
	}
}
238
var (
	// disableTrimWhiteSpace records whether trimming of the space character is disabled.
	disableTrimWhiteSpace bool
	// trimRunes is the set of characters trimmed from both ends of character data.
	trimRunes = "\t\r\b\n "
)

// DisableTrimWhiteSpace sets whether the space character is trimmed from values.
// By default it is trimmed. Called with no argument, trimming of spaces is
// disabled; otherwise the first argument decides (true disables, false enables).
// Note that tab, carriage return, backspace and newline are trimmed in either mode.
func DisableTrimWhiteSpace(b ...bool) {
	disableTrimWhiteSpace = len(b) == 0 || b[0]

	trimRunes = "\t\r\b\n "
	if disableTrimWhiteSpace {
		trimRunes = "\t\r\b\n"
	}
}
258
// 25jun16: Allow user to specify the "prefix" character for XML attribute key labels.
// The prefix lives in the attrPrefix variable (with its length cached in
// lenAttrPrefix) and can be changed with SetAttrPrefix(s string).

var (
	attrPrefix    = `-` // the default
	lenAttrPrefix = 1   // the default; always len(attrPrefix)
)

// SetAttrPrefix changes the attribute key prefix from the default, "-", to the
// specified value, s, and updates the cached prefix length.
// SetAttrPrefix("") is the same as PrependAttrWithHyphen(false).
// (Not applicable for NewMapXmlSeq(), mv.XmlSeq(), etc.)
func SetAttrPrefix(s string) {
	attrPrefix, lenAttrPrefix = s, len(s)
}
273
// 18jan17: Allows user to specify if the map keys should be in snake case instead
// of the default hyphenated notation.
var snakeCaseKeys bool

// CoerceKeysToSnakeCase controls whether hyphens in keys are replaced with
// underscores. The default is false.
//	CoerceKeysToSnakeCase() will toggle the flag true|false - on|off
//	CoerceKeysToSnakeCase(true|false) will set the flag on|off
//	Calls with more than one argument are ignored.
// Note: the attribute prefix will be a hyphen, '-', or what ever string value has
// been specified using SetAttrPrefix.
func CoerceKeysToSnakeCase(b ...bool) {
	switch len(b) {
	case 0:
		snakeCaseKeys = !snakeCaseKeys
	case 1:
		snakeCaseKeys = b[0]
	}
}
288
// 10jan19: use of pull request #57 should be conditional - legacy code assumes
// numeric values are float64.
var castToInt bool

// CastValuesToInt tries to coerce numeric values to int64 or uint64 instead of the
// default float64 when casting is requested.
//	CastValuesToInt() will toggle this handling on|off
//	CastValuesToInt(true|false) will set this handling on|off
//	Calls with more than one argument are ignored.
func CastValuesToInt(b ...bool) {
	switch len(b) {
	case 0:
		castToInt = !castToInt
	case 1:
		castToInt = b[0]
	}
}
303
// 05feb17: support processing XMPP streams (issue #36)
var handleXMPPStreamTag bool

// HandleXMPPStreamTag causes decoder to parse XMPP <stream:stream> elements.
// If called with no argument, XMPP stream element handling is toggled on/off;
// with one argument the handling is set to that value; calls with more than
// one argument are ignored.
// (See xmppStream_test.go for example.)
//	If called with NewMapXml, NewMapXmlReader, NewMapXmlReaderRaw the "stream"
//	element will be returned as:
//		map["stream"]interface{}{map[-<attrs>]interface{}}.
//	If called with NewMapSeq, NewMapSeqReader, NewMapSeqReaderRaw the "stream"
//	element will be returned as:
//		map["stream:stream"]interface{}{map["#attr"]interface{}{map[string]interface{}}}
//		where the "#attr" values have "#text" and "#seq" keys. (See NewMapXmlSeq.)
func HandleXMPPStreamTag(b ...bool) {
	switch len(b) {
	case 0:
		handleXMPPStreamTag = !handleXMPPStreamTag
	case 1:
		handleXMPPStreamTag = b[0]
	}
}
324
// 21jan18 - decode all values as map["#text":value] (issue #56)
var decodeSimpleValuesAsMap bool

// DecodeSimpleValuesAsMap forces all values to be decoded as map["#text":<value>].
// If called with no argument, the decoding is toggled on/off; with one argument
// it is set to that value; calls with more than one argument are ignored.
//
// By default the NewMapXml functions decode simple values without attributes as
// map[<tag>:<value>]. This function causes simple values without attributes to be
// decoded the same as simple values with attributes - map[<tag>:map["#text":<value>]].
func DecodeSimpleValuesAsMap(b ...bool) {
	switch len(b) {
	case 0:
		decodeSimpleValuesAsMap = !decodeSimpleValuesAsMap
	case 1:
		decodeSimpleValuesAsMap = b[0]
	}
}
341
342// xmlToMapParser (2015.11.12) - load a 'clean' XML doc into a map[string]interface{} directly.
343// A refactoring of xmlToTreeParser(), markDuplicate() and treeToMap() - here, all-in-one.
344// We've removed the intermediate *node tree with the allocation and subsequent rescanning.
345func xmlToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[string]interface{}, error) {
346	if lowerCase {
347		skey = strings.ToLower(skey)
348	}
349	if snakeCaseKeys {
350		skey = strings.Replace(skey, "-", "_", -1)
351	}
352
353	// NOTE: all attributes and sub-elements parsed into 'na', 'na' is returned as value for 'skey' in 'n'.
354	// Unless 'skey' is a simple element w/o attributes, in which case the xml.CharData value is the value.
355	var n, na map[string]interface{}
356	var seq int // for includeTagSeqNum
357
358	// Allocate maps and load attributes, if any.
359	// NOTE: on entry from NewMapXml(), etc., skey=="", and we fall through
360	//       to get StartElement then recurse with skey==xml.StartElement.Name.Local
361	//       where we begin allocating map[string]interface{} values 'n' and 'na'.
362	if skey != "" {
363		n = make(map[string]interface{})  // old n
364		na = make(map[string]interface{}) // old n.nodes
365		if len(a) > 0 {
366			for _, v := range a {
367				if snakeCaseKeys {
368					v.Name.Local = strings.Replace(v.Name.Local, "-", "_", -1)
369				}
370				var key string
371				key = attrPrefix + v.Name.Local
372				if lowerCase {
373					key = strings.ToLower(key)
374				}
375				if xmlEscapeCharsDecoder { // per issue#84
376					v.Value = escapeChars(v.Value)
377				}
378				na[key] = cast(v.Value, r, key)
379			}
380		}
381	}
382	// Return XMPP <stream:stream> message.
383	if handleXMPPStreamTag && skey == "stream" {
384		n[skey] = na
385		return n, nil
386	}
387
388	for {
389		t, err := p.Token()
390		if err != nil {
391			if err != io.EOF {
392				return nil, errors.New("xml.Decoder.Token() - " + err.Error())
393			}
394			return nil, err
395		}
396		switch t.(type) {
397		case xml.StartElement:
398			tt := t.(xml.StartElement)
399
400			// First call to xmlToMapParser() doesn't pass xml.StartElement - the map key.
401			// So when the loop is first entered, the first token is the root tag along
402			// with any attributes, which we process here.
403			//
404			// Subsequent calls to xmlToMapParser() will pass in tag+attributes for
405			// processing before getting the next token which is the element value,
406			// which is done above.
407			if skey == "" {
408				return xmlToMapParser(tt.Name.Local, tt.Attr, p, r)
409			}
410
411			// If not initializing the map, parse the element.
412			// len(nn) == 1, necessarily - it is just an 'n'.
413			nn, err := xmlToMapParser(tt.Name.Local, tt.Attr, p, r)
414			if err != nil {
415				return nil, err
416			}
417
418			// The nn map[string]interface{} value is a na[nn_key] value.
419			// We need to see if nn_key already exists - means we're parsing a list.
420			// This may require converting na[nn_key] value into []interface{} type.
421			// First, extract the key:val for the map - it's a singleton.
422			// Note:
423			// * if CoerceKeysToLower() called, then key will be lower case.
424			// * if CoerceKeysToSnakeCase() called, then key will be converted to snake case.
425			var key string
426			var val interface{}
427			for key, val = range nn {
428				break
429			}
430
431			// IncludeTagSeqNum requests that the element be augmented with a "_seq" sub-element.
432			// In theory, we don't need this if len(na) == 1. But, we don't know what might
433			// come next - we're only parsing forward.  So if you ask for 'includeTagSeqNum' you
434			// get it on every element. (Personally, I never liked this, but I added it on request
435			// and did get a $50 Amazon gift card in return - now we support it for backwards compatibility!)
436			if includeTagSeqNum {
437				switch val.(type) {
438				case []interface{}:
439					// noop - There's no clean way to handle this w/o changing message structure.
440				case map[string]interface{}:
441					val.(map[string]interface{})["_seq"] = seq // will overwrite an "_seq" XML tag
442					seq++
443				case interface{}: // a non-nil simple element: string, float64, bool
444					v := map[string]interface{}{"#text": val}
445					v["_seq"] = seq
446					seq++
447					val = v
448				}
449			}
450
451			// 'na' holding sub-elements of n.
452			// See if 'key' already exists.
453			// If 'key' exists, then this is a list, if not just add key:val to na.
454			if v, ok := na[key]; ok {
455				var a []interface{}
456				switch v.(type) {
457				case []interface{}:
458					a = v.([]interface{})
459				default: // anything else - note: v.(type) != nil
460					a = []interface{}{v}
461				}
462				a = append(a, val)
463				na[key] = a
464			} else {
465				na[key] = val // save it as a singleton
466			}
467		case xml.EndElement:
468			// len(n) > 0 if this is a simple element w/o xml.Attrs - see xml.CharData case.
469			if len(n) == 0 {
470				// If len(na)==0 we have an empty element == "";
471				// it has no xml.Attr nor xml.CharData.
472				// Note: in original node-tree parser, val defaulted to "";
473				// so we always had the default if len(node.nodes) == 0.
474				if len(na) > 0 {
475					n[skey] = na
476				} else {
477					n[skey] = "" // empty element
478				}
479			} else if len(n) == 1 && len(na) > 0 {
480				// it's a simple element w/ no attributes w/ subelements
481				for _, v := range n {
482					na["#text"] = v
483				}
484				n[skey] = na
485			}
486			return n, nil
487		case xml.CharData:
488			// clean up possible noise
489			tt := strings.Trim(string(t.(xml.CharData)), trimRunes)
490			if xmlEscapeCharsDecoder { // issue#84
491				tt = escapeChars(tt)
492			}
493			if len(tt) > 0 {
494				if len(na) > 0 || decodeSimpleValuesAsMap {
495					na["#text"] = cast(tt, r, "#text")
496				} else if skey != "" {
497					n[skey] = cast(tt, r, skey)
498				} else {
499					// per Adrian (http://www.adrianlungu.com/) catch stray text
500					// in decoder stream -
501					// https://github.com/clbanning/mxj/pull/14#issuecomment-182816374
502					// NOTE: CharSetReader must be set to non-UTF-8 CharSet or you'll get
503					// a p.Token() decoding error when the BOM is UTF-16 or UTF-32.
504					continue
505				}
506			}
507		default:
508			// noop
509		}
510	}
511}
512
// castNanInf - when set, NaN and infinity strings may be cast to float64.
var castNanInf bool

// CastNanInf controls casting of "Nan", "Inf", "-Inf" XML values to 'float64'.
// By default, these values will be decoded as 'string'.
//	CastNanInf() will toggle the flag true|false - on|off
//	CastNanInf(true|false) will set the flag on|off
//	Calls with more than one argument are ignored.
func CastNanInf(b ...bool) {
	switch len(b) {
	case 0:
		castNanInf = !castNanInf
	case 1:
		castNanInf = b[0]
	}
}
524
525// cast - try to cast string values to bool or float64
526// 't' is the tag key that can be checked for 'not-casting'
527func cast(s string, r bool, t string) interface{} {
528	if checkTagToSkip != nil && t != "" && checkTagToSkip(t) {
529		// call the check-function here with 't[0]'
530		// if 'true' return s
531		return s
532	}
533
534	if r {
535		// handle nan and inf
536		if !castNanInf {
537			switch strings.ToLower(s) {
538			case "nan", "inf", "-inf":
539				return s
540			}
541		}
542
543		// handle numeric strings ahead of boolean
544		if castToInt {
545			if f, err := strconv.ParseInt(s, 10, 64); err == nil {
546				return f
547			}
548			if f, err := strconv.ParseUint(s, 10, 64); err == nil {
549				return f
550			}
551		}
552
553		if castToFloat {
554			if f, err := strconv.ParseFloat(s, 64); err == nil {
555				return f
556			}
557		}
558
559		// ParseBool treats "1"==true & "0"==false, we've already scanned those
560		// values as float64. See if value has 't' or 'f' as initial screen to
561		// minimize calls to ParseBool; also, see if len(s) < 6.
562		if castToBool {
563			if len(s) > 0 && len(s) < 6 {
564				switch s[:1] {
565				case "t", "T", "f", "F":
566					if b, err := strconv.ParseBool(s); err == nil {
567						return b
568					}
569				}
570			}
571		}
572	}
573	return s
574}
575
// pull request, #59
var castToFloat = true

// CastValuesToFloat can be used to skip casting to float64 when
// "cast" argument is 'true' in NewMapXml, etc.
// Default is true.
//	CastValuesToFloat() will toggle the flag true|false - on|off
//	CastValuesToFloat(true|false) will set the flag on|off
//	Calls with more than one argument are ignored.
func CastValuesToFloat(b ...bool) {
	switch len(b) {
	case 0:
		castToFloat = !castToFloat
	case 1:
		castToFloat = b[0]
	}
}
589
// castToBool - when set, values may be cast to bool.
var castToBool = true

// CastValuesToBool can be used to skip casting to bool when
// "cast" argument is 'true' in NewMapXml, etc.
// Default is true.
//	CastValuesToBool() will toggle the flag true|false - on|off
//	CastValuesToBool(true|false) will set the flag on|off
//	Calls with more than one argument are ignored.
func CastValuesToBool(b ...bool) {
	switch len(b) {
	case 0:
		castToBool = !castToBool
	case 1:
		castToBool = b[0]
	}
}
602
// checkTagToSkip - switch to address Issue #58.
// When non-nil it is consulted with the tag key before a value is cast;
// a 'true' result leaves the value as a string.
var checkTagToSkip func(tag string) bool

// SetCheckTagToSkipFunc registers the function that decides whether the value
// for a tag should be left uncast when the "cast" argument is 'true'.
// Passing nil removes a previously registered function.
// (Dot tag path notation is not supported.)
// NOTE: key may be "#text" if it's a simple element with attributes
//       or "decodeSimpleValuesAsMap == true".
// NOTE: does not apply to NewMapXmlSeq... functions.
func SetCheckTagToSkipFunc(fn func(string) bool) {
	checkTagToSkip = fn
}
616
617// ------------------ END: NewMapXml & NewMapXmlReader -------------------------
618
619// ------------------ mv.Xml & mv.XmlWriter - from j2x ------------------------
620
// DefaultRootTag is the root element tag used when encoding a Map for which
// no root tag was supplied and none can be taken from the Map itself.
const DefaultRootTag = "doc"
624
// useGoXmlEmptyElemSyntax - when set, empty elements are encoded as <tag ...></tag>.
var useGoXmlEmptyElemSyntax bool

// XmlGoEmptyElemSyntax() - <tag ...></tag> rather than <tag .../>.
//	Go's encoding/xml package marshals empty XML elements as <tag ...></tag>.  By default this package
//	encodes empty elements as <tag .../>.  If you're marshaling Map values that include structures
//	(which are passed to xml.Marshal for encoding), this will let you conform to the standard package.
//	Use XmlDefaultEmptyElemSyntax() to switch back.
func XmlGoEmptyElemSyntax() {
	useGoXmlEmptyElemSyntax = true
}

// XmlDefaultEmptyElemSyntax() - <tag .../> rather than <tag ...></tag>.
// Returns XML encoding for empty elements to the default package setting.
// Reverses effect of XmlGoEmptyElemSyntax().
func XmlDefaultEmptyElemSyntax() {
	useGoXmlEmptyElemSyntax = false
}
641
// ------- issue #88 ----------
// xmlCheckIsValid is the switch that forces decoding of the encoded XML to
// see if it is valid XML.
var xmlCheckIsValid bool

// XmlCheckIsValid forces the encoded XML to be checked for validity.
// With exactly one argument the switch is set to that value; with any other
// number of arguments (including none) the switch is toggled.
func XmlCheckIsValid(b ...bool) {
	if len(b) != 1 {
		xmlCheckIsValid = !xmlCheckIsValid
		return
	}
	xmlCheckIsValid = b[0]
}
655
656// Encode a Map as XML.  The companion of NewMapXml().
657// The following rules apply.
658//    - The key label "#text" is treated as the value for a simple element with attributes.
659//    - Map keys that begin with a hyphen, '-', are interpreted as attributes.
660//      It is an error if the attribute doesn't have a []byte, string, number, or boolean value.
661//    - Map value type encoding:
662//          > string, bool, float64, int, int32, int64, float32: per "%v" formating
663//          > []bool, []uint8: by casting to string
664//          > structures, etc.: handed to xml.Marshal() - if there is an error, the element
665//            value is "UNKNOWN"
666//    - Elements with only attribute values or are null are terminated using "/>".
667//    - If len(mv) == 1 and no rootTag is provided, then the map key is used as the root tag, possible.
668//      Thus, `{ "key":"value" }` encodes as "<key>value</key>".
669//    - To encode empty elements in a syntax consistent with encoding/xml call UseGoXmlEmptyElementSyntax().
670// The attributes tag=value pairs are alphabetized by "tag".  Also, when encoding map[string]interface{} values -
671// complex elements, etc. - the key:value pairs are alphabetized by key so the resulting tags will appear sorted.
672func (mv Map) Xml(rootTag ...string) ([]byte, error) {
673	m := map[string]interface{}(mv)
674	var err error
675	b := new(bytes.Buffer)
676	p := new(pretty) // just a stub
677
678	if len(m) == 1 && len(rootTag) == 0 {
679		for key, value := range m {
680			// if it an array, see if all values are map[string]interface{}
681			// we force a new root tag if we'll end up with no key:value in the list
682			// so: key:[string_val, bool:true] --> <doc><key>string_val</key><bool>true</bool></doc>
683			switch value.(type) {
684			case []interface{}:
685				for _, v := range value.([]interface{}) {
686					switch v.(type) {
687					case map[string]interface{}: // noop
688					default: // anything else
689						err = marshalMapToXmlIndent(false, b, DefaultRootTag, m, p)
690						goto done
691					}
692				}
693			}
694			err = marshalMapToXmlIndent(false, b, key, value, p)
695		}
696	} else if len(rootTag) == 1 {
697		err = marshalMapToXmlIndent(false, b, rootTag[0], m, p)
698	} else {
699		err = marshalMapToXmlIndent(false, b, DefaultRootTag, m, p)
700	}
701done:
702	if xmlCheckIsValid {
703		d := xml.NewDecoder(bytes.NewReader(b.Bytes()))
704		for {
705			_, err = d.Token()
706			if err == io.EOF {
707				err = nil
708				break
709			} else if err != nil {
710				return nil, err
711			}
712		}
713	}
714	return b.Bytes(), err
715}
716
717// The following implementation is provided only for symmetry with NewMapXmlReader[Raw]
718// The names will also provide a key for the number of return arguments.
719
720// Writes the Map as  XML on the Writer.
721// See Xml() for encoding rules.
722func (mv Map) XmlWriter(xmlWriter io.Writer, rootTag ...string) error {
723	x, err := mv.Xml(rootTag...)
724	if err != nil {
725		return err
726	}
727
728	_, err = xmlWriter.Write(x)
729	return err
730}
731
732// Writes the Map as  XML on the Writer. []byte is the raw XML that was written.
733// See Xml() for encoding rules.
734/*
735func (mv Map) XmlWriterRaw(xmlWriter io.Writer, rootTag ...string) ([]byte, error) {
736	x, err := mv.Xml(rootTag...)
737	if err != nil {
738		return x, err
739	}
740
741	_, err = xmlWriter.Write(x)
742	return x, err
743}
744*/
745
746// Writes the Map as pretty XML on the Writer.
747// See Xml() for encoding rules.
748func (mv Map) XmlIndentWriter(xmlWriter io.Writer, prefix, indent string, rootTag ...string) error {
749	x, err := mv.XmlIndent(prefix, indent, rootTag...)
750	if err != nil {
751		return err
752	}
753
754	_, err = xmlWriter.Write(x)
755	return err
756}
757
758// Writes the Map as pretty XML on the Writer. []byte is the raw XML that was written.
759// See Xml() for encoding rules.
760/*
761func (mv Map) XmlIndentWriterRaw(xmlWriter io.Writer, prefix, indent string, rootTag ...string) ([]byte, error) {
762	x, err := mv.XmlIndent(prefix, indent, rootTag...)
763	if err != nil {
764		return x, err
765	}
766
767	_, err = xmlWriter.Write(x)
768	return x, err
769}
770*/
771
772// -------------------- END: mv.Xml & mv.XmlWriter -------------------------------
773
774// --------------  Handle XML stream by processing Map value --------------------
775
776// Default poll delay to keep Handler from spinning on an open stream
777// like sitting on os.Stdin waiting for imput.
778var xhandlerPollInterval = time.Millisecond
779
780// Bulk process XML using handlers that process a Map value.
781//	'rdr' is an io.Reader for XML (stream)
782//	'mapHandler' is the Map processor. Return of 'false' stops io.Reader processing.
783//	'errHandler' is the error processor. Return of 'false' stops io.Reader processing and returns the error.
784//	Note: mapHandler() and errHandler() calls are blocking, so reading and processing of messages is serialized.
785//	      This means that you can stop reading the file on error or after processing a particular message.
786//	      To have reading and handling run concurrently, pass argument to a go routine in handler and return 'true'.
787func HandleXmlReader(xmlReader io.Reader, mapHandler func(Map) bool, errHandler func(error) bool) error {
788	var n int
789	for {
790		m, merr := NewMapXmlReader(xmlReader)
791		n++
792
793		// handle error condition with errhandler
794		if merr != nil && merr != io.EOF {
795			merr = fmt.Errorf("[xmlReader: %d] %s", n, merr.Error())
796			if ok := errHandler(merr); !ok {
797				// caused reader termination
798				return merr
799			}
800			continue
801		}
802
803		// pass to maphandler
804		if len(m) != 0 {
805			if ok := mapHandler(m); !ok {
806				break
807			}
808		} else if merr != io.EOF {
809			time.Sleep(xhandlerPollInterval)
810		}
811
812		if merr == io.EOF {
813			break
814		}
815	}
816	return nil
817}
818
819// Bulk process XML using handlers that process a Map value and the raw XML.
820//	'rdr' is an io.Reader for XML (stream)
821//	'mapHandler' is the Map and raw XML - []byte - processor. Return of 'false' stops io.Reader processing.
822//	'errHandler' is the error and raw XML processor. Return of 'false' stops io.Reader processing and returns the error.
823//	Note: mapHandler() and errHandler() calls are blocking, so reading and processing of messages is serialized.
824//	      This means that you can stop reading the file on error or after processing a particular message.
825//	      To have reading and handling run concurrently, pass argument(s) to a go routine in handler and return 'true'.
826//	See NewMapXmlReaderRaw for comment on performance associated with retrieving raw XML from a Reader.
827func HandleXmlReaderRaw(xmlReader io.Reader, mapHandler func(Map, []byte) bool, errHandler func(error, []byte) bool) error {
828	var n int
829	for {
830		m, raw, merr := NewMapXmlReaderRaw(xmlReader)
831		n++
832
833		// handle error condition with errhandler
834		if merr != nil && merr != io.EOF {
835			merr = fmt.Errorf("[xmlReader: %d] %s", n, merr.Error())
836			if ok := errHandler(merr, raw); !ok {
837				// caused reader termination
838				return merr
839			}
840			continue
841		}
842
843		// pass to maphandler
844		if len(m) != 0 {
845			if ok := mapHandler(m, raw); !ok {
846				break
847			}
848		} else if merr != io.EOF {
849			time.Sleep(xhandlerPollInterval)
850		}
851
852		if merr == io.EOF {
853			break
854		}
855	}
856	return nil
857}
858
859// ----------------- END: Handle XML stream by processing Map value --------------
860
861// --------  a hack of io.TeeReader ... need one that's an io.ByteReader for xml.NewDecoder() ----------
862
863// This is a clone of io.TeeReader with the additional method t.ReadByte().
864// Thus, this TeeReader is also an io.ByteReader.
865// This is necessary because xml.NewDecoder uses a ByteReader not a Reader. It appears to have been written
866// with bufio.Reader or bytes.Reader in mind ... not a generic io.Reader, which doesn't have to have ReadByte()..
867// If NewDecoder is passed a Reader that does not satisfy ByteReader() it wraps the Reader with
868// bufio.NewReader and uses ReadByte rather than Read that runs the TeeReader pipe logic.
869
// teeReader copies everything read from r to w. It implements both io.Reader
// and io.ByteReader; b is a one-byte scratch buffer used by ReadByte.
type teeReader struct {
	r io.Reader
	w io.Writer
	b []byte
}

// myTeeReader returns a reader that writes to w what it reads from r.
// The returned value also implements io.ByteReader.
func myTeeReader(r io.Reader, w io.Writer) io.Reader {
	return &teeReader{r: r, w: w, b: make([]byte, 1)}
}

// Read reads from the underlying reader into p and writes the bytes read to
// the underlying writer, as io.TeeReader does. A write error is returned as
// the read error. xml.Decoder uses ReadByte, but Read is what any wrapper
// around this reader (e.g. a charset reader) will call.
func (t *teeReader) Read(p []byte) (int, error) {
	n, err := t.r.Read(p)
	if n > 0 {
		if wn, werr := t.w.Write(p[:n]); werr != nil {
			return wn, werr
		}
	}
	return n, err
}

// ReadByte reads a single byte from the underlying reader and writes it to
// the underlying writer. When the underlying Read delivers a byte together
// with an error, the byte is returned with a nil error so it is not lost.
// Reads that return (0, nil) are retried; io.ErrNoProgress is returned if
// that keeps happening.
func (t *teeReader) ReadByte() (byte, error) {
	const maxEmptyReads = 100
	for i := 0; i < maxEmptyReads; i++ {
		n, err := t.r.Read(t.b)
		if n > 0 {
			if _, werr := t.w.Write(t.b[:1]); werr != nil {
				return t.b[0], werr
			}
			return t.b[0], nil
		}
		if err != nil {
			return 0, err
		}
	}
	return 0, io.ErrNoProgress
}
895
// byteReader wraps an io.Reader so that it also implements io.ByteReader.
// For use with NewMapXmlReader & NewMapXmlSeqReader.
type byteReader struct {
	r   io.Reader
	b   []byte // one-byte scratch buffer used by ReadByte
	err error  // read error that arrived together with a byte; reported by the next call
}

// myByteReader returns r wrapped in a byteReader; the result satisfies both
// io.Reader and io.ByteReader.
func myByteReader(r io.Reader) io.Reader {
	return &byteReader{r: r, b: make([]byte, 1)}
}

// Read delegates to the underlying reader, after first reporting any error
// that a previous ReadByte held back.
// Need for io.Reader interface ...
// Needed if reading a malformed http.Request.Body - issue #38.
func (b *byteReader) Read(p []byte) (int, error) {
	if b.err != nil {
		err := b.err
		b.err = nil
		return 0, err
	}
	return b.r.Read(p)
}

// ReadByte reads and returns one byte from the underlying reader.
//
// An error (including io.EOF) is reported only when no byte was read, as the
// io.ByteReader contract requires: if the underlying Read yields a byte and an
// error in the same call, the byte is returned now and the error on the next
// call. Reads that return (0, nil) are retried; after too many of them in a
// row io.ErrNoProgress is returned.
func (b *byteReader) ReadByte() (byte, error) {
	const maxEmptyReads = 100

	if b.err != nil {
		err := b.err
		b.err = nil
		return 0, err
	}
	for i := 0; i < maxEmptyReads; i++ {
		n, err := b.r.Read(b.b)
		if n > 0 {
			// Keep the read error, if any, for the next call.
			b.err = err
			return b.b[0], nil
		}
		if err != nil {
			return 0, err
		}
	}
	return 0, io.ErrNoProgress
}
921
922// ----------------------- END: io.TeeReader hack -----------------------------------
923
924// ---------------------- XmlIndent - from j2x package ----------------------------
925
926// Encode a map[string]interface{} as a pretty XML string.
927// See Xml for encoding rules.
928func (mv Map) XmlIndent(prefix, indent string, rootTag ...string) ([]byte, error) {
929	m := map[string]interface{}(mv)
930
931	var err error
932	b := new(bytes.Buffer)
933	p := new(pretty)
934	p.indent = indent
935	p.padding = prefix
936
937	if len(m) == 1 && len(rootTag) == 0 {
938		// this can extract the key for the single map element
939		// use it if it isn't a key for a list
940		for key, value := range m {
941			if _, ok := value.([]interface{}); ok {
942				err = marshalMapToXmlIndent(true, b, DefaultRootTag, m, p)
943			} else {
944				err = marshalMapToXmlIndent(true, b, key, value, p)
945			}
946		}
947	} else if len(rootTag) == 1 {
948		err = marshalMapToXmlIndent(true, b, rootTag[0], m, p)
949	} else {
950		err = marshalMapToXmlIndent(true, b, DefaultRootTag, m, p)
951	}
952	if xmlCheckIsValid {
953		d := xml.NewDecoder(bytes.NewReader(b.Bytes()))
954		for {
955			_, err = d.Token()
956			if err == io.EOF {
957				err = nil
958				break
959			} else if err != nil {
960				return nil, err
961			}
962		}
963	}
964	return b.Bytes(), err
965}
966
// pretty holds the indentation state used while encoding indented XML.
// The field order matters: the type is also built with unkeyed literals.
type pretty struct {
	indent   string // text added to padding by each Indent
	cnt      int    // number of Indent calls not yet undone by Outdent
	padding  string // text written in front of an indented line
	mapDepth int
	start    int
}

// Indent deepens the padding by one indent unit.
func (p *pretty) Indent() {
	p.cnt++
	p.padding = p.padding + p.indent
}

// Outdent removes one indent unit from the end of the padding.
// It does nothing when no Indent is outstanding.
func (p *pretty) Outdent() {
	if p.cnt <= 0 {
		return
	}
	keep := len(p.padding) - len(p.indent)
	p.padding = p.padding[:keep]
	p.cnt--
}
986
987// where the work actually happens
988// returns an error if an attribute is not atomic
989// NOTE: 01may20 - replaces mapToXmlIndent(); uses bytes.Buffer instead for string appends.
990func marshalMapToXmlIndent(doIndent bool, b *bytes.Buffer, key string, value interface{}, pp *pretty) error {
991	var err error
992	var endTag bool
993	var isSimple bool
994	var elen int
995	p := &pretty{pp.indent, pp.cnt, pp.padding, pp.mapDepth, pp.start}
996
997	// per issue #48, 18apr18 - try and coerce maps to map[string]interface{}
998	// Don't need for mapToXmlSeqIndent, since maps there are decoded by NewMapXmlSeq().
999	if reflect.ValueOf(value).Kind() == reflect.Map {
1000		switch value.(type) {
1001		case map[string]interface{}:
1002		default:
1003			val := make(map[string]interface{})
1004			vv := reflect.ValueOf(value)
1005			keys := vv.MapKeys()
1006			for _, k := range keys {
1007				val[fmt.Sprint(k)] = vv.MapIndex(k).Interface()
1008			}
1009			value = val
1010		}
1011	}
1012
1013	// 14jul20.  The following block of code has become something of a catch all for odd stuff
1014	// that might be passed in as a result of casting an arbitrary map[<T>]<T> to an mxj.Map
1015	// value and then call m.Xml or m.XmlIndent. See issue #71 (and #73) for such edge cases.
1016	switch value.(type) {
1017	// these types are handled during encoding
1018	case map[string]interface{}, []byte, string, float64, bool, int, int32, int64, float32, json.Number:
1019	case []map[string]interface{}, []string, []float64, []bool, []int, []int32, []int64, []float32, []json.Number:
1020	case []interface{}:
1021	case nil:
1022		value = ""
1023	default:
1024		// see if value is a struct, if so marshal using encoding/xml package
1025		if reflect.ValueOf(value).Kind() == reflect.Struct {
1026			if v, err := xml.Marshal(value); err != nil {
1027				return err
1028			} else {
1029				value = string(v)
1030			}
1031		} else {
1032			// coerce eveything else into a string value
1033			value = fmt.Sprint(value)
1034		}
1035	}
1036
1037	// start the XML tag with required indentaton and padding
1038	if doIndent {
1039		if _, err = b.WriteString(p.padding); err != nil {
1040			return err
1041		}
1042	}
1043	switch value.(type) {
1044	case []interface{}:
1045	default:
1046		if _, err = b.WriteString(`<` + key); err != nil {
1047			return err
1048		}
1049	}
1050
1051	switch value.(type) {
1052	case map[string]interface{}:
1053		vv := value.(map[string]interface{})
1054		lenvv := len(vv)
1055		// scan out attributes - attribute keys have prepended attrPrefix
1056		attrlist := make([][2]string, len(vv))
1057		var n int
1058		var ss string
1059		for k, v := range vv {
1060			if lenAttrPrefix > 0 && lenAttrPrefix < len(k) && k[:lenAttrPrefix] == attrPrefix {
1061				switch v.(type) {
1062				case string:
1063					if xmlEscapeChars {
1064						ss = escapeChars(v.(string))
1065					} else {
1066						ss = v.(string)
1067					}
1068					attrlist[n][0] = k[lenAttrPrefix:]
1069					attrlist[n][1] = ss
1070				case float64, bool, int, int32, int64, float32, json.Number:
1071					attrlist[n][0] = k[lenAttrPrefix:]
1072					attrlist[n][1] = fmt.Sprintf("%v", v)
1073				case []byte:
1074					if xmlEscapeChars {
1075						ss = escapeChars(string(v.([]byte)))
1076					} else {
1077						ss = string(v.([]byte))
1078					}
1079					attrlist[n][0] = k[lenAttrPrefix:]
1080					attrlist[n][1] = ss
1081				default:
1082					return fmt.Errorf("invalid attribute value for: %s:<%T>", k, v)
1083				}
1084				n++
1085			}
1086		}
1087		if n > 0 {
1088			attrlist = attrlist[:n]
1089			sort.Sort(attrList(attrlist))
1090			for _, v := range attrlist {
1091				if _, err = b.WriteString(` ` + v[0] + `="` + v[1] + `"`); err != nil {
1092					return err
1093				}
1094			}
1095		}
1096		// only attributes?
1097		if n == lenvv {
1098			if useGoXmlEmptyElemSyntax {
1099				if _, err = b.WriteString(`</` + key + ">"); err != nil {
1100					return err
1101				}
1102			} else {
1103				if _, err = b.WriteString(`/>`); err != nil {
1104					return err
1105				}
1106			}
1107			break
1108		}
1109
1110		// simple element? Note: '#text" is an invalid XML tag.
1111		isComplex := false
1112		if v, ok := vv["#text"]; ok && n+1 == lenvv {
1113			// just the value and attributes
1114			switch v.(type) {
1115			case string:
1116				if xmlEscapeChars {
1117					v = escapeChars(v.(string))
1118				} else {
1119					v = v.(string)
1120				}
1121			case []byte:
1122				if xmlEscapeChars {
1123					v = escapeChars(string(v.([]byte)))
1124				} else {
1125					v = string(v.([]byte))
1126				}
1127			}
1128			if _, err = b.WriteString(">" + fmt.Sprintf("%v", v)); err != nil {
1129				return err
1130			}
1131			endTag = true
1132			elen = 1
1133			isSimple = true
1134			break
1135		} else if ok {
1136			// need to handle when there are subelements in addition to the simple element value
1137			// issue #90
1138			switch v.(type) {
1139			case string:
1140				if xmlEscapeChars {
1141					v = escapeChars(v.(string))
1142				} else {
1143					v = v.(string)
1144				}
1145			case []byte:
1146				if xmlEscapeChars {
1147					v = escapeChars(string(v.([]byte)))
1148				} else {
1149					v = string(v.([]byte))
1150				}
1151			}
1152			if _, err = b.WriteString(">" + fmt.Sprintf("%v", v)); err != nil {
1153				return err
1154			}
1155			isComplex = true
1156		}
1157
1158		// close tag with possible attributes
1159		if !isComplex {
1160			if _, err = b.WriteString(">"); err != nil {
1161				return err
1162			}
1163		}
1164		if doIndent {
1165			// *s += "\n"
1166			if _, err = b.WriteString("\n"); err != nil {
1167				return err
1168			}
1169		}
1170		// something more complex
1171		p.mapDepth++
1172		// extract the map k:v pairs and sort on key
1173		elemlist := make([][2]interface{}, len(vv))
1174		n = 0
1175		for k, v := range vv {
1176			if k == "#text" {
1177				// simple element handled above
1178				continue
1179			}
1180			if lenAttrPrefix > 0 && lenAttrPrefix < len(k) && k[:lenAttrPrefix] == attrPrefix {
1181				continue
1182			}
1183			elemlist[n][0] = k
1184			elemlist[n][1] = v
1185			n++
1186		}
1187		elemlist = elemlist[:n]
1188		sort.Sort(elemList(elemlist))
1189		var i int
1190		for _, v := range elemlist {
1191			switch v[1].(type) {
1192			case []interface{}:
1193			default:
1194				if i == 0 && doIndent {
1195					p.Indent()
1196				}
1197			}
1198			i++
1199			if err := marshalMapToXmlIndent(doIndent, b, v[0].(string), v[1], p); err != nil {
1200				return err
1201			}
1202			switch v[1].(type) {
1203			case []interface{}: // handled in []interface{} case
1204			default:
1205				if doIndent {
1206					p.Outdent()
1207				}
1208			}
1209			i--
1210		}
1211		p.mapDepth--
1212		endTag = true
1213		elen = 1 // we do have some content ...
1214	case []interface{}:
1215		// special case - found during implementing Issue #23
1216		if len(value.([]interface{})) == 0 {
1217			if doIndent {
1218				if _, err = b.WriteString(p.padding + p.indent); err != nil {
1219					return err
1220				}
1221			}
1222			if _, err = b.WriteString("<" + key); err != nil {
1223				return err
1224			}
1225			elen = 0
1226			endTag = true
1227			break
1228		}
1229		for _, v := range value.([]interface{}) {
1230			if doIndent {
1231				p.Indent()
1232			}
1233			if err := marshalMapToXmlIndent(doIndent, b, key, v, p); err != nil {
1234				return err
1235			}
1236			if doIndent {
1237				p.Outdent()
1238			}
1239		}
1240		return nil
1241	case []string:
1242		// This was added by https://github.com/slotix ... not a type that
1243		// would be encountered if mv generated from NewMapXml, NewMapJson.
1244		// Could be encountered in AnyXml(), so we'll let it stay, though
1245		// it should be merged with case []interface{}, above.
1246		//quick fix for []string type
1247		//[]string should be treated exaclty as []interface{}
1248		if len(value.([]string)) == 0 {
1249			if doIndent {
1250				if _, err = b.WriteString(p.padding + p.indent); err != nil {
1251					return err
1252				}
1253			}
1254			if _, err = b.WriteString("<" + key); err != nil {
1255				return err
1256			}
1257			elen = 0
1258			endTag = true
1259			break
1260		}
1261		for _, v := range value.([]string) {
1262			if doIndent {
1263				p.Indent()
1264			}
1265			if err := marshalMapToXmlIndent(doIndent, b, key, v, p); err != nil {
1266				return err
1267			}
1268			if doIndent {
1269				p.Outdent()
1270			}
1271		}
1272		return nil
1273	case nil:
1274		// terminate the tag
1275		if doIndent {
1276			// *s += p.padding
1277			if _, err = b.WriteString(p.padding); err != nil {
1278				return err
1279			}
1280		}
1281		if _, err = b.WriteString("<" + key); err != nil {
1282			return err
1283		}
1284		endTag, isSimple = true, true
1285		break
1286	default: // handle anything - even goofy stuff
1287		elen = 0
1288		switch value.(type) {
1289		case string:
1290			v := value.(string)
1291			if xmlEscapeChars {
1292				v = escapeChars(v)
1293			}
1294			elen = len(v)
1295			if elen > 0 {
1296				// *s += ">" + v
1297				if _, err = b.WriteString(">" + v); err != nil {
1298					return err
1299				}
1300			}
1301		case float64, bool, int, int32, int64, float32, json.Number:
1302			v := fmt.Sprintf("%v", value)
1303			elen = len(v) // always > 0
1304			if _, err = b.WriteString(">" + v); err != nil {
1305				return err
1306			}
1307		case []byte: // NOTE: byte is just an alias for uint8
1308			// similar to how xml.Marshal handles []byte structure members
1309			v := string(value.([]byte))
1310			if xmlEscapeChars {
1311				v = escapeChars(v)
1312			}
1313			elen = len(v)
1314			if elen > 0 {
1315				// *s += ">" + v
1316				if _, err = b.WriteString(">" + v); err != nil {
1317					return err
1318				}
1319			}
1320		default:
1321			if _, err = b.WriteString(">"); err != nil {
1322				return err
1323			}
1324			var v []byte
1325			var err error
1326			if doIndent {
1327				v, err = xml.MarshalIndent(value, p.padding, p.indent)
1328			} else {
1329				v, err = xml.Marshal(value)
1330			}
1331			if err != nil {
1332				if _, err = b.WriteString(">UNKNOWN"); err != nil {
1333					return err
1334				}
1335			} else {
1336				elen = len(v)
1337				if elen > 0 {
1338					if _, err = b.Write(v); err != nil {
1339						return err
1340					}
1341				}
1342			}
1343		}
1344		isSimple = true
1345		endTag = true
1346	}
1347	if endTag {
1348		if doIndent {
1349			if !isSimple {
1350				if _, err = b.WriteString(p.padding); err != nil {
1351					return err
1352				}
1353			}
1354		}
1355		if elen > 0 || useGoXmlEmptyElemSyntax {
1356			if elen == 0 {
1357				if _, err = b.WriteString(">"); err != nil {
1358					return err
1359				}
1360			}
1361			if _, err = b.WriteString(`</` + key + ">"); err != nil {
1362				return err
1363			}
1364		} else {
1365			if _, err = b.WriteString(`/>`); err != nil {
1366				return err
1367			}
1368		}
1369	}
1370	if doIndent {
1371		if p.cnt > p.start {
1372			if _, err = b.WriteString("\n"); err != nil {
1373				return err
1374			}
1375		}
1376		p.Outdent()
1377	}
1378
1379	return nil
1380}
1381
1382// ============================ sort interface implementation =================
1383
// attrList is a list of attribute {name, value} pairs that sorts by name;
// it implements sort.Interface.
type attrList [][2]string

// Len returns the number of attribute pairs.
func (a attrList) Len() int {
	return len(a)
}

// Swap exchanges the pairs at indexes i and j.
func (a attrList) Swap(i, j int) {
	a[i], a[j] = a[j], a[i]
}

// Less reports whether the name of pair i sorts strictly before the name of pair j.
// The comparison must be strict: sort.Interface treats two elements as equal
// only when neither is Less than the other.
func (a attrList) Less(i, j int) bool {
	return a[i][0] < a[j][0]
}
1397
// elemList is a list of element {key, value} pairs that sorts by key;
// it implements sort.Interface. The first member of each pair must be a string.
type elemList [][2]interface{}

// Len returns the number of element pairs.
func (e elemList) Len() int {
	return len(e)
}

// Swap exchanges the pairs at indexes i and j.
func (e elemList) Swap(i, j int) {
	e[i], e[j] = e[j], e[i]
}

// Less reports whether the key of pair i sorts strictly before the key of pair j.
// The comparison must be strict: sort.Interface treats two elements as equal
// only when neither is Less than the other. It panics if a key is not a string.
func (e elemList) Less(i, j int) bool {
	return e[i][0].(string) < e[j][0].(string)
}
1411