1// Copyright 2012-2016 Charles Banning. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file
4
5// xml.go - basically the core of X2j for map[string]interface{} values.
6//          NewMapXml, NewMapXmlReader, mv.Xml, mv.XmlWriter
7// see x2j and j2x for wrappers to provide end-to-end transformation of XML and JSON messages.
8
9package mxj
10
11import (
12	"bytes"
13	"encoding/json"
14	"encoding/xml"
15	"errors"
16	"fmt"
17	"io"
18	"reflect"
19	"sort"
20	"strconv"
21	"strings"
22	"time"
23)
24
25// ------------------- NewMapXml & NewMapXmlReader ... -------------------------
26
// XmlCharsetReader, when non-nil, is installed as the CharsetReader of the
// xml.Decoder used by the NewMapXml family, so that documents in an encoding
// other than UTF-8 can be decoded.
//
// Note: XmlCharsetReader is ignored when CustomDecoder is non-nil; configure
// the charset reader on CustomDecoder instead.
//
// Example:
//
//	import (
//		charset "code.google.com/p/go-charset/charset"
//		"github.com/clbanning/mxj"
//	)
//	...
//	mxj.XmlCharsetReader = charset.NewReader
//	m, merr := mxj.NewMapXml(xmlValue)
var XmlCharsetReader func(charset string, input io.Reader) (io.Reader, error)
38
39// NewMapXml - convert a XML doc into a Map
40// (This is analogous to unmarshalling a JSON string to map[string]interface{} using json.Unmarshal().)
41//	If the optional argument 'cast' is 'true', then values will be converted to boolean or float64 if possible.
42//
43//	Converting XML to JSON is a simple as:
44//		...
45//		mapVal, merr := mxj.NewMapXml(xmlVal)
46//		if merr != nil {
47//			// handle error
48//		}
49//		jsonVal, jerr := mapVal.Json()
50//		if jerr != nil {
51//			// handle error
52//		}
53//
54//	NOTES:
55//	   1. The 'xmlVal' will be parsed looking for an xml.StartElement, so BOM and other
56//	      extraneous xml.CharData will be ignored unless io.EOF is reached first.
57//	   2. If CoerceKeysToLower() has been called, then all key values will be lower case.
58//	   3. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
59func NewMapXml(xmlVal []byte, cast ...bool) (Map, error) {
60	var r bool
61	if len(cast) == 1 {
62		r = cast[0]
63	}
64	return xmlToMap(xmlVal, r)
65}
66
67// Get next XML doc from an io.Reader as a Map value.  Returns Map value.
68//	NOTES:
69//	   1. The 'xmlReader' will be parsed looking for an xml.StartElement, so BOM and other
70//	      extraneous xml.CharData will be ignored unless io.EOF is reached first.
71//	   2. If CoerceKeysToLower() has been called, then all key values will be lower case.
72//	   3. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
73func NewMapXmlReader(xmlReader io.Reader, cast ...bool) (Map, error) {
74	var r bool
75	if len(cast) == 1 {
76		r = cast[0]
77	}
78
79	// We need to put an *os.File reader in a ByteReader or the xml.NewDecoder
80	// will wrap it in a bufio.Reader and seek on the file beyond where the
81	// xml.Decoder parses!
82	if _, ok := xmlReader.(io.ByteReader); !ok {
83		xmlReader = myByteReader(xmlReader) // see code at EOF
84	}
85
86	// build the map
87	return xmlReaderToMap(xmlReader, r)
88}
89
90// Get next XML doc from an io.Reader as a Map value.  Returns Map value and slice with the raw XML.
91//	NOTES:
92//	   1. Due to the implementation of xml.Decoder, the raw XML off the reader is buffered to []byte
93//	      using a ByteReader. If the io.Reader is an os.File, there may be significant performance impact.
94//	      See the examples - getmetrics1.go through getmetrics4.go - for comparative use cases on a large
95//	      data set. If the io.Reader is wrapping a []byte value in-memory, however, such as http.Request.Body
96//	      you CAN use it to efficiently unmarshal a XML doc and retrieve the raw XML in a single call.
97//	   2. The 'raw' return value may be larger than the XML text value.
98//	   3. The 'xmlReader' will be parsed looking for an xml.StartElement, so BOM and other
99//	      extraneous xml.CharData will be ignored unless io.EOF is reached first.
100//	   4. If CoerceKeysToLower() has been called, then all key values will be lower case.
101//	   5. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
102func NewMapXmlReaderRaw(xmlReader io.Reader, cast ...bool) (Map, []byte, error) {
103	var r bool
104	if len(cast) == 1 {
105		r = cast[0]
106	}
107	// create TeeReader so we can retrieve raw XML
108	buf := make([]byte, 0)
109	wb := bytes.NewBuffer(buf)
110	trdr := myTeeReader(xmlReader, wb) // see code at EOF
111
112	m, err := xmlReaderToMap(trdr, r)
113
114	// retrieve the raw XML that was decoded
115	b := wb.Bytes()
116
117	if err != nil {
118		return nil, b, err
119	}
120
121	return m, b, nil
122}
123
124// xmlReaderToMap() - parse a XML io.Reader to a map[string]interface{} value
125func xmlReaderToMap(rdr io.Reader, r bool) (map[string]interface{}, error) {
126	// parse the Reader
127	p := xml.NewDecoder(rdr)
128	if CustomDecoder != nil {
129		useCustomDecoder(p)
130	} else {
131		p.CharsetReader = XmlCharsetReader
132	}
133	return xmlToMapParser("", nil, p, r)
134}
135
136// xmlToMap - convert a XML doc into map[string]interface{} value
137func xmlToMap(doc []byte, r bool) (map[string]interface{}, error) {
138	b := bytes.NewReader(doc)
139	p := xml.NewDecoder(b)
140	if CustomDecoder != nil {
141		useCustomDecoder(p)
142	} else {
143		p.CharsetReader = XmlCharsetReader
144	}
145	return xmlToMapParser("", nil, p, r)
146}
147
148// ===================================== where the work happens =============================
149
150// PrependAttrWithHyphen. Prepend attribute tags with a hyphen.
151// Default is 'true'. (Not applicable to NewMapXmlSeq(), mv.XmlSeq(), etc.)
152//	Note:
153//		If 'false', unmarshaling and marshaling is not symmetric. Attributes will be
154//		marshal'd as <attr_tag>attr</attr_tag> and may be part of a list.
155func PrependAttrWithHyphen(v bool) {
156	if v {
157		attrPrefix = "-"
158		lenAttrPrefix = len(attrPrefix)
159		return
160	}
161	attrPrefix = ""
162	lenAttrPrefix = len(attrPrefix)
163}
164
// includeTagSeqNum, when true, makes the decoder add a "_seq" entry to inner
// tags. (Added per Sean Murphy, murphysean84@gmail.com.)
var includeTagSeqNum bool

// IncludeTagSeqNum turns on/off the inclusion of a "_seq":N key:value pair with
// each inner tag, where N is the tag's position among its siblings when parsed.
// This is of limited usefulness, since list values cannot be tagged with "_seq"
// without changing their depth in the Map.
// So THIS SHOULD BE USED WITH CAUTION - see the test cases.
//
// The "_seq" value is stored as an int. A simple element (no attributes, no
// sub-elements) is converted to a map with its value under the "#text" key so
// that "_seq" can be attached. An XML sub-element that is itself named "_seq"
// is overwritten.
//
// Here's a sample of what you get:
//
//	<Obj c="la" x="dee" h="da">
//		<IntObj id="3"/>
//		<IntObj1 id="1"/>
//		<IntObj id="2"/>
//		<StrObj>hello</StrObj>
//	</Obj>
//
// parses as:
//
//	{
//	"Obj":{
//		"-c":"la",
//		"-h":"da",
//		"-x":"dee",
//		"IntObj":[
//			{
//				"-id":"3",
//				"_seq":0
//			},
//			{
//				"-id":"2",
//				"_seq":2
//			}],
//		"IntObj1":{
//			"-id":"1",
//			"_seq":1
//			},
//		"StrObj":{
//			"#text":"hello", // simple element value gets "#text" tag
//			"_seq":3
//			}
//		}
//	}
func IncludeTagSeqNum(b bool) { includeTagSeqNum = b }
211
// lowerCase, when true, makes the decoder lower-case all keys.
var lowerCase bool

// CoerceKeysToLower coerces all tag values to lower-case keys. This is useful
// if you've got sources with variable tag capitalization, and you want to use
// m.ValuesForKeys(), etc., with the key or path spec in lower case.
//
//	CoerceKeysToLower() will toggle the coercion flag true|false - on|off
//	CoerceKeysToLower(true|false) will set the coercion flag on|off
//
// A call with more than one argument leaves the flag unchanged.
//
//	NOTE: only recognized by NewMapXml, NewMapXmlReader, and NewMapXmlReaderRaw functions as well as
//	      the associated HandleXmlReader and HandleXmlReaderRaw.
func CoerceKeysToLower(b ...bool) {
	switch len(b) {
	case 0:
		lowerCase = !lowerCase
	case 1:
		lowerCase = b[0]
	}
}
230
// 25jun16: Allow user to specify the "prefix" string for XML attribute key labels.
// The prefix is held in the attrPrefix variable (with its length cached in
// lenAttrPrefix) rather than in a constant; an empty attrPrefix means
// attribute keys carry no prefix at all.
var (
	attrPrefix    = "-" // the default
	lenAttrPrefix = 1   // the default; always len(attrPrefix)
)

// SetAttrPrefix changes the attribute key prefix from the default, "-", to the
// specified value, s.
// SetAttrPrefix("") is the same as PrependAttrWithHyphen(false).
// (Not applicable for NewMapXmlSeq(), mv.XmlSeq(), etc.)
func SetAttrPrefix(s string) {
	attrPrefix, lenAttrPrefix = s, len(s)
}
245
// 18jan17: Allows user to specify if the map keys should be in snake case
// instead of the default hyphenated notation. When true, the decoder replaces
// every '-' in tag and attribute names with '_'.
var snakeCaseKeys bool

// CoerceKeysToSnakeCase changes the default, false, to the specified value, b.
// Called with no argument it toggles the current setting; called with more
// than one argument it leaves the setting unchanged.
// Note: the attribute prefix will be a hyphen, '-', or whatever string value
// has been specified using SetAttrPrefix.
func CoerceKeysToSnakeCase(b ...bool) {
	switch len(b) {
	case 0:
		snakeCaseKeys = !snakeCaseKeys
	case 1:
		snakeCaseKeys = b[0]
	}
}
260
// 05feb17: support processing XMPP streams (issue #36)
var handleXMPPStreamTag bool

// HandleXMPPStreamTag causes the decoder to parse XMPP <stream:stream> elements.
// If called with no argument, XMPP stream element handling is toggled on/off;
// if called with one argument, handling is set to that value.
// (See xmppStream_test.go for example.)
//
//	If called with NewMapXml, NewMapXmlReader, NewMapXmlReaderRaw the "stream"
//	element will be returned as soon as its start tag is read, as:
//		map["stream"]interface{}{map[-<attrs>]interface{}}.
//	If called with NewMapSeq, NewMapSeqReader, NewMapSeqReaderRaw the "stream"
//	element will be returned as:
//		map["stream:stream"]interface{}{map["#attr"]interface{}{map[string]interface{}}}
//		where the "#attr" values have "#text" and "#seq" keys. (See NewMapXmlSeq.)
func HandleXMPPStreamTag(b ...bool) {
	switch len(b) {
	case 0:
		handleXMPPStreamTag = !handleXMPPStreamTag
	case 1:
		handleXMPPStreamTag = b[0]
	}
}
281
// 21jan18 - decode all values as map["#text":value] (issue #56)
var decodeSimpleValuesAsMap bool

// DecodeSimpleValuesAsMap forces all values to be decoded as map["#text":<value>].
// If called with no argument, the decoding is toggled on/off; if called with
// one argument, the setting takes that value.
//
// By default the NewMapXml functions decode simple values without attributes as
// map[<tag>:<value>]. This function causes simple values without attributes to be
// decoded the same as simple values with attributes - map[<tag>:map["#text":<value>]].
func DecodeSimpleValuesAsMap(b ...bool) {
	switch len(b) {
	case 0:
		decodeSimpleValuesAsMap = !decodeSimpleValuesAsMap
	case 1:
		decodeSimpleValuesAsMap = b[0]
	}
}
298
299// xmlToMapParser (2015.11.12) - load a 'clean' XML doc into a map[string]interface{} directly.
300// A refactoring of xmlToTreeParser(), markDuplicate() and treeToMap() - here, all-in-one.
301// We've removed the intermediate *node tree with the allocation and subsequent rescanning.
302func xmlToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[string]interface{}, error) {
303	if lowerCase {
304		skey = strings.ToLower(skey)
305	}
306	if snakeCaseKeys {
307		skey = strings.Replace(skey, "-", "_", -1)
308	}
309
310	// NOTE: all attributes and sub-elements parsed into 'na', 'na' is returned as value for 'skey' in 'n'.
311	// Unless 'skey' is a simple element w/o attributes, in which case the xml.CharData value is the value.
312	var n, na map[string]interface{}
313	var seq int // for includeTagSeqNum
314
315	// Allocate maps and load attributes, if any.
316	// NOTE: on entry from NewMapXml(), etc., skey=="", and we fall through
317	//       to get StartElement then recurse with skey==xml.StartElement.Name.Local
318	//       where we begin allocating map[string]interface{} values 'n' and 'na'.
319	if skey != "" {
320		n = make(map[string]interface{})  // old n
321		na = make(map[string]interface{}) // old n.nodes
322		if len(a) > 0 {
323			for _, v := range a {
324				if snakeCaseKeys {
325					v.Name.Local = strings.Replace(v.Name.Local, "-", "_", -1)
326				}
327				var key string
328				key = attrPrefix + v.Name.Local
329				if lowerCase {
330					key = strings.ToLower(key)
331				}
332				na[key] = cast(v.Value, r)
333			}
334		}
335	}
336	// Return XMPP <stream:stream> message.
337	if handleXMPPStreamTag && skey == "stream" {
338		n[skey] = na
339		return n, nil
340	}
341
342	for {
343		t, err := p.Token()
344		if err != nil {
345			if err != io.EOF {
346				return nil, errors.New("xml.Decoder.Token() - " + err.Error())
347			}
348			return nil, err
349		}
350		switch t.(type) {
351		case xml.StartElement:
352			tt := t.(xml.StartElement)
353
354			// First call to xmlToMapParser() doesn't pass xml.StartElement - the map key.
355			// So when the loop is first entered, the first token is the root tag along
356			// with any attributes, which we process here.
357			//
358			// Subsequent calls to xmlToMapParser() will pass in tag+attributes for
359			// processing before getting the next token which is the element value,
360			// which is done above.
361			if skey == "" {
362				return xmlToMapParser(tt.Name.Local, tt.Attr, p, r)
363			}
364
365			// If not initializing the map, parse the element.
366			// len(nn) == 1, necessarily - it is just an 'n'.
367			nn, err := xmlToMapParser(tt.Name.Local, tt.Attr, p, r)
368			if err != nil {
369				return nil, err
370			}
371
372			// The nn map[string]interface{} value is a na[nn_key] value.
373			// We need to see if nn_key already exists - means we're parsing a list.
374			// This may require converting na[nn_key] value into []interface{} type.
375			// First, extract the key:val for the map - it's a singleton.
376			// Note:
377			// * if CoerceKeysToLower() called, then key will be lower case.
378			// * if CoerceKeysToSnakeCase() called, then key will be converted to snake case.
379			var key string
380			var val interface{}
381			for key, val = range nn {
382				break
383			}
384
385			// IncludeTagSeqNum requests that the element be augmented with a "_seq" sub-element.
386			// In theory, we don't need this if len(na) == 1. But, we don't know what might
387			// come next - we're only parsing forward.  So if you ask for 'includeTagSeqNum' you
388			// get it on every element. (Personally, I never liked this, but I added it on request
389			// and did get a $50 Amazon gift card in return - now we support it for backwards compatibility!)
390			if includeTagSeqNum {
391				switch val.(type) {
392				case []interface{}:
393					// noop - There's no clean way to handle this w/o changing message structure.
394				case map[string]interface{}:
395					val.(map[string]interface{})["_seq"] = seq // will overwrite an "_seq" XML tag
396					seq++
397				case interface{}: // a non-nil simple element: string, float64, bool
398					v := map[string]interface{}{"#text": val}
399					v["_seq"] = seq
400					seq++
401					val = v
402				}
403			}
404
405			// 'na' holding sub-elements of n.
406			// See if 'key' already exists.
407			// If 'key' exists, then this is a list, if not just add key:val to na.
408			if v, ok := na[key]; ok {
409				var a []interface{}
410				switch v.(type) {
411				case []interface{}:
412					a = v.([]interface{})
413				default: // anything else - note: v.(type) != nil
414					a = []interface{}{v}
415				}
416				a = append(a, val)
417				na[key] = a
418			} else {
419				na[key] = val // save it as a singleton
420			}
421		case xml.EndElement:
422			// len(n) > 0 if this is a simple element w/o xml.Attrs - see xml.CharData case.
423			if len(n) == 0 {
424				// If len(na)==0 we have an empty element == "";
425				// it has no xml.Attr nor xml.CharData.
426				// Note: in original node-tree parser, val defaulted to "";
427				// so we always had the default if len(node.nodes) == 0.
428				if len(na) > 0 {
429					n[skey] = na
430				} else {
431					n[skey] = "" // empty element
432				}
433			}
434			return n, nil
435		case xml.CharData:
436			// clean up possible noise
437			tt := strings.Trim(string(t.(xml.CharData)), "\t\r\b\n ")
438			if len(tt) > 0 {
439				if len(na) > 0 || decodeSimpleValuesAsMap {
440					na["#text"] = cast(tt, r)
441				} else if skey != "" {
442					n[skey] = cast(tt, r)
443				} else {
444					// per Adrian (http://www.adrianlungu.com/) catch stray text
445					// in decoder stream -
446					// https://github.com/clbanning/mxj/pull/14#issuecomment-182816374
447					// NOTE: CharSetReader must be set to non-UTF-8 CharSet or you'll get
448					// a p.Token() decoding error when the BOM is UTF-16 or UTF-32.
449					continue
450				}
451			}
452		default:
453			// noop
454		}
455	}
456}
457
// castNanInf, when true, lets cast() convert NaN/Inf spellings to float64.
var castNanInf bool

// CastNanInf controls whether the XML values "NaN", "Inf", "-Inf" (and the
// other spellings accepted by strconv.ParseFloat: "+Inf", "Infinity",
// "+Infinity", "-Infinity", in any letter case) are cast to 'float64'.
// By default, these values will be decoded as 'string'.
func CastNanInf(b bool) {
	castNanInf = b
}

// cast tries to convert the string value s to float64 or bool.
//
// If r is false, s is returned unchanged. Otherwise:
//   - unless CastNanInf(true) is in effect, any NaN/Inf spelling is returned
//     as the original string;
//   - a value that strconv.ParseFloat accepts is returned as float64;
//   - a value shorter than 6 bytes that starts with 't', 'T', 'f' or 'F' and
//     that strconv.ParseBool accepts is returned as bool;
//   - anything else is returned as the original string.
func cast(s string, r bool) interface{} {
	if !r {
		return s
	}

	// handle nan and inf - the list must cover every special spelling that
	// ParseFloat recognizes, or values such as "Infinity" would slip through
	// and become float64 +Inf despite castNanInf being off.
	if !castNanInf {
		switch strings.ToLower(s) {
		case "nan", "inf", "+inf", "-inf", "infinity", "+infinity", "-infinity":
			return s
		}
	}

	// handle numeric strings ahead of boolean
	if f, err := strconv.ParseFloat(s, 64); err == nil {
		return f
	}

	// ParseBool treats "1"==true & "0"==false, we've already scanned those
	// values as float64. See if value has 't' or 'f' as initial screen to
	// minimize calls to ParseBool; also, see if len(s) < 6.
	if len(s) > 0 && len(s) < 6 {
		switch s[0] {
		case 't', 'T', 'f', 'F':
			if b, err := strconv.ParseBool(s); err == nil {
				return b
			}
		}
	}
	return s
}
495
496// ------------------ END: NewMapXml & NewMapXmlReader -------------------------
497
498// ------------------ mv.Xml & mv.XmlWriter - from j2x ------------------------
499
// DefaultRootTag is the root element tag used when encoding a Map as XML if
// the caller supplies no root tag and the Map's own key is not used as one.
const DefaultRootTag = "doc"
503
// useGoXmlEmptyElemSyntax selects the encoding of empty elements:
// false (the default) gives <tag .../>, true gives <tag ...></tag>.
var useGoXmlEmptyElemSyntax bool

// XmlGoEmptyElemSyntax selects <tag ...></tag> rather than <tag .../> for
// empty elements.
//
//	Go's encoding/xml package marshals empty XML elements as <tag ...></tag>.  By default this package
//	encodes empty elements as <tag .../>.  If you're marshaling Map values that include structures
//	(which are passed to xml.Marshal for encoding), this will let you conform to the standard package.
func XmlGoEmptyElemSyntax() { useGoXmlEmptyElemSyntax = true }

// XmlDefaultEmptyElemSyntax selects <tag .../> rather than <tag ...></tag> for
// empty elements, i.e., it returns the XML encoding for empty elements to the
// default package setting. Reverses the effect of XmlGoEmptyElemSyntax().
func XmlDefaultEmptyElemSyntax() { useGoXmlEmptyElemSyntax = false }
520
521// Encode a Map as XML.  The companion of NewMapXml().
522// The following rules apply.
523//    - The key label "#text" is treated as the value for a simple element with attributes.
524//    - Map keys that begin with a hyphen, '-', are interpreted as attributes.
525//      It is an error if the attribute doesn't have a []byte, string, number, or boolean value.
526//    - Map value type encoding:
527//          > string, bool, float64, int, int32, int64, float32: per "%v" formating
528//          > []bool, []uint8: by casting to string
529//          > structures, etc.: handed to xml.Marshal() - if there is an error, the element
530//            value is "UNKNOWN"
531//    - Elements with only attribute values or are null are terminated using "/>".
532//    - If len(mv) == 1 and no rootTag is provided, then the map key is used as the root tag, possible.
533//      Thus, `{ "key":"value" }` encodes as "<key>value</key>".
534//    - To encode empty elements in a syntax consistent with encoding/xml call UseGoXmlEmptyElementSyntax().
535// The attributes tag=value pairs are alphabetized by "tag".  Also, when encoding map[string]interface{} values -
536// complex elements, etc. - the key:value pairs are alphabetized by key so the resulting tags will appear sorted.
537func (mv Map) Xml(rootTag ...string) ([]byte, error) {
538	m := map[string]interface{}(mv)
539	var err error
540	s := new(string)
541	p := new(pretty) // just a stub
542
543	if len(m) == 1 && len(rootTag) == 0 {
544		for key, value := range m {
545			// if it an array, see if all values are map[string]interface{}
546			// we force a new root tag if we'll end up with no key:value in the list
547			// so: key:[string_val, bool:true] --> <doc><key>string_val</key><bool>true</bool></doc>
548			switch value.(type) {
549			case []interface{}:
550				for _, v := range value.([]interface{}) {
551					switch v.(type) {
552					case map[string]interface{}: // noop
553					default: // anything else
554						err = mapToXmlIndent(false, s, DefaultRootTag, m, p)
555						goto done
556					}
557				}
558			}
559			err = mapToXmlIndent(false, s, key, value, p)
560		}
561	} else if len(rootTag) == 1 {
562		err = mapToXmlIndent(false, s, rootTag[0], m, p)
563	} else {
564		err = mapToXmlIndent(false, s, DefaultRootTag, m, p)
565	}
566done:
567	return []byte(*s), err
568}
569
570// The following implementation is provided only for symmetry with NewMapXmlReader[Raw]
571// The names will also provide a key for the number of return arguments.
572
573// Writes the Map as  XML on the Writer.
574// See Xml() for encoding rules.
575func (mv Map) XmlWriter(xmlWriter io.Writer, rootTag ...string) error {
576	x, err := mv.Xml(rootTag...)
577	if err != nil {
578		return err
579	}
580
581	_, err = xmlWriter.Write(x)
582	return err
583}
584
585// Writes the Map as  XML on the Writer. []byte is the raw XML that was written.
586// See Xml() for encoding rules.
587func (mv Map) XmlWriterRaw(xmlWriter io.Writer, rootTag ...string) ([]byte, error) {
588	x, err := mv.Xml(rootTag...)
589	if err != nil {
590		return x, err
591	}
592
593	_, err = xmlWriter.Write(x)
594	return x, err
595}
596
597// Writes the Map as pretty XML on the Writer.
598// See Xml() for encoding rules.
599func (mv Map) XmlIndentWriter(xmlWriter io.Writer, prefix, indent string, rootTag ...string) error {
600	x, err := mv.XmlIndent(prefix, indent, rootTag...)
601	if err != nil {
602		return err
603	}
604
605	_, err = xmlWriter.Write(x)
606	return err
607}
608
609// Writes the Map as pretty XML on the Writer. []byte is the raw XML that was written.
610// See Xml() for encoding rules.
611func (mv Map) XmlIndentWriterRaw(xmlWriter io.Writer, prefix, indent string, rootTag ...string) ([]byte, error) {
612	x, err := mv.XmlIndent(prefix, indent, rootTag...)
613	if err != nil {
614		return x, err
615	}
616
617	_, err = xmlWriter.Write(x)
618	return x, err
619}
620
621// -------------------- END: mv.Xml & mv.XmlWriter -------------------------------
622
623// --------------  Handle XML stream by processing Map value --------------------
624
// xhandlerPollInterval is the default poll delay that keeps a Handler from
// spinning on an open stream - like sitting on os.Stdin waiting for input.
var xhandlerPollInterval = time.Millisecond
628
629// Bulk process XML using handlers that process a Map value.
630//	'rdr' is an io.Reader for XML (stream)
631//	'mapHandler' is the Map processor. Return of 'false' stops io.Reader processing.
632//	'errHandler' is the error processor. Return of 'false' stops io.Reader processing and returns the error.
633//	Note: mapHandler() and errHandler() calls are blocking, so reading and processing of messages is serialized.
634//	      This means that you can stop reading the file on error or after processing a particular message.
635//	      To have reading and handling run concurrently, pass argument to a go routine in handler and return 'true'.
636func HandleXmlReader(xmlReader io.Reader, mapHandler func(Map) bool, errHandler func(error) bool) error {
637	var n int
638	for {
639		m, merr := NewMapXmlReader(xmlReader)
640		n++
641
642		// handle error condition with errhandler
643		if merr != nil && merr != io.EOF {
644			merr = fmt.Errorf("[xmlReader: %d] %s", n, merr.Error())
645			if ok := errHandler(merr); !ok {
646				// caused reader termination
647				return merr
648			}
649			continue
650		}
651
652		// pass to maphandler
653		if len(m) != 0 {
654			if ok := mapHandler(m); !ok {
655				break
656			}
657		} else if merr != io.EOF {
658			time.Sleep(xhandlerPollInterval)
659		}
660
661		if merr == io.EOF {
662			break
663		}
664	}
665	return nil
666}
667
668// Bulk process XML using handlers that process a Map value and the raw XML.
669//	'rdr' is an io.Reader for XML (stream)
670//	'mapHandler' is the Map and raw XML - []byte - processor. Return of 'false' stops io.Reader processing.
671//	'errHandler' is the error and raw XML processor. Return of 'false' stops io.Reader processing and returns the error.
672//	Note: mapHandler() and errHandler() calls are blocking, so reading and processing of messages is serialized.
673//	      This means that you can stop reading the file on error or after processing a particular message.
674//	      To have reading and handling run concurrently, pass argument(s) to a go routine in handler and return 'true'.
675//	See NewMapXmlReaderRaw for comment on performance associated with retrieving raw XML from a Reader.
676func HandleXmlReaderRaw(xmlReader io.Reader, mapHandler func(Map, []byte) bool, errHandler func(error, []byte) bool) error {
677	var n int
678	for {
679		m, raw, merr := NewMapXmlReaderRaw(xmlReader)
680		n++
681
682		// handle error condition with errhandler
683		if merr != nil && merr != io.EOF {
684			merr = fmt.Errorf("[xmlReader: %d] %s", n, merr.Error())
685			if ok := errHandler(merr, raw); !ok {
686				// caused reader termination
687				return merr
688			}
689			continue
690		}
691
692		// pass to maphandler
693		if len(m) != 0 {
694			if ok := mapHandler(m, raw); !ok {
695				break
696			}
697		} else if merr != io.EOF {
698			time.Sleep(xhandlerPollInterval)
699		}
700
701		if merr == io.EOF {
702			break
703		}
704	}
705	return nil
706}
707
708// ----------------- END: Handle XML stream by processing Map value --------------
709
710// --------  a hack of io.TeeReader ... need one that's an io.ByteReader for xml.NewDecoder() ----------
711
712// This is a clone of io.TeeReader with the additional method t.ReadByte().
713// Thus, this TeeReader is also an io.ByteReader.
// This is necessary because xml.NewDecoder uses a ByteReader, not a Reader. It appears to have been written
// with bufio.Reader or bytes.Reader in mind ... not a generic io.Reader, which doesn't have to have ReadByte().
716// If NewDecoder is passed a Reader that does not satisfy ByteReader() it wraps the Reader with
717// bufio.NewReader and uses ReadByte rather than Read that runs the TeeReader pipe logic.
718
// teeReader is an io.TeeReader work-alike that is also an io.ByteReader:
// everything obtained from r - via Read or ReadByte - is copied to w before it
// is handed to the caller.
type teeReader struct {
	r   io.Reader
	w   io.Writer
	b   []byte // one-byte scratch buffer used by ReadByte
	err error  // read error held back until the byte that came with it has been delivered
}

// myTeeReader returns a reader that copies to w whatever is read from r.
// The returned value also implements io.ByteReader.
func myTeeReader(r io.Reader, w io.Writer) io.Reader {
	return &teeReader{r: r, w: w, b: make([]byte, 1)}
}

// Read reads from the underlying reader into p and copies the bytes read to
// the writer, as io.TeeReader does. xml.NewDecoder itself uses ReadByte, but a
// consumer that is handed this value as a plain io.Reader - a charset reader,
// for instance - must get real data rather than an endless (0, nil).
func (t *teeReader) Read(p []byte) (int, error) {
	if t.err != nil {
		// deliver the error held back by ReadByte
		err := t.err
		t.err = nil
		return 0, err
	}
	n, err := t.r.Read(p)
	if n > 0 {
		if wn, werr := t.w.Write(p[:n]); werr != nil {
			return wn, werr
		}
	}
	return n, err
}

// ReadByte reads a single byte from the underlying reader and copies it to the
// writer.
//
// An io.Reader may return a byte together with an error (typically the last
// byte and io.EOF). Callers of ReadByte discard the byte when the error is
// non-nil, so the byte is returned with a nil error and the error is reported
// by the next call. Reads that yield neither data nor an error are retried; if
// the reader keeps doing that, io.ErrNoProgress is returned.
// A failure to copy the byte to the writer is returned with the byte.
func (t *teeReader) ReadByte() (byte, error) {
	const maxEmptyReads = 100

	if t.err != nil {
		err := t.err
		t.err = nil
		return 0, err
	}
	for i := 0; i < maxEmptyReads; i++ {
		n, err := t.r.Read(t.b)
		if n > 0 {
			if _, werr := t.w.Write(t.b[:1]); werr != nil {
				return t.b[0], werr
			}
			t.err = err // nil, or reported on the next call
			return t.b[0], nil
		}
		if err != nil {
			return 0, err
		}
	}
	return 0, io.ErrNoProgress
}
744
// byteReader adds ReadByte to an arbitrary io.Reader so that xml.NewDecoder
// uses it directly instead of wrapping it in a read-ahead bufio.Reader.
// For use with NewMapXmlReader & NewMapXmlSeqReader.
type byteReader struct {
	r   io.Reader
	b   []byte // one-byte scratch buffer used by ReadByte
	err error  // read error held back until the byte that came with it has been delivered
}

// myByteReader wraps r in a reader that also implements io.ByteReader.
func myByteReader(r io.Reader) io.Reader {
	return &byteReader{r: r, b: make([]byte, 1)}
}

// Read passes the read through to the underlying reader.
// Need for io.Reader interface ...
// Needed if reading a malformed http.Request.Body - issue #38.
func (b *byteReader) Read(p []byte) (int, error) {
	if b.err != nil {
		// deliver the error held back by ReadByte
		err := b.err
		b.err = nil
		return 0, err
	}
	return b.r.Read(p)
}

// ReadByte reads a single byte from the underlying reader.
//
// An io.Reader may return a byte together with an error (typically the last
// byte and io.EOF). Callers of ReadByte discard the byte when the error is
// non-nil, so the byte is returned with a nil error and the error is reported
// by the next call. Reads that yield neither data nor an error are retried; if
// the reader keeps doing that, io.ErrNoProgress is returned.
func (b *byteReader) ReadByte() (byte, error) {
	const maxEmptyReads = 100

	if b.err != nil {
		err := b.err
		b.err = nil
		return 0, err
	}
	for i := 0; i < maxEmptyReads; i++ {
		n, err := b.r.Read(b.b)
		if n > 0 {
			b.err = err // nil, or reported on the next call
			return b.b[0], nil
		}
		if err != nil {
			return 0, err
		}
	}
	return 0, io.ErrNoProgress
}
770
771// ----------------------- END: io.TeeReader hack -----------------------------------
772
773// ---------------------- XmlIndent - from j2x package ----------------------------
774
775// Encode a map[string]interface{} as a pretty XML string.
776// See Xml for encoding rules.
777func (mv Map) XmlIndent(prefix, indent string, rootTag ...string) ([]byte, error) {
778	m := map[string]interface{}(mv)
779
780	var err error
781	s := new(string)
782	p := new(pretty)
783	p.indent = indent
784	p.padding = prefix
785
786	if len(m) == 1 && len(rootTag) == 0 {
787		// this can extract the key for the single map element
788		// use it if it isn't a key for a list
789		for key, value := range m {
790			if _, ok := value.([]interface{}); ok {
791				err = mapToXmlIndent(true, s, DefaultRootTag, m, p)
792			} else {
793				err = mapToXmlIndent(true, s, key, value, p)
794			}
795		}
796	} else if len(rootTag) == 1 {
797		err = mapToXmlIndent(true, s, rootTag[0], m, p)
798	} else {
799		err = mapToXmlIndent(true, s, DefaultRootTag, m, p)
800	}
801	return []byte(*s), err
802}
803
// pretty carries the indentation state used while encoding XML.
type pretty struct {
	indent   string // string added to the padding for each nesting level
	cnt      int    // number of Indent() calls not yet undone by Outdent()
	padding  string // current line prefix: initial prefix plus cnt * indent
	mapDepth int    // bookkeeping maintained by the encoder
	start    int    // NOTE(review): not referenced in the code visible here
}

// Indent moves one nesting level deeper by appending the indent string to the
// padding.
func (p *pretty) Indent() {
	p.cnt++
	p.padding += p.indent
}

// Outdent moves one nesting level back by stripping one indent string from the
// end of the padding. It does nothing when no Indent() is outstanding.
func (p *pretty) Outdent() {
	if p.cnt <= 0 {
		return
	}
	keep := len(p.padding) - len(p.indent)
	p.padding = p.padding[:keep]
	p.cnt--
}
823
824// where the work actually happens
825// returns an error if an attribute is not atomic
826func mapToXmlIndent(doIndent bool, s *string, key string, value interface{}, pp *pretty) error {
827	var endTag bool
828	var isSimple bool
829	var elen int
830	p := &pretty{pp.indent, pp.cnt, pp.padding, pp.mapDepth, pp.start}
831
832	// per issue #48, 18apr18 - try and coerce maps to map[string]interface{}
833	// Don't need for mapToXmlSeqIndent, since maps there are decoded by NewMapXmlSeq().
834	if reflect.ValueOf(value).Kind() == reflect.Map {
835		switch value.(type) {
836		case map[string]interface{}:
837		default:
838			val := make(map[string]interface{})
839			vv := reflect.ValueOf(value)
840			keys := vv.MapKeys()
841			for _, k := range keys {
842				val[fmt.Sprint(k)] = vv.MapIndex(k).Interface()
843			}
844			value = val
845		}
846	}
847
848	switch value.(type) {
849	// special handling of []interface{} values when len(value) == 0
850	case map[string]interface{}, []byte, string, float64, bool, int, int32, int64, float32, json.Number:
851		if doIndent {
852			*s += p.padding
853		}
854		*s += `<` + key
855	}
856	switch value.(type) {
857	case map[string]interface{}:
858		vv := value.(map[string]interface{})
859		lenvv := len(vv)
860		// scan out attributes - attribute keys have prepended attrPrefix
861		attrlist := make([][2]string, len(vv))
862		var n int
863		var ss string
864		for k, v := range vv {
865			if lenAttrPrefix > 0 && lenAttrPrefix < len(k) && k[:lenAttrPrefix] == attrPrefix {
866				switch v.(type) {
867				case string:
868					if xmlEscapeChars {
869						ss = escapeChars(v.(string))
870					} else {
871						ss = v.(string)
872					}
873					attrlist[n][0] = k[lenAttrPrefix:]
874					attrlist[n][1] = ss
875				case float64, bool, int, int32, int64, float32, json.Number:
876					attrlist[n][0] = k[lenAttrPrefix:]
877					attrlist[n][1] = fmt.Sprintf("%v", v)
878				case []byte:
879					if xmlEscapeChars {
880						ss = escapeChars(string(v.([]byte)))
881					} else {
882						ss = string(v.([]byte))
883					}
884					attrlist[n][0] = k[lenAttrPrefix:]
885					attrlist[n][1] = ss
886				default:
887					return fmt.Errorf("invalid attribute value for: %s:<%T>", k, v)
888				}
889				n++
890			}
891		}
892		if n > 0 {
893			attrlist = attrlist[:n]
894			sort.Sort(attrList(attrlist))
895			for _, v := range attrlist {
896				*s += ` ` + v[0] + `="` + v[1] + `"`
897			}
898		}
899		// only attributes?
900		if n == lenvv {
901			if useGoXmlEmptyElemSyntax {
902				*s += `</` + key + ">"
903			} else {
904				*s += `/>`
905			}
906			break
907		}
908
909		// simple element? Note: '#text" is an invalid XML tag.
910		if v, ok := vv["#text"]; ok && n+1 == lenvv {
911			switch v.(type) {
912			case string:
913				if xmlEscapeChars {
914					v = escapeChars(v.(string))
915				} else {
916					v = v.(string)
917				}
918			case []byte:
919				if xmlEscapeChars {
920					v = escapeChars(string(v.([]byte)))
921				}
922			}
923			*s += ">" + fmt.Sprintf("%v", v)
924			endTag = true
925			elen = 1
926			isSimple = true
927			break
928		} else if ok {
929			// Handle edge case where simple element with attributes
930			// is unmarshal'd using NewMapXml() where attribute prefix
931			// has been set to "".
932			// TODO(clb): should probably scan all keys for invalid chars.
933			return fmt.Errorf("invalid attribute key label: #text - due to attributes not being prefixed")
934		}
935
936		// close tag with possible attributes
937		*s += ">"
938		if doIndent {
939			*s += "\n"
940		}
941		// something more complex
942		p.mapDepth++
943		// extract the map k:v pairs and sort on key
944		elemlist := make([][2]interface{}, len(vv))
945		n = 0
946		for k, v := range vv {
947			if lenAttrPrefix > 0 && lenAttrPrefix < len(k) && k[:lenAttrPrefix] == attrPrefix {
948				continue
949			}
950			elemlist[n][0] = k
951			elemlist[n][1] = v
952			n++
953		}
954		elemlist = elemlist[:n]
955		sort.Sort(elemList(elemlist))
956		var i int
957		for _, v := range elemlist {
958			switch v[1].(type) {
959			case []interface{}:
960			default:
961				if i == 0 && doIndent {
962					p.Indent()
963				}
964			}
965			i++
966			if err := mapToXmlIndent(doIndent, s, v[0].(string), v[1], p); err != nil {
967				return err
968			}
969			switch v[1].(type) {
970			case []interface{}: // handled in []interface{} case
971			default:
972				if doIndent {
973					p.Outdent()
974				}
975			}
976			i--
977		}
978		p.mapDepth--
979		endTag = true
980		elen = 1 // we do have some content ...
981	case []interface{}:
982		// special case - found during implementing Issue #23
983		if len(value.([]interface{})) == 0 {
984			if doIndent {
985				*s += p.padding + p.indent
986			}
987			*s += "<" + key
988			elen = 0
989			endTag = true
990			break
991		}
992		for _, v := range value.([]interface{}) {
993			if doIndent {
994				p.Indent()
995			}
996			if err := mapToXmlIndent(doIndent, s, key, v, p); err != nil {
997				return err
998			}
999			if doIndent {
1000				p.Outdent()
1001			}
1002		}
1003		return nil
1004	case []string:
1005		// This was added by https://github.com/slotix ... not a type that
1006		// would be encountered if mv generated from NewMapXml, NewMapJson.
1007		// Could be encountered in AnyXml(), so we'll let it stay, though
1008		// it should be merged with case []interface{}, above.
1009		//quick fix for []string type
1010		//[]string should be treated exaclty as []interface{}
1011		if len(value.([]string)) == 0 {
1012			if doIndent {
1013				*s += p.padding + p.indent
1014			}
1015			*s += "<" + key
1016			elen = 0
1017			endTag = true
1018			break
1019		}
1020		for _, v := range value.([]string) {
1021			if doIndent {
1022				p.Indent()
1023			}
1024			if err := mapToXmlIndent(doIndent, s, key, v, p); err != nil {
1025				return err
1026			}
1027			if doIndent {
1028				p.Outdent()
1029			}
1030		}
1031		return nil
1032	case nil:
1033		// terminate the tag
1034		if doIndent {
1035			*s += p.padding
1036		}
1037		*s += "<" + key
1038		endTag, isSimple = true, true
1039		break
1040	default: // handle anything - even goofy stuff
1041		elen = 0
1042		switch value.(type) {
1043		case string:
1044			v := value.(string)
1045			if xmlEscapeChars {
1046				v = escapeChars(v)
1047			}
1048			elen = len(v)
1049			if elen > 0 {
1050				*s += ">" + v
1051			}
1052		case float64, bool, int, int32, int64, float32, json.Number:
1053			v := fmt.Sprintf("%v", value)
1054			elen = len(v) // always > 0
1055			*s += ">" + v
1056		case []byte: // NOTE: byte is just an alias for uint8
1057			// similar to how xml.Marshal handles []byte structure members
1058			v := string(value.([]byte))
1059			if xmlEscapeChars {
1060				v = escapeChars(v)
1061			}
1062			elen = len(v)
1063			if elen > 0 {
1064				*s += ">" + v
1065			}
1066		default:
1067			var v []byte
1068			var err error
1069			if doIndent {
1070				v, err = xml.MarshalIndent(value, p.padding, p.indent)
1071			} else {
1072				v, err = xml.Marshal(value)
1073			}
1074			if err != nil {
1075				*s += ">UNKNOWN"
1076			} else {
1077				elen = len(v)
1078				if elen > 0 {
1079					*s += string(v)
1080				}
1081			}
1082		}
1083		isSimple = true
1084		endTag = true
1085	}
1086	if endTag {
1087		if doIndent {
1088			if !isSimple {
1089				*s += p.padding
1090			}
1091		}
1092		if elen > 0 || useGoXmlEmptyElemSyntax {
1093			if elen == 0 {
1094				*s += ">"
1095			}
1096			*s += `</` + key + ">"
1097		} else {
1098			*s += `/>`
1099		}
1100	}
1101	if doIndent {
1102		if p.cnt > p.start {
1103			*s += "\n"
1104		}
1105		p.Outdent()
1106	}
1107
1108	return nil
1109}
1110
1111// ============================ sort interface implementation =================
1112
// attrList is a list of XML attribute pairs - element [0] of each pair is the
// attribute name and element [1] is the attribute value. It implements
// sort.Interface so the attributes of a tag can be ordered by name.
type attrList [][2]string

// Len returns the number of attribute pairs in the list.
func (a attrList) Len() int {
	return len(a)
}

// Swap exchanges the attribute pairs at indexes i and j.
func (a attrList) Swap(i, j int) {
	a[i], a[j] = a[j], a[i]
}

// Less reports whether the attribute name at index i must sort before the
// attribute name at index j. The comparison is strict ('<' rather than '<='):
// for equal names Less must be false in both directions, otherwise the sort
// package cannot recognize the two elements as equal.
func (a attrList) Less(i, j int) bool {
	return a[i][0] < a[j][0]
}
1126
// elemList is a list of key/value pairs - element [0] of each pair is the key
// and element [1] is the associated value of any type. It implements
// sort.Interface so the pairs can be ordered by key.
type elemList [][2]interface{}

// Len returns the number of key/value pairs in the list.
func (e elemList) Len() int {
	return len(e)
}

// Swap exchanges the key/value pairs at indexes i and j.
func (e elemList) Swap(i, j int) {
	e[i], e[j] = e[j], e[i]
}

// Less reports whether the key at index i must sort before the key at index j.
// The key slot of each pair must hold a string; the type assertion panics
// otherwise. The comparison is strict ('<' rather than '<='): for equal keys
// Less must be false in both directions, otherwise the sort package cannot
// recognize the two elements as equal.
func (e elemList) Less(i, j int) bool {
	return e[i][0].(string) < e[j][0].(string)
}
1140