1// Copyright 2012-2016, 2018-2019 Charles Banning. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file 4 5// xml.go - basically the core of X2j for map[string]interface{} values. 6// NewMapXml, NewMapXmlReader, mv.Xml, mv.XmlWriter 7// see x2j and j2x for wrappers to provide end-to-end transformation of XML and JSON messages. 8 9package mxj 10 11import ( 12 "bytes" 13 "encoding/json" 14 "encoding/xml" 15 "errors" 16 "fmt" 17 "io" 18 "reflect" 19 "sort" 20 "strconv" 21 "strings" 22 "time" 23) 24 25// ------------------- NewMapXml & NewMapXmlReader ... ------------------------- 26 27// If XmlCharsetReader != nil, it will be used to decode the XML, if required. 28// Note: if CustomDecoder != nil, then XmlCharsetReader is ignored; 29// set the CustomDecoder attribute instead. 30// import ( 31// charset "code.google.com/p/go-charset/charset" 32// github.com/clbanning/mxj 33// ) 34// ... 35// mxj.XmlCharsetReader = charset.NewReader 36// m, merr := mxj.NewMapXml(xmlValue) 37var XmlCharsetReader func(charset string, input io.Reader) (io.Reader, error) 38 39// NewMapXml - convert a XML doc into a Map 40// (This is analogous to unmarshalling a JSON string to map[string]interface{} using json.Unmarshal().) 41// If the optional argument 'cast' is 'true', then values will be converted to boolean or float64 if possible. 42// 43// Converting XML to JSON is a simple as: 44// ... 45// mapVal, merr := mxj.NewMapXml(xmlVal) 46// if merr != nil { 47// // handle error 48// } 49// jsonVal, jerr := mapVal.Json() 50// if jerr != nil { 51// // handle error 52// } 53// 54// NOTES: 55// 1. Declarations, directives, process instructions and comments are NOT parsed. 56// 2. The 'xmlVal' will be parsed looking for an xml.StartElement, so BOM and other 57// extraneous xml.CharData will be ignored unless io.EOF is reached first. 58// 3. If CoerceKeysToLower() has been called, then all key values will be lower case. 59// 4. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case. 60// 5. If DisableTrimWhiteSpace(b bool) has been called, then all values will be trimmed or not. 'true' by default. 61func NewMapXml(xmlVal []byte, cast ...bool) (Map, error) { 62 var r bool 63 if len(cast) == 1 { 64 r = cast[0] 65 } 66 return xmlToMap(xmlVal, r) 67} 68 69// Get next XML doc from an io.Reader as a Map value. Returns Map value. 70// NOTES: 71// 1. Declarations, directives, process instructions and comments are NOT parsed. 72// 2. The 'xmlReader' will be parsed looking for an xml.StartElement, so BOM and other 73// extraneous xml.CharData will be ignored unless io.EOF is reached first. 74// 3. If CoerceKeysToLower() has been called, then all key values will be lower case. 75// 4. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case. 76func NewMapXmlReader(xmlReader io.Reader, cast ...bool) (Map, error) { 77 var r bool 78 if len(cast) == 1 { 79 r = cast[0] 80 } 81 82 // We need to put an *os.File reader in a ByteReader or the xml.NewDecoder 83 // will wrap it in a bufio.Reader and seek on the file beyond where the 84 // xml.Decoder parses! 85 if _, ok := xmlReader.(io.ByteReader); !ok { 86 xmlReader = myByteReader(xmlReader) // see code at EOF 87 } 88 89 // build the map 90 return xmlReaderToMap(xmlReader, r) 91} 92 93// Get next XML doc from an io.Reader as a Map value. Returns Map value and slice with the raw XML. 94// NOTES: 95// 1. Declarations, directives, process instructions and comments are NOT parsed. 96// 2. Due to the implementation of xml.Decoder, the raw XML off the reader is buffered to []byte 97// using a ByteReader. If the io.Reader is an os.File, there may be significant performance impact. 98// See the examples - getmetrics1.go through getmetrics4.go - for comparative use cases on a large 99// data set. If the io.Reader is wrapping a []byte value in-memory, however, such as http.Request.Body 100// you CAN use it to efficiently unmarshal a XML doc and retrieve the raw XML in a single call. 101// 3. The 'raw' return value may be larger than the XML text value. 102// 4. The 'xmlReader' will be parsed looking for an xml.StartElement, so BOM and other 103// extraneous xml.CharData will be ignored unless io.EOF is reached first. 104// 5. If CoerceKeysToLower() has been called, then all key values will be lower case. 105// 6. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case. 106func NewMapXmlReaderRaw(xmlReader io.Reader, cast ...bool) (Map, []byte, error) { 107 var r bool 108 if len(cast) == 1 { 109 r = cast[0] 110 } 111 // create TeeReader so we can retrieve raw XML 112 buf := make([]byte, 0) 113 wb := bytes.NewBuffer(buf) 114 trdr := myTeeReader(xmlReader, wb) // see code at EOF 115 116 m, err := xmlReaderToMap(trdr, r) 117 118 // retrieve the raw XML that was decoded 119 b := wb.Bytes() 120 121 if err != nil { 122 return nil, b, err 123 } 124 125 return m, b, nil 126} 127 128// xmlReaderToMap() - parse a XML io.Reader to a map[string]interface{} value 129func xmlReaderToMap(rdr io.Reader, r bool) (map[string]interface{}, error) { 130 // parse the Reader 131 p := xml.NewDecoder(rdr) 132 if CustomDecoder != nil { 133 useCustomDecoder(p) 134 } else { 135 p.CharsetReader = XmlCharsetReader 136 } 137 return xmlToMapParser("", nil, p, r) 138} 139 140// xmlToMap - convert a XML doc into map[string]interface{} value 141func xmlToMap(doc []byte, r bool) (map[string]interface{}, error) { 142 b := bytes.NewReader(doc) 143 p := xml.NewDecoder(b) 144 if CustomDecoder != nil { 145 useCustomDecoder(p) 146 } else { 147 p.CharsetReader = XmlCharsetReader 148 } 149 return xmlToMapParser("", nil, p, r) 150} 151 152// ===================================== where the work happens ============================= 153 154// PrependAttrWithHyphen. Prepend attribute tags with a hyphen. 155// Default is 'true'. (Not applicable to NewMapXmlSeq(), mv.XmlSeq(), etc.) 156// Note: 157// If 'false', unmarshaling and marshaling is not symmetric. Attributes will be 158// marshal'd as <attr_tag>attr</attr_tag> and may be part of a list. 159func PrependAttrWithHyphen(v bool) { 160 if v { 161 attrPrefix = "-" 162 lenAttrPrefix = len(attrPrefix) 163 return 164 } 165 attrPrefix = "" 166 lenAttrPrefix = len(attrPrefix) 167} 168 169// Include sequence id with inner tags. - per Sean Murphy, murphysean84@gmail.com. 170var includeTagSeqNum bool 171 172// IncludeTagSeqNum - include a "_seq":N key:value pair with each inner tag, denoting 173// its position when parsed. This is of limited usefulness, since list values cannot 174// be tagged with "_seq" without changing their depth in the Map. 175// So THIS SHOULD BE USED WITH CAUTION - see the test cases. Here's a sample of what 176// you get. 177/* 178 <Obj c="la" x="dee" h="da"> 179 <IntObj id="3"/> 180 <IntObj1 id="1"/> 181 <IntObj id="2"/> 182 <StrObj>hello</StrObj> 183 </Obj> 184 185 parses as: 186 187 { 188 Obj:{ 189 "-c":"la", 190 "-h":"da", 191 "-x":"dee", 192 "intObj":[ 193 { 194 "-id"="3", 195 "_seq":"0" // if mxj.Cast is passed, then: "_seq":0 196 }, 197 { 198 "-id"="2", 199 "_seq":"2" 200 }], 201 "intObj1":{ 202 "-id":"1", 203 "_seq":"1" 204 }, 205 "StrObj":{ 206 "#text":"hello", // simple element value gets "#text" tag 207 "_seq":"3" 208 } 209 } 210 } 211*/ 212func IncludeTagSeqNum(b ...bool) { 213 if len(b) == 0 { 214 includeTagSeqNum = !includeTagSeqNum 215 } else if len(b) == 1 { 216 includeTagSeqNum = b[0] 217 } 218} 219 220// all keys will be "lower case" 221var lowerCase bool 222 223// Coerce all tag values to keys in lower case. This is useful if you've got sources with variable 224// tag capitalization, and you want to use m.ValuesForKeys(), etc., with the key or path spec 225// in lower case. 226// CoerceKeysToLower() will toggle the coercion flag true|false - on|off 227// CoerceKeysToLower(true|false) will set the coercion flag on|off 228// 229// NOTE: only recognized by NewMapXml, NewMapXmlReader, and NewMapXmlReaderRaw functions as well as 230// the associated HandleXmlReader and HandleXmlReaderRaw. 231func CoerceKeysToLower(b ...bool) { 232 if len(b) == 0 { 233 lowerCase = !lowerCase 234 } else if len(b) == 1 { 235 lowerCase = b[0] 236 } 237} 238 239// disableTrimWhiteSpace sets if the white space should be removed or not 240var disableTrimWhiteSpace bool 241var trimRunes = "\t\r\b\n " 242 243// DisableTrimWhiteSpace set if the white space should be trimmed or not. By default white space is always trimmed. If 244// no argument is provided, trim white space will be disabled. 245func DisableTrimWhiteSpace(b ...bool) { 246 if len(b) == 0 { 247 disableTrimWhiteSpace = true 248 } else { 249 disableTrimWhiteSpace = b[0] 250 } 251 252 if disableTrimWhiteSpace { 253 trimRunes = "\t\r\b\n" 254 } else { 255 trimRunes = "\t\r\b\n " 256 } 257} 258 259// 25jun16: Allow user to specify the "prefix" character for XML attribute key labels. 260// We do this by replacing '`' constant with attrPrefix var, replacing useHyphen with attrPrefix = "", 261// and adding a SetAttrPrefix(s string) function. 262 263var attrPrefix string = `-` // the default 264var lenAttrPrefix int = 1 // the default 265 266// SetAttrPrefix changes the default, "-", to the specified value, s. 267// SetAttrPrefix("") is the same as PrependAttrWithHyphen(false). 268// (Not applicable for NewMapXmlSeq(), mv.XmlSeq(), etc.) 269func SetAttrPrefix(s string) { 270 attrPrefix = s 271 lenAttrPrefix = len(attrPrefix) 272} 273 274// 18jan17: Allows user to specify if the map keys should be in snake case instead 275// of the default hyphenated notation. 276var snakeCaseKeys bool 277 278// CoerceKeysToSnakeCase changes the default, false, to the specified value, b. 279// Note: the attribute prefix will be a hyphen, '-', or what ever string value has 280// been specified using SetAttrPrefix. 281func CoerceKeysToSnakeCase(b ...bool) { 282 if len(b) == 0 { 283 snakeCaseKeys = !snakeCaseKeys 284 } else if len(b) == 1 { 285 snakeCaseKeys = b[0] 286 } 287} 288 289// 10jan19: use of pull request #57 should be conditional - legacy code assumes 290// numeric values are float64. 291var castToInt bool 292 293// CastValuesToInt tries to coerce numeric valus to int64 or uint64 instead of the 294// default float64. Repeated calls with no argument will toggle this on/off, or this 295// handling will be set with the value of 'b'. 296func CastValuesToInt(b ...bool) { 297 if len(b) == 0 { 298 castToInt = !castToInt 299 } else if len(b) == 1 { 300 castToInt = b[0] 301 } 302} 303 304// 05feb17: support processing XMPP streams (issue #36) 305var handleXMPPStreamTag bool 306 307// HandleXMPPStreamTag causes decoder to parse XMPP <stream:stream> elements. 308// If called with no argument, XMPP stream element handling is toggled on/off. 309// (See xmppStream_test.go for example.) 310// If called with NewMapXml, NewMapXmlReader, New MapXmlReaderRaw the "stream" 311// element will be returned as: 312// map["stream"]interface{}{map[-<attrs>]interface{}}. 313// If called with NewMapSeq, NewMapSeqReader, NewMapSeqReaderRaw the "stream" 314// element will be returned as: 315// map["stream:stream"]interface{}{map["#attr"]interface{}{map[string]interface{}}} 316// where the "#attr" values have "#text" and "#seq" keys. (See NewMapXmlSeq.) 317func HandleXMPPStreamTag(b ...bool) { 318 if len(b) == 0 { 319 handleXMPPStreamTag = !handleXMPPStreamTag 320 } else if len(b) == 1 { 321 handleXMPPStreamTag = b[0] 322 } 323} 324 325// 21jan18 - decode all values as map["#text":value] (issue #56) 326var decodeSimpleValuesAsMap bool 327 328// DecodeSimpleValuesAsMap forces all values to be decoded as map["#text":<value>]. 329// If called with no argument, the decoding is toggled on/off. 330// 331// By default the NewMapXml functions decode simple values without attributes as 332// map[<tag>:<value>]. This function causes simple values without attributes to be 333// decoded the same as simple values with attributes - map[<tag>:map["#text":<value>]]. 334func DecodeSimpleValuesAsMap(b ...bool) { 335 if len(b) == 0 { 336 decodeSimpleValuesAsMap = !decodeSimpleValuesAsMap 337 } else if len(b) == 1 { 338 decodeSimpleValuesAsMap = b[0] 339 } 340} 341 342// xmlToMapParser (2015.11.12) - load a 'clean' XML doc into a map[string]interface{} directly. 343// A refactoring of xmlToTreeParser(), markDuplicate() and treeToMap() - here, all-in-one. 344// We've removed the intermediate *node tree with the allocation and subsequent rescanning. 345func xmlToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[string]interface{}, error) { 346 if lowerCase { 347 skey = strings.ToLower(skey) 348 } 349 if snakeCaseKeys { 350 skey = strings.Replace(skey, "-", "_", -1) 351 } 352 353 // NOTE: all attributes and sub-elements parsed into 'na', 'na' is returned as value for 'skey' in 'n'. 354 // Unless 'skey' is a simple element w/o attributes, in which case the xml.CharData value is the value. 355 var n, na map[string]interface{} 356 var seq int // for includeTagSeqNum 357 358 // Allocate maps and load attributes, if any. 359 // NOTE: on entry from NewMapXml(), etc., skey=="", and we fall through 360 // to get StartElement then recurse with skey==xml.StartElement.Name.Local 361 // where we begin allocating map[string]interface{} values 'n' and 'na'. 362 if skey != "" { 363 n = make(map[string]interface{}) // old n 364 na = make(map[string]interface{}) // old n.nodes 365 if len(a) > 0 { 366 for _, v := range a { 367 if snakeCaseKeys { 368 v.Name.Local = strings.Replace(v.Name.Local, "-", "_", -1) 369 } 370 var key string 371 key = attrPrefix + v.Name.Local 372 if lowerCase { 373 key = strings.ToLower(key) 374 } 375 if xmlEscapeCharsDecoder { // per issue#84 376 v.Value = escapeChars(v.Value) 377 } 378 na[key] = cast(v.Value, r, key) 379 } 380 } 381 } 382 // Return XMPP <stream:stream> message. 383 if handleXMPPStreamTag && skey == "stream" { 384 n[skey] = na 385 return n, nil 386 } 387 388 for { 389 t, err := p.Token() 390 if err != nil { 391 if err != io.EOF { 392 return nil, errors.New("xml.Decoder.Token() - " + err.Error()) 393 } 394 return nil, err 395 } 396 switch t.(type) { 397 case xml.StartElement: 398 tt := t.(xml.StartElement) 399 400 // First call to xmlToMapParser() doesn't pass xml.StartElement - the map key. 401 // So when the loop is first entered, the first token is the root tag along 402 // with any attributes, which we process here. 403 // 404 // Subsequent calls to xmlToMapParser() will pass in tag+attributes for 405 // processing before getting the next token which is the element value, 406 // which is done above. 407 if skey == "" { 408 return xmlToMapParser(tt.Name.Local, tt.Attr, p, r) 409 } 410 411 // If not initializing the map, parse the element. 412 // len(nn) == 1, necessarily - it is just an 'n'. 413 nn, err := xmlToMapParser(tt.Name.Local, tt.Attr, p, r) 414 if err != nil { 415 return nil, err 416 } 417 418 // The nn map[string]interface{} value is a na[nn_key] value. 419 // We need to see if nn_key already exists - means we're parsing a list. 420 // This may require converting na[nn_key] value into []interface{} type. 421 // First, extract the key:val for the map - it's a singleton. 422 // Note: 423 // * if CoerceKeysToLower() called, then key will be lower case. 424 // * if CoerceKeysToSnakeCase() called, then key will be converted to snake case. 425 var key string 426 var val interface{} 427 for key, val = range nn { 428 break 429 } 430 431 // IncludeTagSeqNum requests that the element be augmented with a "_seq" sub-element. 432 // In theory, we don't need this if len(na) == 1. But, we don't know what might 433 // come next - we're only parsing forward. So if you ask for 'includeTagSeqNum' you 434 // get it on every element. (Personally, I never liked this, but I added it on request 435 // and did get a $50 Amazon gift card in return - now we support it for backwards compatibility!) 436 if includeTagSeqNum { 437 switch val.(type) { 438 case []interface{}: 439 // noop - There's no clean way to handle this w/o changing message structure. 440 case map[string]interface{}: 441 val.(map[string]interface{})["_seq"] = seq // will overwrite an "_seq" XML tag 442 seq++ 443 case interface{}: // a non-nil simple element: string, float64, bool 444 v := map[string]interface{}{"#text": val} 445 v["_seq"] = seq 446 seq++ 447 val = v 448 } 449 } 450 451 // 'na' holding sub-elements of n. 452 // See if 'key' already exists. 453 // If 'key' exists, then this is a list, if not just add key:val to na. 454 if v, ok := na[key]; ok { 455 var a []interface{} 456 switch v.(type) { 457 case []interface{}: 458 a = v.([]interface{}) 459 default: // anything else - note: v.(type) != nil 460 a = []interface{}{v} 461 } 462 a = append(a, val) 463 na[key] = a 464 } else { 465 na[key] = val // save it as a singleton 466 } 467 case xml.EndElement: 468 // len(n) > 0 if this is a simple element w/o xml.Attrs - see xml.CharData case. 469 if len(n) == 0 { 470 // If len(na)==0 we have an empty element == ""; 471 // it has no xml.Attr nor xml.CharData. 472 // Note: in original node-tree parser, val defaulted to ""; 473 // so we always had the default if len(node.nodes) == 0. 474 if len(na) > 0 { 475 n[skey] = na 476 } else { 477 n[skey] = "" // empty element 478 } 479 } else if len(n) == 1 && len(na) > 0 { 480 // it's a simple element w/ no attributes w/ subelements 481 for _, v := range n { 482 na["#text"] = v 483 } 484 n[skey] = na 485 } 486 return n, nil 487 case xml.CharData: 488 // clean up possible noise 489 tt := strings.Trim(string(t.(xml.CharData)), trimRunes) 490 if xmlEscapeCharsDecoder { // issue#84 491 tt = escapeChars(tt) 492 } 493 if len(tt) > 0 { 494 if len(na) > 0 || decodeSimpleValuesAsMap { 495 na["#text"] = cast(tt, r, "#text") 496 } else if skey != "" { 497 n[skey] = cast(tt, r, skey) 498 } else { 499 // per Adrian (http://www.adrianlungu.com/) catch stray text 500 // in decoder stream - 501 // https://github.com/clbanning/mxj/pull/14#issuecomment-182816374 502 // NOTE: CharSetReader must be set to non-UTF-8 CharSet or you'll get 503 // a p.Token() decoding error when the BOM is UTF-16 or UTF-32. 504 continue 505 } 506 } 507 default: 508 // noop 509 } 510 } 511} 512 513var castNanInf bool 514 515// Cast "Nan", "Inf", "-Inf" XML values to 'float64'. 516// By default, these values will be decoded as 'string'. 517func CastNanInf(b ...bool) { 518 if len(b) == 0 { 519 castNanInf = !castNanInf 520 } else if len(b) == 1 { 521 castNanInf = b[0] 522 } 523} 524 525// cast - try to cast string values to bool or float64 526// 't' is the tag key that can be checked for 'not-casting' 527func cast(s string, r bool, t string) interface{} { 528 if checkTagToSkip != nil && t != "" && checkTagToSkip(t) { 529 // call the check-function here with 't[0]' 530 // if 'true' return s 531 return s 532 } 533 534 if r { 535 // handle nan and inf 536 if !castNanInf { 537 switch strings.ToLower(s) { 538 case "nan", "inf", "-inf": 539 return s 540 } 541 } 542 543 // handle numeric strings ahead of boolean 544 if castToInt { 545 if f, err := strconv.ParseInt(s, 10, 64); err == nil { 546 return f 547 } 548 if f, err := strconv.ParseUint(s, 10, 64); err == nil { 549 return f 550 } 551 } 552 553 if castToFloat { 554 if f, err := strconv.ParseFloat(s, 64); err == nil { 555 return f 556 } 557 } 558 559 // ParseBool treats "1"==true & "0"==false, we've already scanned those 560 // values as float64. See if value has 't' or 'f' as initial screen to 561 // minimize calls to ParseBool; also, see if len(s) < 6. 562 if castToBool { 563 if len(s) > 0 && len(s) < 6 { 564 switch s[:1] { 565 case "t", "T", "f", "F": 566 if b, err := strconv.ParseBool(s); err == nil { 567 return b 568 } 569 } 570 } 571 } 572 } 573 return s 574} 575 576// pull request, #59 577var castToFloat = true 578 579// CastValuesToFloat can be used to skip casting to float64 when 580// "cast" argument is 'true' in NewMapXml, etc. 581// Default is true. 582func CastValuesToFloat(b ...bool) { 583 if len(b) == 0 { 584 castToFloat = !castToFloat 585 } else if len(b) == 1 { 586 castToFloat = b[0] 587 } 588} 589 590var castToBool = true 591 592// CastValuesToBool can be used to skip casting to bool when 593// "cast" argument is 'true' in NewMapXml, etc. 594// Default is true. 595func CastValuesToBool(b ...bool) { 596 if len(b) == 0 { 597 castToBool = !castToBool 598 } else if len(b) == 1 { 599 castToBool = b[0] 600 } 601} 602 603// checkTagToSkip - switch to address Issue #58 604 605var checkTagToSkip func(string) bool 606 607// SetCheckTagToSkipFunc registers function to test whether the value 608// for a tag should be cast to bool or float64 when "cast" argument is 'true'. 609// (Dot tag path notation is not supported.) 610// NOTE: key may be "#text" if it's a simple element with attributes 611// or "decodeSimpleValuesAsMap == true". 612// NOTE: does not apply to NewMapXmlSeq... functions. 613func SetCheckTagToSkipFunc(fn func(string) bool) { 614 checkTagToSkip = fn 615} 616 617// ------------------ END: NewMapXml & NewMapXmlReader ------------------------- 618 619// ------------------ mv.Xml & mv.XmlWriter - from j2x ------------------------ 620 621const ( 622 DefaultRootTag = "doc" 623) 624 625var useGoXmlEmptyElemSyntax bool 626 627// XmlGoEmptyElemSyntax() - <tag ...></tag> rather than <tag .../>. 628// Go's encoding/xml package marshals empty XML elements as <tag ...></tag>. By default this package 629// encodes empty elements as <tag .../>. If you're marshaling Map values that include structures 630// (which are passed to xml.Marshal for encoding), this will let you conform to the standard package. 631func XmlGoEmptyElemSyntax() { 632 useGoXmlEmptyElemSyntax = true 633} 634 635// XmlDefaultEmptyElemSyntax() - <tag .../> rather than <tag ...></tag>. 636// Return XML encoding for empty elements to the default package setting. 637// Reverses effect of XmlGoEmptyElemSyntax(). 638func XmlDefaultEmptyElemSyntax() { 639 useGoXmlEmptyElemSyntax = false 640} 641 642// ------- issue #88 ---------- 643// xmlCheckIsValid set switch to force decoding the encoded XML to 644// see if it is valid XML. 645var xmlCheckIsValid bool 646 647// XmlCheckIsValid forces the encoded XML to be checked for validity. 648func XmlCheckIsValid(b ...bool) { 649 if len(b) == 1 { 650 xmlCheckIsValid = b[0] 651 return 652 } 653 xmlCheckIsValid = !xmlCheckIsValid 654} 655 656// Encode a Map as XML. The companion of NewMapXml(). 657// The following rules apply. 658// - The key label "#text" is treated as the value for a simple element with attributes. 659// - Map keys that begin with a hyphen, '-', are interpreted as attributes. 660// It is an error if the attribute doesn't have a []byte, string, number, or boolean value. 661// - Map value type encoding: 662// > string, bool, float64, int, int32, int64, float32: per "%v" formating 663// > []bool, []uint8: by casting to string 664// > structures, etc.: handed to xml.Marshal() - if there is an error, the element 665// value is "UNKNOWN" 666// - Elements with only attribute values or are null are terminated using "/>". 667// - If len(mv) == 1 and no rootTag is provided, then the map key is used as the root tag, possible. 668// Thus, `{ "key":"value" }` encodes as "<key>value</key>". 669// - To encode empty elements in a syntax consistent with encoding/xml call UseGoXmlEmptyElementSyntax(). 670// The attributes tag=value pairs are alphabetized by "tag". Also, when encoding map[string]interface{} values - 671// complex elements, etc. - the key:value pairs are alphabetized by key so the resulting tags will appear sorted. 672func (mv Map) Xml(rootTag ...string) ([]byte, error) { 673 m := map[string]interface{}(mv) 674 var err error 675 b := new(bytes.Buffer) 676 p := new(pretty) // just a stub 677 678 if len(m) == 1 && len(rootTag) == 0 { 679 for key, value := range m { 680 // if it an array, see if all values are map[string]interface{} 681 // we force a new root tag if we'll end up with no key:value in the list 682 // so: key:[string_val, bool:true] --> <doc><key>string_val</key><bool>true</bool></doc> 683 switch value.(type) { 684 case []interface{}: 685 for _, v := range value.([]interface{}) { 686 switch v.(type) { 687 case map[string]interface{}: // noop 688 default: // anything else 689 err = marshalMapToXmlIndent(false, b, DefaultRootTag, m, p) 690 goto done 691 } 692 } 693 } 694 err = marshalMapToXmlIndent(false, b, key, value, p) 695 } 696 } else if len(rootTag) == 1 { 697 err = marshalMapToXmlIndent(false, b, rootTag[0], m, p) 698 } else { 699 err = marshalMapToXmlIndent(false, b, DefaultRootTag, m, p) 700 } 701done: 702 if xmlCheckIsValid { 703 d := xml.NewDecoder(bytes.NewReader(b.Bytes())) 704 for { 705 _, err = d.Token() 706 if err == io.EOF { 707 err = nil 708 break 709 } else if err != nil { 710 return nil, err 711 } 712 } 713 } 714 return b.Bytes(), err 715} 716 717// The following implementation is provided only for symmetry with NewMapXmlReader[Raw] 718// The names will also provide a key for the number of return arguments. 719 720// Writes the Map as XML on the Writer. 721// See Xml() for encoding rules. 722func (mv Map) XmlWriter(xmlWriter io.Writer, rootTag ...string) error { 723 x, err := mv.Xml(rootTag...) 724 if err != nil { 725 return err 726 } 727 728 _, err = xmlWriter.Write(x) 729 return err 730} 731 732// Writes the Map as XML on the Writer. []byte is the raw XML that was written. 733// See Xml() for encoding rules. 734/* 735func (mv Map) XmlWriterRaw(xmlWriter io.Writer, rootTag ...string) ([]byte, error) { 736 x, err := mv.Xml(rootTag...) 737 if err != nil { 738 return x, err 739 } 740 741 _, err = xmlWriter.Write(x) 742 return x, err 743} 744*/ 745 746// Writes the Map as pretty XML on the Writer. 747// See Xml() for encoding rules. 748func (mv Map) XmlIndentWriter(xmlWriter io.Writer, prefix, indent string, rootTag ...string) error { 749 x, err := mv.XmlIndent(prefix, indent, rootTag...) 750 if err != nil { 751 return err 752 } 753 754 _, err = xmlWriter.Write(x) 755 return err 756} 757 758// Writes the Map as pretty XML on the Writer. []byte is the raw XML that was written. 759// See Xml() for encoding rules. 760/* 761func (mv Map) XmlIndentWriterRaw(xmlWriter io.Writer, prefix, indent string, rootTag ...string) ([]byte, error) { 762 x, err := mv.XmlIndent(prefix, indent, rootTag...) 763 if err != nil { 764 return x, err 765 } 766 767 _, err = xmlWriter.Write(x) 768 return x, err 769} 770*/ 771 772// -------------------- END: mv.Xml & mv.XmlWriter ------------------------------- 773 774// -------------- Handle XML stream by processing Map value -------------------- 775 776// Default poll delay to keep Handler from spinning on an open stream 777// like sitting on os.Stdin waiting for imput. 778var xhandlerPollInterval = time.Millisecond 779 780// Bulk process XML using handlers that process a Map value. 781// 'rdr' is an io.Reader for XML (stream) 782// 'mapHandler' is the Map processor. Return of 'false' stops io.Reader processing. 783// 'errHandler' is the error processor. Return of 'false' stops io.Reader processing and returns the error. 784// Note: mapHandler() and errHandler() calls are blocking, so reading and processing of messages is serialized. 785// This means that you can stop reading the file on error or after processing a particular message. 786// To have reading and handling run concurrently, pass argument to a go routine in handler and return 'true'. 787func HandleXmlReader(xmlReader io.Reader, mapHandler func(Map) bool, errHandler func(error) bool) error { 788 var n int 789 for { 790 m, merr := NewMapXmlReader(xmlReader) 791 n++ 792 793 // handle error condition with errhandler 794 if merr != nil && merr != io.EOF { 795 merr = fmt.Errorf("[xmlReader: %d] %s", n, merr.Error()) 796 if ok := errHandler(merr); !ok { 797 // caused reader termination 798 return merr 799 } 800 continue 801 } 802 803 // pass to maphandler 804 if len(m) != 0 { 805 if ok := mapHandler(m); !ok { 806 break 807 } 808 } else if merr != io.EOF { 809 time.Sleep(xhandlerPollInterval) 810 } 811 812 if merr == io.EOF { 813 break 814 } 815 } 816 return nil 817} 818 819// Bulk process XML using handlers that process a Map value and the raw XML. 820// 'rdr' is an io.Reader for XML (stream) 821// 'mapHandler' is the Map and raw XML - []byte - processor. Return of 'false' stops io.Reader processing. 822// 'errHandler' is the error and raw XML processor. Return of 'false' stops io.Reader processing and returns the error. 823// Note: mapHandler() and errHandler() calls are blocking, so reading and processing of messages is serialized. 824// This means that you can stop reading the file on error or after processing a particular message. 825// To have reading and handling run concurrently, pass argument(s) to a go routine in handler and return 'true'. 826// See NewMapXmlReaderRaw for comment on performance associated with retrieving raw XML from a Reader. 827func HandleXmlReaderRaw(xmlReader io.Reader, mapHandler func(Map, []byte) bool, errHandler func(error, []byte) bool) error { 828 var n int 829 for { 830 m, raw, merr := NewMapXmlReaderRaw(xmlReader) 831 n++ 832 833 // handle error condition with errhandler 834 if merr != nil && merr != io.EOF { 835 merr = fmt.Errorf("[xmlReader: %d] %s", n, merr.Error()) 836 if ok := errHandler(merr, raw); !ok { 837 // caused reader termination 838 return merr 839 } 840 continue 841 } 842 843 // pass to maphandler 844 if len(m) != 0 { 845 if ok := mapHandler(m, raw); !ok { 846 break 847 } 848 } else if merr != io.EOF { 849 time.Sleep(xhandlerPollInterval) 850 } 851 852 if merr == io.EOF { 853 break 854 } 855 } 856 return nil 857} 858 859// ----------------- END: Handle XML stream by processing Map value -------------- 860 861// -------- a hack of io.TeeReader ... need one that's an io.ByteReader for xml.NewDecoder() ---------- 862 863// This is a clone of io.TeeReader with the additional method t.ReadByte(). 864// Thus, this TeeReader is also an io.ByteReader. 865// This is necessary because xml.NewDecoder uses a ByteReader not a Reader. It appears to have been written 866// with bufio.Reader or bytes.Reader in mind ... not a generic io.Reader, which doesn't have to have ReadByte().. 867// If NewDecoder is passed a Reader that does not satisfy ByteReader() it wraps the Reader with 868// bufio.NewReader and uses ReadByte rather than Read that runs the TeeReader pipe logic. 869 870type teeReader struct { 871 r io.Reader 872 w io.Writer 873 b []byte 874} 875 876func myTeeReader(r io.Reader, w io.Writer) io.Reader { 877 b := make([]byte, 1) 878 return &teeReader{r, w, b} 879} 880 881// need for io.Reader - but we don't use it ... 882func (t *teeReader) Read(p []byte) (int, error) { 883 return 0, nil 884} 885 886func (t *teeReader) ReadByte() (byte, error) { 887 n, err := t.r.Read(t.b) 888 if n > 0 { 889 if _, err := t.w.Write(t.b[:1]); err != nil { 890 return t.b[0], err 891 } 892 } 893 return t.b[0], err 894} 895 896// For use with NewMapXmlReader & NewMapXmlSeqReader. 897type byteReader struct { 898 r io.Reader 899 b []byte 900} 901 902func myByteReader(r io.Reader) io.Reader { 903 b := make([]byte, 1) 904 return &byteReader{r, b} 905} 906 907// Need for io.Reader interface ... 908// Needed if reading a malformed http.Request.Body - issue #38. 909func (b *byteReader) Read(p []byte) (int, error) { 910 return b.r.Read(p) 911} 912 913func (b *byteReader) ReadByte() (byte, error) { 914 _, err := b.r.Read(b.b) 915 if len(b.b) > 0 { 916 return b.b[0], nil 917 } 918 var c byte 919 return c, err 920} 921 922// ----------------------- END: io.TeeReader hack ----------------------------------- 923 924// ---------------------- XmlIndent - from j2x package ---------------------------- 925 926// Encode a map[string]interface{} as a pretty XML string. 927// See Xml for encoding rules. 928func (mv Map) XmlIndent(prefix, indent string, rootTag ...string) ([]byte, error) { 929 m := map[string]interface{}(mv) 930 931 var err error 932 b := new(bytes.Buffer) 933 p := new(pretty) 934 p.indent = indent 935 p.padding = prefix 936 937 if len(m) == 1 && len(rootTag) == 0 { 938 // this can extract the key for the single map element 939 // use it if it isn't a key for a list 940 for key, value := range m { 941 if _, ok := value.([]interface{}); ok { 942 err = marshalMapToXmlIndent(true, b, DefaultRootTag, m, p) 943 } else { 944 err = marshalMapToXmlIndent(true, b, key, value, p) 945 } 946 } 947 } else if len(rootTag) == 1 { 948 err = marshalMapToXmlIndent(true, b, rootTag[0], m, p) 949 } else { 950 err = marshalMapToXmlIndent(true, b, DefaultRootTag, m, p) 951 } 952 if xmlCheckIsValid { 953 d := xml.NewDecoder(bytes.NewReader(b.Bytes())) 954 for { 955 _, err = d.Token() 956 if err == io.EOF { 957 err = nil 958 break 959 } else if err != nil { 960 return nil, err 961 } 962 } 963 } 964 return b.Bytes(), err 965} 966 967type pretty struct { 968 indent string 969 cnt int 970 padding string 971 mapDepth int 972 start int 973} 974 975func (p *pretty) Indent() { 976 p.padding += p.indent 977 p.cnt++ 978} 979 980func (p *pretty) Outdent() { 981 if p.cnt > 0 { 982 p.padding = p.padding[:len(p.padding)-len(p.indent)] 983 p.cnt-- 984 } 985} 986 987// where the work actually happens 988// returns an error if an attribute is not atomic 989// NOTE: 01may20 - replaces mapToXmlIndent(); uses bytes.Buffer instead for string appends. 990func marshalMapToXmlIndent(doIndent bool, b *bytes.Buffer, key string, value interface{}, pp *pretty) error { 991 var err error 992 var endTag bool 993 var isSimple bool 994 var elen int 995 p := &pretty{pp.indent, pp.cnt, pp.padding, pp.mapDepth, pp.start} 996 997 // per issue #48, 18apr18 - try and coerce maps to map[string]interface{} 998 // Don't need for mapToXmlSeqIndent, since maps there are decoded by NewMapXmlSeq(). 999 if reflect.ValueOf(value).Kind() == reflect.Map { 1000 switch value.(type) { 1001 case map[string]interface{}: 1002 default: 1003 val := make(map[string]interface{}) 1004 vv := reflect.ValueOf(value) 1005 keys := vv.MapKeys() 1006 for _, k := range keys { 1007 val[fmt.Sprint(k)] = vv.MapIndex(k).Interface() 1008 } 1009 value = val 1010 } 1011 } 1012 1013 // 14jul20. The following block of code has become something of a catch all for odd stuff 1014 // that might be passed in as a result of casting an arbitrary map[<T>]<T> to an mxj.Map 1015 // value and then call m.Xml or m.XmlIndent. See issue #71 (and #73) for such edge cases. 1016 switch value.(type) { 1017 // these types are handled during encoding 1018 case map[string]interface{}, []byte, string, float64, bool, int, int32, int64, float32, json.Number: 1019 case []map[string]interface{}, []string, []float64, []bool, []int, []int32, []int64, []float32, []json.Number: 1020 case []interface{}: 1021 case nil: 1022 value = "" 1023 default: 1024 // see if value is a struct, if so marshal using encoding/xml package 1025 if reflect.ValueOf(value).Kind() == reflect.Struct { 1026 if v, err := xml.Marshal(value); err != nil { 1027 return err 1028 } else { 1029 value = string(v) 1030 } 1031 } else { 1032 // coerce eveything else into a string value 1033 value = fmt.Sprint(value) 1034 } 1035 } 1036 1037 // start the XML tag with required indentaton and padding 1038 if doIndent { 1039 if _, err = b.WriteString(p.padding); err != nil { 1040 return err 1041 } 1042 } 1043 switch value.(type) { 1044 case []interface{}: 1045 default: 1046 if _, err = b.WriteString(`<` + key); err != nil { 1047 return err 1048 } 1049 } 1050 1051 switch value.(type) { 1052 case map[string]interface{}: 1053 vv := value.(map[string]interface{}) 1054 lenvv := len(vv) 1055 // scan out attributes - attribute keys have prepended attrPrefix 1056 attrlist := make([][2]string, len(vv)) 1057 var n int 1058 var ss string 1059 for k, v := range vv { 1060 if lenAttrPrefix > 0 && lenAttrPrefix < len(k) && k[:lenAttrPrefix] == attrPrefix { 1061 switch v.(type) { 1062 case string: 1063 if xmlEscapeChars { 1064 ss = escapeChars(v.(string)) 1065 } else { 1066 ss = v.(string) 1067 } 1068 attrlist[n][0] = k[lenAttrPrefix:] 1069 attrlist[n][1] = ss 1070 case float64, bool, int, int32, int64, float32, json.Number: 1071 attrlist[n][0] = k[lenAttrPrefix:] 1072 attrlist[n][1] = fmt.Sprintf("%v", v) 1073 case []byte: 1074 if xmlEscapeChars { 1075 ss = escapeChars(string(v.([]byte))) 1076 } else { 1077 ss = string(v.([]byte)) 1078 } 1079 attrlist[n][0] = k[lenAttrPrefix:] 1080 attrlist[n][1] = ss 1081 default: 1082 return fmt.Errorf("invalid attribute value for: %s:<%T>", k, v) 1083 } 1084 n++ 1085 } 1086 } 1087 if n > 0 { 1088 attrlist = attrlist[:n] 1089 sort.Sort(attrList(attrlist)) 1090 for _, v := range attrlist { 1091 if _, err = b.WriteString(` ` + v[0] + `="` + v[1] + `"`); err != nil { 1092 return err 1093 } 1094 } 1095 } 1096 // only attributes? 1097 if n == lenvv { 1098 if useGoXmlEmptyElemSyntax { 1099 if _, err = b.WriteString(`</` + key + ">"); err != nil { 1100 return err 1101 } 1102 } else { 1103 if _, err = b.WriteString(`/>`); err != nil { 1104 return err 1105 } 1106 } 1107 break 1108 } 1109 1110 // simple element? Note: '#text" is an invalid XML tag. 1111 isComplex := false 1112 if v, ok := vv["#text"]; ok && n+1 == lenvv { 1113 // just the value and attributes 1114 switch v.(type) { 1115 case string: 1116 if xmlEscapeChars { 1117 v = escapeChars(v.(string)) 1118 } else { 1119 v = v.(string) 1120 } 1121 case []byte: 1122 if xmlEscapeChars { 1123 v = escapeChars(string(v.([]byte))) 1124 } else { 1125 v = string(v.([]byte)) 1126 } 1127 } 1128 if _, err = b.WriteString(">" + fmt.Sprintf("%v", v)); err != nil { 1129 return err 1130 } 1131 endTag = true 1132 elen = 1 1133 isSimple = true 1134 break 1135 } else if ok { 1136 // need to handle when there are subelements in addition to the simple element value 1137 // issue #90 1138 switch v.(type) { 1139 case string: 1140 if xmlEscapeChars { 1141 v = escapeChars(v.(string)) 1142 } else { 1143 v = v.(string) 1144 } 1145 case []byte: 1146 if xmlEscapeChars { 1147 v = escapeChars(string(v.([]byte))) 1148 } else { 1149 v = string(v.([]byte)) 1150 } 1151 } 1152 if _, err = b.WriteString(">" + fmt.Sprintf("%v", v)); err != nil { 1153 return err 1154 } 1155 isComplex = true 1156 } 1157 1158 // close tag with possible attributes 1159 if !isComplex { 1160 if _, err = b.WriteString(">"); err != nil { 1161 return err 1162 } 1163 } 1164 if doIndent { 1165 // *s += "\n" 1166 if _, err = b.WriteString("\n"); err != nil { 1167 return err 1168 } 1169 } 1170 // something more complex 1171 p.mapDepth++ 1172 // extract the map k:v pairs and sort on key 1173 elemlist := make([][2]interface{}, len(vv)) 1174 n = 0 1175 for k, v := range vv { 1176 if k == "#text" { 1177 // simple element handled above 1178 continue 1179 } 1180 if lenAttrPrefix > 0 && lenAttrPrefix < len(k) && k[:lenAttrPrefix] == attrPrefix { 1181 continue 1182 } 1183 elemlist[n][0] = k 1184 elemlist[n][1] = v 1185 n++ 1186 } 1187 elemlist = elemlist[:n] 1188 sort.Sort(elemList(elemlist)) 1189 var i int 1190 for _, v := range elemlist { 1191 switch v[1].(type) { 1192 case []interface{}: 1193 default: 1194 if i == 0 && doIndent { 1195 p.Indent() 1196 } 1197 } 1198 i++ 1199 if err := marshalMapToXmlIndent(doIndent, b, v[0].(string), v[1], p); err != nil { 1200 return err 1201 } 1202 switch v[1].(type) { 1203 case []interface{}: // handled in []interface{} case 1204 default: 1205 if doIndent { 1206 p.Outdent() 1207 } 1208 } 1209 i-- 1210 } 1211 p.mapDepth-- 1212 endTag = true 1213 elen = 1 // we do have some content ... 1214 case []interface{}: 1215 // special case - found during implementing Issue #23 1216 if len(value.([]interface{})) == 0 { 1217 if doIndent { 1218 if _, err = b.WriteString(p.padding + p.indent); err != nil { 1219 return err 1220 } 1221 } 1222 if _, err = b.WriteString("<" + key); err != nil { 1223 return err 1224 } 1225 elen = 0 1226 endTag = true 1227 break 1228 } 1229 for _, v := range value.([]interface{}) { 1230 if doIndent { 1231 p.Indent() 1232 } 1233 if err := marshalMapToXmlIndent(doIndent, b, key, v, p); err != nil { 1234 return err 1235 } 1236 if doIndent { 1237 p.Outdent() 1238 } 1239 } 1240 return nil 1241 case []string: 1242 // This was added by https://github.com/slotix ... not a type that 1243 // would be encountered if mv generated from NewMapXml, NewMapJson. 1244 // Could be encountered in AnyXml(), so we'll let it stay, though 1245 // it should be merged with case []interface{}, above. 1246 //quick fix for []string type 1247 //[]string should be treated exaclty as []interface{} 1248 if len(value.([]string)) == 0 { 1249 if doIndent { 1250 if _, err = b.WriteString(p.padding + p.indent); err != nil { 1251 return err 1252 } 1253 } 1254 if _, err = b.WriteString("<" + key); err != nil { 1255 return err 1256 } 1257 elen = 0 1258 endTag = true 1259 break 1260 } 1261 for _, v := range value.([]string) { 1262 if doIndent { 1263 p.Indent() 1264 } 1265 if err := marshalMapToXmlIndent(doIndent, b, key, v, p); err != nil { 1266 return err 1267 } 1268 if doIndent { 1269 p.Outdent() 1270 } 1271 } 1272 return nil 1273 case nil: 1274 // terminate the tag 1275 if doIndent { 1276 // *s += p.padding 1277 if _, err = b.WriteString(p.padding); err != nil { 1278 return err 1279 } 1280 } 1281 if _, err = b.WriteString("<" + key); err != nil { 1282 return err 1283 } 1284 endTag, isSimple = true, true 1285 break 1286 default: // handle anything - even goofy stuff 1287 elen = 0 1288 switch value.(type) { 1289 case string: 1290 v := value.(string) 1291 if xmlEscapeChars { 1292 v = escapeChars(v) 1293 } 1294 elen = len(v) 1295 if elen > 0 { 1296 // *s += ">" + v 1297 if _, err = b.WriteString(">" + v); err != nil { 1298 return err 1299 } 1300 } 1301 case float64, bool, int, int32, int64, float32, json.Number: 1302 v := fmt.Sprintf("%v", value) 1303 elen = len(v) // always > 0 1304 if _, err = b.WriteString(">" + v); err != nil { 1305 return err 1306 } 1307 case []byte: // NOTE: byte is just an alias for uint8 1308 // similar to how xml.Marshal handles []byte structure members 1309 v := string(value.([]byte)) 1310 if xmlEscapeChars { 1311 v = escapeChars(v) 1312 } 1313 elen = len(v) 1314 if elen > 0 { 1315 // *s += ">" + v 1316 if _, err = b.WriteString(">" + v); err != nil { 1317 return err 1318 } 1319 } 1320 default: 1321 if _, err = b.WriteString(">"); err != nil { 1322 return err 1323 } 1324 var v []byte 1325 var err error 1326 if doIndent { 1327 v, err = xml.MarshalIndent(value, p.padding, p.indent) 1328 } else { 1329 v, err = xml.Marshal(value) 1330 } 1331 if err != nil { 1332 if _, err = b.WriteString(">UNKNOWN"); err != nil { 1333 return err 1334 } 1335 } else { 1336 elen = len(v) 1337 if elen > 0 { 1338 if _, err = b.Write(v); err != nil { 1339 return err 1340 } 1341 } 1342 } 1343 } 1344 isSimple = true 1345 endTag = true 1346 } 1347 if endTag { 1348 if doIndent { 1349 if !isSimple { 1350 if _, err = b.WriteString(p.padding); err != nil { 1351 return err 1352 } 1353 } 1354 } 1355 if elen > 0 || useGoXmlEmptyElemSyntax { 1356 if elen == 0 { 1357 if _, err = b.WriteString(">"); err != nil { 1358 return err 1359 } 1360 } 1361 if _, err = b.WriteString(`</` + key + ">"); err != nil { 1362 return err 1363 } 1364 } else { 1365 if _, err = b.WriteString(`/>`); err != nil { 1366 return err 1367 } 1368 } 1369 } 1370 if doIndent { 1371 if p.cnt > p.start { 1372 if _, err = b.WriteString("\n"); err != nil { 1373 return err 1374 } 1375 } 1376 p.Outdent() 1377 } 1378 1379 return nil 1380} 1381 1382// ============================ sort interface implementation ================= 1383 1384type attrList [][2]string 1385 1386func (a attrList) Len() int { 1387 return len(a) 1388} 1389 1390func (a attrList) Swap(i, j int) { 1391 a[i], a[j] = a[j], a[i] 1392} 1393 1394func (a attrList) Less(i, j int) bool { 1395 return a[i][0] <= a[j][0] 1396} 1397 1398type elemList [][2]interface{} 1399 1400func (e elemList) Len() int { 1401 return len(e) 1402} 1403 1404func (e elemList) Swap(i, j int) { 1405 e[i], e[j] = e[j], e[i] 1406} 1407 1408func (e elemList) Less(i, j int) bool { 1409 return e[i][0].(string) <= e[j][0].(string) 1410} 1411