1// Copyright 2012-2016 Charles Banning. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file 4 5// xml.go - basically the core of X2j for map[string]interface{} values. 6// NewMapXml, NewMapXmlReader, mv.Xml, mv.XmlWriter 7// see x2j and j2x for wrappers to provide end-to-end transformation of XML and JSON messages. 8 9package mxj 10 11import ( 12 "bytes" 13 "encoding/json" 14 "encoding/xml" 15 "errors" 16 "fmt" 17 "io" 18 "reflect" 19 "sort" 20 "strconv" 21 "strings" 22 "time" 23) 24 25// ------------------- NewMapXml & NewMapXmlReader ... ------------------------- 26 27// If XmlCharsetReader != nil, it will be used to decode the XML, if required. 28// Note: if CustomDecoder != nil, then XmlCharsetReader is ignored; 29// set the CustomDecoder attribute instead. 30// import ( 31// charset "code.google.com/p/go-charset/charset" 32// github.com/clbanning/mxj 33// ) 34// ... 35// mxj.XmlCharsetReader = charset.NewReader 36// m, merr := mxj.NewMapXml(xmlValue) 37var XmlCharsetReader func(charset string, input io.Reader) (io.Reader, error) 38 39// NewMapXml - convert a XML doc into a Map 40// (This is analogous to unmarshalling a JSON string to map[string]interface{} using json.Unmarshal().) 41// If the optional argument 'cast' is 'true', then values will be converted to boolean or float64 if possible. 42// 43// Converting XML to JSON is a simple as: 44// ... 45// mapVal, merr := mxj.NewMapXml(xmlVal) 46// if merr != nil { 47// // handle error 48// } 49// jsonVal, jerr := mapVal.Json() 50// if jerr != nil { 51// // handle error 52// } 53// 54// NOTES: 55// 1. The 'xmlVal' will be parsed looking for an xml.StartElement, so BOM and other 56// extraneous xml.CharData will be ignored unless io.EOF is reached first. 57// 2. If CoerceKeysToLower() has been called, then all key values will be lower case. 58// 3. 
If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case. 59func NewMapXml(xmlVal []byte, cast ...bool) (Map, error) { 60 var r bool 61 if len(cast) == 1 { 62 r = cast[0] 63 } 64 return xmlToMap(xmlVal, r) 65} 66 67// Get next XML doc from an io.Reader as a Map value. Returns Map value. 68// NOTES: 69// 1. The 'xmlReader' will be parsed looking for an xml.StartElement, so BOM and other 70// extraneous xml.CharData will be ignored unless io.EOF is reached first. 71// 2. If CoerceKeysToLower() has been called, then all key values will be lower case. 72// 3. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case. 73func NewMapXmlReader(xmlReader io.Reader, cast ...bool) (Map, error) { 74 var r bool 75 if len(cast) == 1 { 76 r = cast[0] 77 } 78 79 // We need to put an *os.File reader in a ByteReader or the xml.NewDecoder 80 // will wrap it in a bufio.Reader and seek on the file beyond where the 81 // xml.Decoder parses! 82 if _, ok := xmlReader.(io.ByteReader); !ok { 83 xmlReader = myByteReader(xmlReader) // see code at EOF 84 } 85 86 // build the map 87 return xmlReaderToMap(xmlReader, r) 88} 89 90// Get next XML doc from an io.Reader as a Map value. Returns Map value and slice with the raw XML. 91// NOTES: 92// 1. Due to the implementation of xml.Decoder, the raw XML off the reader is buffered to []byte 93// using a ByteReader. If the io.Reader is an os.File, there may be significant performance impact. 94// See the examples - getmetrics1.go through getmetrics4.go - for comparative use cases on a large 95// data set. If the io.Reader is wrapping a []byte value in-memory, however, such as http.Request.Body 96// you CAN use it to efficiently unmarshal a XML doc and retrieve the raw XML in a single call. 97// 2. The 'raw' return value may be larger than the XML text value. 98// 3. 
The 'xmlReader' will be parsed looking for an xml.StartElement, so BOM and other 99// extraneous xml.CharData will be ignored unless io.EOF is reached first. 100// 4. If CoerceKeysToLower() has been called, then all key values will be lower case. 101// 5. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case. 102func NewMapXmlReaderRaw(xmlReader io.Reader, cast ...bool) (Map, []byte, error) { 103 var r bool 104 if len(cast) == 1 { 105 r = cast[0] 106 } 107 // create TeeReader so we can retrieve raw XML 108 buf := make([]byte, 0) 109 wb := bytes.NewBuffer(buf) 110 trdr := myTeeReader(xmlReader, wb) // see code at EOF 111 112 m, err := xmlReaderToMap(trdr, r) 113 114 // retrieve the raw XML that was decoded 115 b := wb.Bytes() 116 117 if err != nil { 118 return nil, b, err 119 } 120 121 return m, b, nil 122} 123 124// xmlReaderToMap() - parse a XML io.Reader to a map[string]interface{} value 125func xmlReaderToMap(rdr io.Reader, r bool) (map[string]interface{}, error) { 126 // parse the Reader 127 p := xml.NewDecoder(rdr) 128 if CustomDecoder != nil { 129 useCustomDecoder(p) 130 } else { 131 p.CharsetReader = XmlCharsetReader 132 } 133 return xmlToMapParser("", nil, p, r) 134} 135 136// xmlToMap - convert a XML doc into map[string]interface{} value 137func xmlToMap(doc []byte, r bool) (map[string]interface{}, error) { 138 b := bytes.NewReader(doc) 139 p := xml.NewDecoder(b) 140 if CustomDecoder != nil { 141 useCustomDecoder(p) 142 } else { 143 p.CharsetReader = XmlCharsetReader 144 } 145 return xmlToMapParser("", nil, p, r) 146} 147 148// ===================================== where the work happens ============================= 149 150// PrependAttrWithHyphen. Prepend attribute tags with a hyphen. 151// Default is 'true'. (Not applicable to NewMapXmlSeq(), mv.XmlSeq(), etc.) 152// Note: 153// If 'false', unmarshaling and marshaling is not symmetric. 
Attributes will be 154// marshal'd as <attr_tag>attr</attr_tag> and may be part of a list. 155func PrependAttrWithHyphen(v bool) { 156 if v { 157 attrPrefix = "-" 158 lenAttrPrefix = len(attrPrefix) 159 return 160 } 161 attrPrefix = "" 162 lenAttrPrefix = len(attrPrefix) 163} 164 165// Include sequence id with inner tags. - per Sean Murphy, murphysean84@gmail.com. 166var includeTagSeqNum bool 167 168// IncludeTagSeqNum - include a "_seq":N key:value pair with each inner tag, denoting 169// its position when parsed. This is of limited usefulness, since list values cannot 170// be tagged with "_seq" without changing their depth in the Map. 171// So THIS SHOULD BE USED WITH CAUTION - see the test cases. Here's a sample of what 172// you get. 173/* 174 <Obj c="la" x="dee" h="da"> 175 <IntObj id="3"/> 176 <IntObj1 id="1"/> 177 <IntObj id="2"/> 178 <StrObj>hello</StrObj> 179 </Obj> 180 181 parses as: 182 183 { 184 Obj:{ 185 "-c":"la", 186 "-h":"da", 187 "-x":"dee", 188 "intObj":[ 189 { 190 "-id"="3", 191 "_seq":"0" // if mxj.Cast is passed, then: "_seq":0 192 }, 193 { 194 "-id"="2", 195 "_seq":"2" 196 }], 197 "intObj1":{ 198 "-id":"1", 199 "_seq":"1" 200 }, 201 "StrObj":{ 202 "#text":"hello", // simple element value gets "#text" tag 203 "_seq":"3" 204 } 205 } 206 } 207*/ 208func IncludeTagSeqNum(b bool) { 209 includeTagSeqNum = b 210} 211 212// all keys will be "lower case" 213var lowerCase bool 214 215// Coerce all tag values to keys in lower case. This is useful if you've got sources with variable 216// tag capitalization, and you want to use m.ValuesForKeys(), etc., with the key or path spec 217// in lower case. 218// CoerceKeysToLower() will toggle the coercion flag true|false - on|off 219// CoerceKeysToLower(true|false) will set the coercion flag on|off 220// 221// NOTE: only recognized by NewMapXml, NewMapXmlReader, and NewMapXmlReaderRaw functions as well as 222// the associated HandleXmlReader and HandleXmlReaderRaw. 
223func CoerceKeysToLower(b ...bool) { 224 if len(b) == 0 { 225 lowerCase = !lowerCase 226 } else if len(b) == 1 { 227 lowerCase = b[0] 228 } 229} 230 231// 25jun16: Allow user to specify the "prefix" character for XML attribute key labels. 232// We do this by replacing '`' constant with attrPrefix var, replacing useHyphen with attrPrefix = "", 233// and adding a SetAttrPrefix(s string) function. 234 235var attrPrefix string = `-` // the default 236var lenAttrPrefix int = 1 // the default 237 238// SetAttrPrefix changes the default, "-", to the specified value, s. 239// SetAttrPrefix("") is the same as PrependAttrWithHyphen(false). 240// (Not applicable for NewMapXmlSeq(), mv.XmlSeq(), etc.) 241func SetAttrPrefix(s string) { 242 attrPrefix = s 243 lenAttrPrefix = len(attrPrefix) 244} 245 246// 18jan17: Allows user to specify if the map keys should be in snake case instead 247// of the default hyphenated notation. 248var snakeCaseKeys bool 249 250// CoerceKeysToSnakeCase changes the default, false, to the specified value, b. 251// Note: the attribute prefix will be a hyphen, '-', or what ever string value has 252// been specified using SetAttrPrefix. 253func CoerceKeysToSnakeCase(b ...bool) { 254 if len(b) == 0 { 255 snakeCaseKeys = !snakeCaseKeys 256 } else if len(b) == 1 { 257 snakeCaseKeys = b[0] 258 } 259} 260 261// 05feb17: support processing XMPP streams (issue #36) 262var handleXMPPStreamTag bool 263 264// HandleXMPPStreamTag causes decoder to parse XMPP <stream:stream> elements. 265// If called with no argument, XMPP stream element handling is toggled on/off. 266// (See xmppStream_test.go for example.) 267// If called with NewMapXml, NewMapXmlReader, New MapXmlReaderRaw the "stream" 268// element will be returned as: 269// map["stream"]interface{}{map[-<attrs>]interface{}}. 
270// If called with NewMapSeq, NewMapSeqReader, NewMapSeqReaderRaw the "stream" 271// element will be returned as: 272// map["stream:stream"]interface{}{map["#attr"]interface{}{map[string]interface{}}} 273// where the "#attr" values have "#text" and "#seq" keys. (See NewMapXmlSeq.) 274func HandleXMPPStreamTag(b ...bool) { 275 if len(b) == 0 { 276 handleXMPPStreamTag = !handleXMPPStreamTag 277 } else if len(b) == 1 { 278 handleXMPPStreamTag = b[0] 279 } 280} 281 282// 21jan18 - decode all values as map["#text":value] (issue #56) 283var decodeSimpleValuesAsMap bool 284 285// DecodeSimpleValuesAsMap forces all values to be decoded as map["#text":<value>]. 286// If called with no argument, the decoding is toggled on/off. 287// 288// By default the NewMapXml functions decode simple values without attributes as 289// map[<tag>:<value>]. This function causes simple values without attributes to be 290// decoded the same as simple values with attributes - map[<tag>:map["#text":<value>]]. 291func DecodeSimpleValuesAsMap(b ...bool) { 292 if len(b) == 0 { 293 decodeSimpleValuesAsMap = !decodeSimpleValuesAsMap 294 } else if len(b) == 1 { 295 decodeSimpleValuesAsMap = b[0] 296 } 297} 298 299// xmlToMapParser (2015.11.12) - load a 'clean' XML doc into a map[string]interface{} directly. 300// A refactoring of xmlToTreeParser(), markDuplicate() and treeToMap() - here, all-in-one. 301// We've removed the intermediate *node tree with the allocation and subsequent rescanning. 302func xmlToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[string]interface{}, error) { 303 if lowerCase { 304 skey = strings.ToLower(skey) 305 } 306 if snakeCaseKeys { 307 skey = strings.Replace(skey, "-", "_", -1) 308 } 309 310 // NOTE: all attributes and sub-elements parsed into 'na', 'na' is returned as value for 'skey' in 'n'. 311 // Unless 'skey' is a simple element w/o attributes, in which case the xml.CharData value is the value. 
312 var n, na map[string]interface{} 313 var seq int // for includeTagSeqNum 314 315 // Allocate maps and load attributes, if any. 316 // NOTE: on entry from NewMapXml(), etc., skey=="", and we fall through 317 // to get StartElement then recurse with skey==xml.StartElement.Name.Local 318 // where we begin allocating map[string]interface{} values 'n' and 'na'. 319 if skey != "" { 320 n = make(map[string]interface{}) // old n 321 na = make(map[string]interface{}) // old n.nodes 322 if len(a) > 0 { 323 for _, v := range a { 324 if snakeCaseKeys { 325 v.Name.Local = strings.Replace(v.Name.Local, "-", "_", -1) 326 } 327 var key string 328 key = attrPrefix + v.Name.Local 329 if lowerCase { 330 key = strings.ToLower(key) 331 } 332 na[key] = cast(v.Value, r) 333 } 334 } 335 } 336 // Return XMPP <stream:stream> message. 337 if handleXMPPStreamTag && skey == "stream" { 338 n[skey] = na 339 return n, nil 340 } 341 342 for { 343 t, err := p.Token() 344 if err != nil { 345 if err != io.EOF { 346 return nil, errors.New("xml.Decoder.Token() - " + err.Error()) 347 } 348 return nil, err 349 } 350 switch t.(type) { 351 case xml.StartElement: 352 tt := t.(xml.StartElement) 353 354 // First call to xmlToMapParser() doesn't pass xml.StartElement - the map key. 355 // So when the loop is first entered, the first token is the root tag along 356 // with any attributes, which we process here. 357 // 358 // Subsequent calls to xmlToMapParser() will pass in tag+attributes for 359 // processing before getting the next token which is the element value, 360 // which is done above. 361 if skey == "" { 362 return xmlToMapParser(tt.Name.Local, tt.Attr, p, r) 363 } 364 365 // If not initializing the map, parse the element. 366 // len(nn) == 1, necessarily - it is just an 'n'. 367 nn, err := xmlToMapParser(tt.Name.Local, tt.Attr, p, r) 368 if err != nil { 369 return nil, err 370 } 371 372 // The nn map[string]interface{} value is a na[nn_key] value. 
373 // We need to see if nn_key already exists - means we're parsing a list. 374 // This may require converting na[nn_key] value into []interface{} type. 375 // First, extract the key:val for the map - it's a singleton. 376 // Note: 377 // * if CoerceKeysToLower() called, then key will be lower case. 378 // * if CoerceKeysToSnakeCase() called, then key will be converted to snake case. 379 var key string 380 var val interface{} 381 for key, val = range nn { 382 break 383 } 384 385 // IncludeTagSeqNum requests that the element be augmented with a "_seq" sub-element. 386 // In theory, we don't need this if len(na) == 1. But, we don't know what might 387 // come next - we're only parsing forward. So if you ask for 'includeTagSeqNum' you 388 // get it on every element. (Personally, I never liked this, but I added it on request 389 // and did get a $50 Amazon gift card in return - now we support it for backwards compatibility!) 390 if includeTagSeqNum { 391 switch val.(type) { 392 case []interface{}: 393 // noop - There's no clean way to handle this w/o changing message structure. 394 case map[string]interface{}: 395 val.(map[string]interface{})["_seq"] = seq // will overwrite an "_seq" XML tag 396 seq++ 397 case interface{}: // a non-nil simple element: string, float64, bool 398 v := map[string]interface{}{"#text": val} 399 v["_seq"] = seq 400 seq++ 401 val = v 402 } 403 } 404 405 // 'na' holding sub-elements of n. 406 // See if 'key' already exists. 407 // If 'key' exists, then this is a list, if not just add key:val to na. 408 if v, ok := na[key]; ok { 409 var a []interface{} 410 switch v.(type) { 411 case []interface{}: 412 a = v.([]interface{}) 413 default: // anything else - note: v.(type) != nil 414 a = []interface{}{v} 415 } 416 a = append(a, val) 417 na[key] = a 418 } else { 419 na[key] = val // save it as a singleton 420 } 421 case xml.EndElement: 422 // len(n) > 0 if this is a simple element w/o xml.Attrs - see xml.CharData case. 
423 if len(n) == 0 { 424 // If len(na)==0 we have an empty element == ""; 425 // it has no xml.Attr nor xml.CharData. 426 // Note: in original node-tree parser, val defaulted to ""; 427 // so we always had the default if len(node.nodes) == 0. 428 if len(na) > 0 { 429 n[skey] = na 430 } else { 431 n[skey] = "" // empty element 432 } 433 } 434 return n, nil 435 case xml.CharData: 436 // clean up possible noise 437 tt := strings.Trim(string(t.(xml.CharData)), "\t\r\b\n ") 438 if len(tt) > 0 { 439 if len(na) > 0 || decodeSimpleValuesAsMap { 440 na["#text"] = cast(tt, r) 441 } else if skey != "" { 442 n[skey] = cast(tt, r) 443 } else { 444 // per Adrian (http://www.adrianlungu.com/) catch stray text 445 // in decoder stream - 446 // https://github.com/clbanning/mxj/pull/14#issuecomment-182816374 447 // NOTE: CharSetReader must be set to non-UTF-8 CharSet or you'll get 448 // a p.Token() decoding error when the BOM is UTF-16 or UTF-32. 449 continue 450 } 451 } 452 default: 453 // noop 454 } 455 } 456} 457 458var castNanInf bool 459 460// Cast "Nan", "Inf", "-Inf" XML values to 'float64'. 461// By default, these values will be decoded as 'string'. 462func CastNanInf(b bool) { 463 castNanInf = b 464} 465 466// cast - try to cast string values to bool or float64 467func cast(s string, r bool) interface{} { 468 if r { 469 // handle nan and inf 470 if !castNanInf { 471 switch strings.ToLower(s) { 472 case "nan", "inf", "-inf": 473 return s 474 } 475 } 476 477 // handle numeric strings ahead of boolean 478 if f, err := strconv.ParseFloat(s, 64); err == nil { 479 return f 480 } 481 // ParseBool treats "1"==true & "0"==false, we've already scanned those 482 // values as float64. See if value has 't' or 'f' as initial screen to 483 // minimize calls to ParseBool; also, see if len(s) < 6. 
484 if len(s) > 0 && len(s) < 6 { 485 switch s[:1] { 486 case "t", "T", "f", "F": 487 if b, err := strconv.ParseBool(s); err == nil { 488 return b 489 } 490 } 491 } 492 } 493 return s 494} 495 496// ------------------ END: NewMapXml & NewMapXmlReader ------------------------- 497 498// ------------------ mv.Xml & mv.XmlWriter - from j2x ------------------------ 499 500const ( 501 DefaultRootTag = "doc" 502) 503 504var useGoXmlEmptyElemSyntax bool 505 506// XmlGoEmptyElemSyntax() - <tag ...></tag> rather than <tag .../>. 507// Go's encoding/xml package marshals empty XML elements as <tag ...></tag>. By default this package 508// encodes empty elements as <tag .../>. If you're marshaling Map values that include structures 509// (which are passed to xml.Marshal for encoding), this will let you conform to the standard package. 510func XmlGoEmptyElemSyntax() { 511 useGoXmlEmptyElemSyntax = true 512} 513 514// XmlDefaultEmptyElemSyntax() - <tag .../> rather than <tag ...></tag>. 515// Return XML encoding for empty elements to the default package setting. 516// Reverses effect of XmlGoEmptyElemSyntax(). 517func XmlDefaultEmptyElemSyntax() { 518 useGoXmlEmptyElemSyntax = false 519} 520 521// Encode a Map as XML. The companion of NewMapXml(). 522// The following rules apply. 523// - The key label "#text" is treated as the value for a simple element with attributes. 524// - Map keys that begin with a hyphen, '-', are interpreted as attributes. 525// It is an error if the attribute doesn't have a []byte, string, number, or boolean value. 526// - Map value type encoding: 527// > string, bool, float64, int, int32, int64, float32: per "%v" formating 528// > []bool, []uint8: by casting to string 529// > structures, etc.: handed to xml.Marshal() - if there is an error, the element 530// value is "UNKNOWN" 531// - Elements with only attribute values or are null are terminated using "/>". 
532// - If len(mv) == 1 and no rootTag is provided, then the map key is used as the root tag, possible. 533// Thus, `{ "key":"value" }` encodes as "<key>value</key>". 534// - To encode empty elements in a syntax consistent with encoding/xml call UseGoXmlEmptyElementSyntax(). 535// The attributes tag=value pairs are alphabetized by "tag". Also, when encoding map[string]interface{} values - 536// complex elements, etc. - the key:value pairs are alphabetized by key so the resulting tags will appear sorted. 537func (mv Map) Xml(rootTag ...string) ([]byte, error) { 538 m := map[string]interface{}(mv) 539 var err error 540 s := new(string) 541 p := new(pretty) // just a stub 542 543 if len(m) == 1 && len(rootTag) == 0 { 544 for key, value := range m { 545 // if it an array, see if all values are map[string]interface{} 546 // we force a new root tag if we'll end up with no key:value in the list 547 // so: key:[string_val, bool:true] --> <doc><key>string_val</key><bool>true</bool></doc> 548 switch value.(type) { 549 case []interface{}: 550 for _, v := range value.([]interface{}) { 551 switch v.(type) { 552 case map[string]interface{}: // noop 553 default: // anything else 554 err = mapToXmlIndent(false, s, DefaultRootTag, m, p) 555 goto done 556 } 557 } 558 } 559 err = mapToXmlIndent(false, s, key, value, p) 560 } 561 } else if len(rootTag) == 1 { 562 err = mapToXmlIndent(false, s, rootTag[0], m, p) 563 } else { 564 err = mapToXmlIndent(false, s, DefaultRootTag, m, p) 565 } 566done: 567 return []byte(*s), err 568} 569 570// The following implementation is provided only for symmetry with NewMapXmlReader[Raw] 571// The names will also provide a key for the number of return arguments. 572 573// Writes the Map as XML on the Writer. 574// See Xml() for encoding rules. 575func (mv Map) XmlWriter(xmlWriter io.Writer, rootTag ...string) error { 576 x, err := mv.Xml(rootTag...) 
577 if err != nil { 578 return err 579 } 580 581 _, err = xmlWriter.Write(x) 582 return err 583} 584 585// Writes the Map as XML on the Writer. []byte is the raw XML that was written. 586// See Xml() for encoding rules. 587func (mv Map) XmlWriterRaw(xmlWriter io.Writer, rootTag ...string) ([]byte, error) { 588 x, err := mv.Xml(rootTag...) 589 if err != nil { 590 return x, err 591 } 592 593 _, err = xmlWriter.Write(x) 594 return x, err 595} 596 597// Writes the Map as pretty XML on the Writer. 598// See Xml() for encoding rules. 599func (mv Map) XmlIndentWriter(xmlWriter io.Writer, prefix, indent string, rootTag ...string) error { 600 x, err := mv.XmlIndent(prefix, indent, rootTag...) 601 if err != nil { 602 return err 603 } 604 605 _, err = xmlWriter.Write(x) 606 return err 607} 608 609// Writes the Map as pretty XML on the Writer. []byte is the raw XML that was written. 610// See Xml() for encoding rules. 611func (mv Map) XmlIndentWriterRaw(xmlWriter io.Writer, prefix, indent string, rootTag ...string) ([]byte, error) { 612 x, err := mv.XmlIndent(prefix, indent, rootTag...) 613 if err != nil { 614 return x, err 615 } 616 617 _, err = xmlWriter.Write(x) 618 return x, err 619} 620 621// -------------------- END: mv.Xml & mv.XmlWriter ------------------------------- 622 623// -------------- Handle XML stream by processing Map value -------------------- 624 625// Default poll delay to keep Handler from spinning on an open stream 626// like sitting on os.Stdin waiting for imput. 627var xhandlerPollInterval = time.Millisecond 628 629// Bulk process XML using handlers that process a Map value. 630// 'rdr' is an io.Reader for XML (stream) 631// 'mapHandler' is the Map processor. Return of 'false' stops io.Reader processing. 632// 'errHandler' is the error processor. Return of 'false' stops io.Reader processing and returns the error. 633// Note: mapHandler() and errHandler() calls are blocking, so reading and processing of messages is serialized. 
634// This means that you can stop reading the file on error or after processing a particular message. 635// To have reading and handling run concurrently, pass argument to a go routine in handler and return 'true'. 636func HandleXmlReader(xmlReader io.Reader, mapHandler func(Map) bool, errHandler func(error) bool) error { 637 var n int 638 for { 639 m, merr := NewMapXmlReader(xmlReader) 640 n++ 641 642 // handle error condition with errhandler 643 if merr != nil && merr != io.EOF { 644 merr = fmt.Errorf("[xmlReader: %d] %s", n, merr.Error()) 645 if ok := errHandler(merr); !ok { 646 // caused reader termination 647 return merr 648 } 649 continue 650 } 651 652 // pass to maphandler 653 if len(m) != 0 { 654 if ok := mapHandler(m); !ok { 655 break 656 } 657 } else if merr != io.EOF { 658 time.Sleep(xhandlerPollInterval) 659 } 660 661 if merr == io.EOF { 662 break 663 } 664 } 665 return nil 666} 667 668// Bulk process XML using handlers that process a Map value and the raw XML. 669// 'rdr' is an io.Reader for XML (stream) 670// 'mapHandler' is the Map and raw XML - []byte - processor. Return of 'false' stops io.Reader processing. 671// 'errHandler' is the error and raw XML processor. Return of 'false' stops io.Reader processing and returns the error. 672// Note: mapHandler() and errHandler() calls are blocking, so reading and processing of messages is serialized. 673// This means that you can stop reading the file on error or after processing a particular message. 674// To have reading and handling run concurrently, pass argument(s) to a go routine in handler and return 'true'. 675// See NewMapXmlReaderRaw for comment on performance associated with retrieving raw XML from a Reader. 
676func HandleXmlReaderRaw(xmlReader io.Reader, mapHandler func(Map, []byte) bool, errHandler func(error, []byte) bool) error { 677 var n int 678 for { 679 m, raw, merr := NewMapXmlReaderRaw(xmlReader) 680 n++ 681 682 // handle error condition with errhandler 683 if merr != nil && merr != io.EOF { 684 merr = fmt.Errorf("[xmlReader: %d] %s", n, merr.Error()) 685 if ok := errHandler(merr, raw); !ok { 686 // caused reader termination 687 return merr 688 } 689 continue 690 } 691 692 // pass to maphandler 693 if len(m) != 0 { 694 if ok := mapHandler(m, raw); !ok { 695 break 696 } 697 } else if merr != io.EOF { 698 time.Sleep(xhandlerPollInterval) 699 } 700 701 if merr == io.EOF { 702 break 703 } 704 } 705 return nil 706} 707 708// ----------------- END: Handle XML stream by processing Map value -------------- 709 710// -------- a hack of io.TeeReader ... need one that's an io.ByteReader for xml.NewDecoder() ---------- 711 712// This is a clone of io.TeeReader with the additional method t.ReadByte(). 713// Thus, this TeeReader is also an io.ByteReader. 714// This is necessary because xml.NewDecoder uses a ByteReader not a Reader. It appears to have been written 715// with bufio.Reader or bytes.Reader in mind ... not a generic io.Reader, which doesn't have to have ReadByte().. 716// If NewDecoder is passed a Reader that does not satisfy ByteReader() it wraps the Reader with 717// bufio.NewReader and uses ReadByte rather than Read that runs the TeeReader pipe logic. 718 719type teeReader struct { 720 r io.Reader 721 w io.Writer 722 b []byte 723} 724 725func myTeeReader(r io.Reader, w io.Writer) io.Reader { 726 b := make([]byte, 1) 727 return &teeReader{r, w, b} 728} 729 730// need for io.Reader - but we don't use it ... 
731func (t *teeReader) Read(p []byte) (int, error) { 732 return 0, nil 733} 734 735func (t *teeReader) ReadByte() (byte, error) { 736 n, err := t.r.Read(t.b) 737 if n > 0 { 738 if _, err := t.w.Write(t.b[:1]); err != nil { 739 return t.b[0], err 740 } 741 } 742 return t.b[0], err 743} 744 745// For use with NewMapXmlReader & NewMapXmlSeqReader. 746type byteReader struct { 747 r io.Reader 748 b []byte 749} 750 751func myByteReader(r io.Reader) io.Reader { 752 b := make([]byte, 1) 753 return &byteReader{r, b} 754} 755 756// Need for io.Reader interface ... 757// Needed if reading a malformed http.Request.Body - issue #38. 758func (b *byteReader) Read(p []byte) (int, error) { 759 return b.r.Read(p) 760} 761 762func (b *byteReader) ReadByte() (byte, error) { 763 _, err := b.r.Read(b.b) 764 if len(b.b) > 0 { 765 return b.b[0], err 766 } 767 var c byte 768 return c, err 769} 770 771// ----------------------- END: io.TeeReader hack ----------------------------------- 772 773// ---------------------- XmlIndent - from j2x package ---------------------------- 774 775// Encode a map[string]interface{} as a pretty XML string. 776// See Xml for encoding rules. 
777func (mv Map) XmlIndent(prefix, indent string, rootTag ...string) ([]byte, error) { 778 m := map[string]interface{}(mv) 779 780 var err error 781 s := new(string) 782 p := new(pretty) 783 p.indent = indent 784 p.padding = prefix 785 786 if len(m) == 1 && len(rootTag) == 0 { 787 // this can extract the key for the single map element 788 // use it if it isn't a key for a list 789 for key, value := range m { 790 if _, ok := value.([]interface{}); ok { 791 err = mapToXmlIndent(true, s, DefaultRootTag, m, p) 792 } else { 793 err = mapToXmlIndent(true, s, key, value, p) 794 } 795 } 796 } else if len(rootTag) == 1 { 797 err = mapToXmlIndent(true, s, rootTag[0], m, p) 798 } else { 799 err = mapToXmlIndent(true, s, DefaultRootTag, m, p) 800 } 801 return []byte(*s), err 802} 803 804type pretty struct { 805 indent string 806 cnt int 807 padding string 808 mapDepth int 809 start int 810} 811 812func (p *pretty) Indent() { 813 p.padding += p.indent 814 p.cnt++ 815} 816 817func (p *pretty) Outdent() { 818 if p.cnt > 0 { 819 p.padding = p.padding[:len(p.padding)-len(p.indent)] 820 p.cnt-- 821 } 822} 823 824// where the work actually happens 825// returns an error if an attribute is not atomic 826func mapToXmlIndent(doIndent bool, s *string, key string, value interface{}, pp *pretty) error { 827 var endTag bool 828 var isSimple bool 829 var elen int 830 p := &pretty{pp.indent, pp.cnt, pp.padding, pp.mapDepth, pp.start} 831 832 // per issue #48, 18apr18 - try and coerce maps to map[string]interface{} 833 // Don't need for mapToXmlSeqIndent, since maps there are decoded by NewMapXmlSeq(). 
834 if reflect.ValueOf(value).Kind() == reflect.Map { 835 switch value.(type) { 836 case map[string]interface{}: 837 default: 838 val := make(map[string]interface{}) 839 vv := reflect.ValueOf(value) 840 keys := vv.MapKeys() 841 for _, k := range keys { 842 val[fmt.Sprint(k)] = vv.MapIndex(k).Interface() 843 } 844 value = val 845 } 846 } 847 848 switch value.(type) { 849 // special handling of []interface{} values when len(value) == 0 850 case map[string]interface{}, []byte, string, float64, bool, int, int32, int64, float32, json.Number: 851 if doIndent { 852 *s += p.padding 853 } 854 *s += `<` + key 855 } 856 switch value.(type) { 857 case map[string]interface{}: 858 vv := value.(map[string]interface{}) 859 lenvv := len(vv) 860 // scan out attributes - attribute keys have prepended attrPrefix 861 attrlist := make([][2]string, len(vv)) 862 var n int 863 var ss string 864 for k, v := range vv { 865 if lenAttrPrefix > 0 && lenAttrPrefix < len(k) && k[:lenAttrPrefix] == attrPrefix { 866 switch v.(type) { 867 case string: 868 if xmlEscapeChars { 869 ss = escapeChars(v.(string)) 870 } else { 871 ss = v.(string) 872 } 873 attrlist[n][0] = k[lenAttrPrefix:] 874 attrlist[n][1] = ss 875 case float64, bool, int, int32, int64, float32, json.Number: 876 attrlist[n][0] = k[lenAttrPrefix:] 877 attrlist[n][1] = fmt.Sprintf("%v", v) 878 case []byte: 879 if xmlEscapeChars { 880 ss = escapeChars(string(v.([]byte))) 881 } else { 882 ss = string(v.([]byte)) 883 } 884 attrlist[n][0] = k[lenAttrPrefix:] 885 attrlist[n][1] = ss 886 default: 887 return fmt.Errorf("invalid attribute value for: %s:<%T>", k, v) 888 } 889 n++ 890 } 891 } 892 if n > 0 { 893 attrlist = attrlist[:n] 894 sort.Sort(attrList(attrlist)) 895 for _, v := range attrlist { 896 *s += ` ` + v[0] + `="` + v[1] + `"` 897 } 898 } 899 // only attributes? 900 if n == lenvv { 901 if useGoXmlEmptyElemSyntax { 902 *s += `</` + key + ">" 903 } else { 904 *s += `/>` 905 } 906 break 907 } 908 909 // simple element? 
Note: '#text" is an invalid XML tag. 910 if v, ok := vv["#text"]; ok && n+1 == lenvv { 911 switch v.(type) { 912 case string: 913 if xmlEscapeChars { 914 v = escapeChars(v.(string)) 915 } else { 916 v = v.(string) 917 } 918 case []byte: 919 if xmlEscapeChars { 920 v = escapeChars(string(v.([]byte))) 921 } 922 } 923 *s += ">" + fmt.Sprintf("%v", v) 924 endTag = true 925 elen = 1 926 isSimple = true 927 break 928 } else if ok { 929 // Handle edge case where simple element with attributes 930 // is unmarshal'd using NewMapXml() where attribute prefix 931 // has been set to "". 932 // TODO(clb): should probably scan all keys for invalid chars. 933 return fmt.Errorf("invalid attribute key label: #text - due to attributes not being prefixed") 934 } 935 936 // close tag with possible attributes 937 *s += ">" 938 if doIndent { 939 *s += "\n" 940 } 941 // something more complex 942 p.mapDepth++ 943 // extract the map k:v pairs and sort on key 944 elemlist := make([][2]interface{}, len(vv)) 945 n = 0 946 for k, v := range vv { 947 if lenAttrPrefix > 0 && lenAttrPrefix < len(k) && k[:lenAttrPrefix] == attrPrefix { 948 continue 949 } 950 elemlist[n][0] = k 951 elemlist[n][1] = v 952 n++ 953 } 954 elemlist = elemlist[:n] 955 sort.Sort(elemList(elemlist)) 956 var i int 957 for _, v := range elemlist { 958 switch v[1].(type) { 959 case []interface{}: 960 default: 961 if i == 0 && doIndent { 962 p.Indent() 963 } 964 } 965 i++ 966 if err := mapToXmlIndent(doIndent, s, v[0].(string), v[1], p); err != nil { 967 return err 968 } 969 switch v[1].(type) { 970 case []interface{}: // handled in []interface{} case 971 default: 972 if doIndent { 973 p.Outdent() 974 } 975 } 976 i-- 977 } 978 p.mapDepth-- 979 endTag = true 980 elen = 1 // we do have some content ... 
981 case []interface{}: 982 // special case - found during implementing Issue #23 983 if len(value.([]interface{})) == 0 { 984 if doIndent { 985 *s += p.padding + p.indent 986 } 987 *s += "<" + key 988 elen = 0 989 endTag = true 990 break 991 } 992 for _, v := range value.([]interface{}) { 993 if doIndent { 994 p.Indent() 995 } 996 if err := mapToXmlIndent(doIndent, s, key, v, p); err != nil { 997 return err 998 } 999 if doIndent { 1000 p.Outdent() 1001 } 1002 } 1003 return nil 1004 case []string: 1005 // This was added by https://github.com/slotix ... not a type that 1006 // would be encountered if mv generated from NewMapXml, NewMapJson. 1007 // Could be encountered in AnyXml(), so we'll let it stay, though 1008 // it should be merged with case []interface{}, above. 1009 //quick fix for []string type 1010 //[]string should be treated exaclty as []interface{} 1011 if len(value.([]string)) == 0 { 1012 if doIndent { 1013 *s += p.padding + p.indent 1014 } 1015 *s += "<" + key 1016 elen = 0 1017 endTag = true 1018 break 1019 } 1020 for _, v := range value.([]string) { 1021 if doIndent { 1022 p.Indent() 1023 } 1024 if err := mapToXmlIndent(doIndent, s, key, v, p); err != nil { 1025 return err 1026 } 1027 if doIndent { 1028 p.Outdent() 1029 } 1030 } 1031 return nil 1032 case nil: 1033 // terminate the tag 1034 if doIndent { 1035 *s += p.padding 1036 } 1037 *s += "<" + key 1038 endTag, isSimple = true, true 1039 break 1040 default: // handle anything - even goofy stuff 1041 elen = 0 1042 switch value.(type) { 1043 case string: 1044 v := value.(string) 1045 if xmlEscapeChars { 1046 v = escapeChars(v) 1047 } 1048 elen = len(v) 1049 if elen > 0 { 1050 *s += ">" + v 1051 } 1052 case float64, bool, int, int32, int64, float32, json.Number: 1053 v := fmt.Sprintf("%v", value) 1054 elen = len(v) // always > 0 1055 *s += ">" + v 1056 case []byte: // NOTE: byte is just an alias for uint8 1057 // similar to how xml.Marshal handles []byte structure members 1058 v := 
// ============================ sort interface implementation =================

// attrList holds XML attribute pairs - element [0] is the attribute name and
// element [1] is its already-formatted value. It implements sort.Interface so
// that attributes can be ordered by name.
type attrList [][2]string

// Len returns the number of attribute pairs in the list.
func (a attrList) Len() int {
	return len(a)
}

// Swap exchanges the attribute pairs at indexes i and j.
func (a attrList) Swap(i, j int) {
	a[i], a[j] = a[j], a[i]
}

// Less reports whether the name of pair i sorts strictly before the name of
// pair j. The comparison is '<' rather than '<=' because sort.Interface
// treats two elements as equal only when Less(i, j) and Less(j, i) are both
// false; a non-strict comparison would report equal names as each preceding
// the other.
func (a attrList) Less(i, j int) bool {
	return a[i][0] < a[j][0]
}

// elemList holds map entries as pairs - element [0] is the key, which must be
// a string, and element [1] is the associated value. It implements
// sort.Interface so that entries can be ordered by key.
type elemList [][2]interface{}

// Len returns the number of entries in the list.
func (e elemList) Len() int {
	return len(e)
}

// Swap exchanges the entries at indexes i and j.
func (e elemList) Swap(i, j int) {
	e[i], e[j] = e[j], e[i]
}

// Less reports whether the key of entry i sorts strictly before the key of
// entry j. It panics if either key is not a string. As with attrList.Less,
// the comparison is strict so that equal keys compare as equal.
func (e elemList) Less(i, j int) bool {
	return e[i][0].(string) < e[j][0].(string)
}