1 //! Defines zero-copy XML events used throughout this library. 2 3 pub mod attributes; 4 5 #[cfg(feature = "encoding_rs")] 6 use encoding_rs::Encoding; 7 use std::borrow::Cow; 8 use std::collections::HashMap; 9 use std::io::BufRead; 10 use std::ops::Deref; 11 use std::str::from_utf8; 12 13 use self::attributes::{Attribute, Attributes}; 14 use errors::{Error, Result}; 15 use escape::{do_unescape, escape}; 16 use reader::Reader; 17 18 use memchr; 19 20 /// Opening tag data (`Event::Start`), with optional attributes. 21 /// 22 /// `<name attr="value">`. 23 /// 24 /// The name can be accessed using the [`name`], [`local_name`] or [`unescaped`] methods. An 25 /// iterator over the attributes is returned by the [`attributes`] method. 26 /// 27 /// [`name`]: #method.name 28 /// [`local_name`]: #method.local_name 29 /// [`unescaped`]: #method.unescaped 30 /// [`attributes`]: #method.attributes 31 #[derive(Clone)] 32 pub struct BytesStart<'a> { 33 /// content of the element, before any utf8 conversion 34 buf: Cow<'a, [u8]>, 35 /// end of the element name, the name starts at that the start of `buf` 36 name_len: usize, 37 } 38 39 impl<'a> BytesStart<'a> { 40 /// Creates a new `BytesStart` from the given content (name + attributes). 41 /// 42 /// # Warning 43 /// 44 /// `&content[..name_len]` is not checked to be a valid name 45 #[inline] borrowed(content: &'a [u8], name_len: usize) -> Self46 pub fn borrowed(content: &'a [u8], name_len: usize) -> Self { 47 BytesStart { 48 buf: Cow::Borrowed(content), 49 name_len, 50 } 51 } 52 53 /// Creates a new `BytesStart` from the given name. 54 /// 55 /// # Warning 56 /// 57 /// `&content` is not checked to be a valid name 58 #[inline] borrowed_name(name: &'a [u8]) -> BytesStart<'a>59 pub fn borrowed_name(name: &'a [u8]) -> BytesStart<'a> { 60 Self::borrowed(name, name.len()) 61 } 62 63 /// Creates a new `BytesStart` from the given content (name + attributes) 64 /// 65 /// Owns its contents. 66 #[inline] owned<C: Into<Vec<u8>>>(content: C, name_len: usize) -> BytesStart<'static>67 pub fn owned<C: Into<Vec<u8>>>(content: C, name_len: usize) -> BytesStart<'static> { 68 BytesStart { 69 buf: Cow::Owned(content.into()), 70 name_len, 71 } 72 } 73 74 /// Creates a new `BytesStart` from the given name 75 /// 76 /// Owns its contents. 77 #[inline] owned_name<C: Into<Vec<u8>>>(name: C) -> BytesStart<'static>78 pub fn owned_name<C: Into<Vec<u8>>>(name: C) -> BytesStart<'static> { 79 let content = name.into(); 80 BytesStart { 81 name_len: content.len(), 82 buf: Cow::Owned(content), 83 } 84 } 85 86 /// Converts the event into an owned event. into_owned(self) -> BytesStart<'static>87 pub fn into_owned(self) -> BytesStart<'static> { 88 Self::owned(self.buf.into_owned(), self.name_len) 89 } 90 91 /// Converts the event into an owned event without taking ownership of Event to_owned(&self) -> BytesStart<'static>92 pub fn to_owned(&self) -> BytesStart<'static> { 93 Self::owned(self.buf.to_owned(), self.name_len) 94 } 95 96 /// Converts the event into a borrowed event. Most useful when paired with [`to_end`]. 97 /// 98 /// # Example 99 /// 100 /// ``` 101 /// # use quick_xml::{Error, Writer}; 102 /// use quick_xml::events::{BytesStart, Event}; 103 /// 104 /// struct SomeStruct<'a> { 105 /// attrs: BytesStart<'a>, 106 /// // ... 107 /// } 108 /// # impl<'a> SomeStruct<'a> { 109 /// # fn example(&self) -> Result<(), Error> { 110 /// # let mut writer = Writer::new(Vec::new()); 111 /// 112 /// writer.write_event(Event::Start(self.attrs.to_borrowed()))?; 113 /// // ... 114 /// writer.write_event(Event::End(self.attrs.to_end()))?; 115 /// # Ok(()) 116 /// # }} 117 /// ``` 118 /// 119 /// [`to_end`]: #method.to_end to_borrowed(&self) -> BytesStart120 pub fn to_borrowed(&self) -> BytesStart { 121 BytesStart::borrowed(&self.buf, self.name_len) 122 } 123 124 /// Creates new paired close tag to_end(&self) -> BytesEnd125 pub fn to_end(&self) -> BytesEnd { 126 BytesEnd::borrowed(self.name()) 127 } 128 129 /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator. 130 /// 131 /// The yielded items must be convertible to [`Attribute`] using `Into`. 132 /// 133 /// [`Attribute`]: attributes/struct.Attributes.html with_attributes<'b, I>(mut self, attributes: I) -> Self where I: IntoIterator, I::Item: Into<Attribute<'b>>,134 pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self 135 where 136 I: IntoIterator, 137 I::Item: Into<Attribute<'b>>, 138 { 139 self.extend_attributes(attributes); 140 self 141 } 142 143 /// Gets the undecoded raw tag name as a `&[u8]`. 144 #[inline] name(&self) -> &[u8]145 pub fn name(&self) -> &[u8] { 146 &self.buf[..self.name_len] 147 } 148 149 /// Gets the undecoded raw local tag name (excluding namespace) as a `&[u8]`. 150 /// 151 /// All content up to and including the first `:` character is removed from the tag name. 152 #[inline] local_name(&self) -> &[u8]153 pub fn local_name(&self) -> &[u8] { 154 let name = self.name(); 155 memchr::memchr(b':', name).map_or(name, |i| &name[i + 1..]) 156 } 157 158 /// Gets the unescaped tag name. 159 /// 160 /// XML escape sequences like "`<`" will be replaced by their unescaped characters like 161 /// "`<`". 162 /// 163 /// See also [`unescaped_with_custom_entities()`](#method.unescaped_with_custom_entities) 164 #[inline] unescaped(&self) -> Result<Cow<[u8]>>165 pub fn unescaped(&self) -> Result<Cow<[u8]>> { 166 self.make_unescaped(None) 167 } 168 169 /// Gets the unescaped tag name, using custom entities. 170 /// 171 /// XML escape sequences like "`<`" will be replaced by their unescaped characters like 172 /// "`<`". 173 /// Additional entities can be provided in `custom_entities`. 174 /// 175 /// # Pre-condition 176 /// 177 /// The keys and values of `custom_entities`, if any, must be valid UTF-8. 178 /// 179 /// See also [`unescaped()`](#method.unescaped) 180 #[inline] unescaped_with_custom_entities<'s>( &'s self, custom_entities: &HashMap<Vec<u8>, Vec<u8>>, ) -> Result<Cow<'s, [u8]>>181 pub fn unescaped_with_custom_entities<'s>( 182 &'s self, 183 custom_entities: &HashMap<Vec<u8>, Vec<u8>>, 184 ) -> Result<Cow<'s, [u8]>> { 185 self.make_unescaped(Some(custom_entities)) 186 } 187 188 #[inline] make_unescaped<'s>( &'s self, custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, ) -> Result<Cow<'s, [u8]>>189 fn make_unescaped<'s>( 190 &'s self, 191 custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, 192 ) -> Result<Cow<'s, [u8]>> { 193 do_unescape(&*self.buf, custom_entities).map_err(Error::EscapeError) 194 } 195 196 /// Returns an iterator over the attributes of this tag. attributes(&self) -> Attributes197 pub fn attributes(&self) -> Attributes { 198 Attributes::new(self, self.name_len) 199 } 200 201 /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`). html_attributes(&self) -> Attributes202 pub fn html_attributes(&self) -> Attributes { 203 Attributes::html(self, self.name_len) 204 } 205 206 /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`, 207 /// including the whitespace after the tag name if there is any. 208 #[inline] attributes_raw(&self) -> &[u8]209 pub fn attributes_raw(&self) -> &[u8] { 210 &self.buf[self.name_len..] 211 } 212 213 /// Add additional attributes to this tag using an iterator. 214 /// 215 /// The yielded items must be convertible to [`Attribute`] using `Into`. 216 /// 217 /// [`Attribute`]: attributes/struct.Attributes.html extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a> where I: IntoIterator, I::Item: Into<Attribute<'b>>,218 pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a> 219 where 220 I: IntoIterator, 221 I::Item: Into<Attribute<'b>>, 222 { 223 for attr in attributes { 224 self.push_attribute(attr); 225 } 226 self 227 } 228 229 /// Returns the unescaped and decoded string value. 230 /// 231 /// This allocates a `String` in all cases. For performance reasons it might be a better idea to 232 /// instead use one of: 233 /// 234 /// * [`unescaped()`], as it doesn't allocate when no escape sequences are used. 235 /// * [`Reader::decode()`], as it only allocates when the decoding can't be performed otherwise. 236 /// 237 /// [`unescaped()`]: #method.unescaped 238 /// [`Reader::decode()`]: ../reader/struct.Reader.html#method.decode 239 #[inline] unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String>240 pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> { 241 self.do_unescape_and_decode_with_custom_entities(reader, None) 242 } 243 244 /// Returns the unescaped and decoded string value with custom entities. 245 /// 246 /// This allocates a `String` in all cases. For performance reasons it might be a better idea to 247 /// instead use one of: 248 /// 249 /// * [`unescaped_with_custom_entities()`], as it doesn't allocate when no escape sequences are used. 250 /// * [`Reader::decode()`], as it only allocates when the decoding can't be performed otherwise. 251 /// 252 /// [`unescaped_with_custom_entities()`]: #method.unescaped_with_custom_entities 253 /// [`Reader::decode()`]: ../reader/struct.Reader.html#method.decode 254 /// 255 /// # Pre-condition 256 /// 257 /// The keys and values of `custom_entities`, if any, must be valid UTF-8. 258 #[inline] unescape_and_decode_with_custom_entities<B: BufRead>( &self, reader: &Reader<B>, custom_entities: &HashMap<Vec<u8>, Vec<u8>>, ) -> Result<String>259 pub fn unescape_and_decode_with_custom_entities<B: BufRead>( 260 &self, 261 reader: &Reader<B>, 262 custom_entities: &HashMap<Vec<u8>, Vec<u8>>, 263 ) -> Result<String> { 264 self.do_unescape_and_decode_with_custom_entities(reader, Some(custom_entities)) 265 } 266 267 #[cfg(feature = "encoding")] 268 #[inline] do_unescape_and_decode_with_custom_entities<B: BufRead>( &self, reader: &Reader<B>, custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, ) -> Result<String>269 fn do_unescape_and_decode_with_custom_entities<B: BufRead>( 270 &self, 271 reader: &Reader<B>, 272 custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, 273 ) -> Result<String> { 274 let decoded = reader.decode(&*self); 275 let unescaped = 276 do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; 277 String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) 278 } 279 280 #[cfg(not(feature = "encoding"))] 281 #[inline] do_unescape_and_decode_with_custom_entities<B: BufRead>( &self, reader: &Reader<B>, custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, ) -> Result<String>282 fn do_unescape_and_decode_with_custom_entities<B: BufRead>( 283 &self, 284 reader: &Reader<B>, 285 custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, 286 ) -> Result<String> { 287 let decoded = reader.decode(&*self)?; 288 let unescaped = 289 do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; 290 String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) 291 } 292 293 /// Adds an attribute to this element. push_attribute<'b, A: Into<Attribute<'b>>>(&mut self, attr: A)294 pub fn push_attribute<'b, A: Into<Attribute<'b>>>(&mut self, attr: A) { 295 let a = attr.into(); 296 let bytes = self.buf.to_mut(); 297 bytes.push(b' '); 298 bytes.extend_from_slice(a.key); 299 bytes.extend_from_slice(b"=\""); 300 bytes.extend_from_slice(&*a.value); 301 bytes.push(b'"'); 302 } 303 304 /// Edit the name of the BytesStart in-place 305 /// 306 /// # Warning 307 /// 308 /// `name` is not checked to be a valid name set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a>309 pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> { 310 let bytes = self.buf.to_mut(); 311 bytes.splice(..self.name_len, name.iter().cloned()); 312 self.name_len = name.len(); 313 self 314 } 315 316 /// Remove all attributes from the ByteStart clear_attributes(&mut self) -> &mut BytesStart<'a>317 pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> { 318 self.buf.to_mut().truncate(self.name_len); 319 self 320 } 321 } 322 323 impl<'a> std::fmt::Debug for BytesStart<'a> { fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result324 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 325 use crate::utils::write_byte_string; 326 327 write!(f, "BytesStart {{ buf: ")?; 328 write_byte_string(f, &self.buf)?; 329 write!(f, ", name_len: {} }}", self.name_len) 330 } 331 } 332 333 /// An XML declaration (`Event::Decl`). 334 /// 335 /// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd) 336 #[derive(Clone, Debug)] 337 pub struct BytesDecl<'a> { 338 element: BytesStart<'a>, 339 } 340 341 impl<'a> BytesDecl<'a> { 342 /// Creates a `BytesDecl` from a `BytesStart` from_start(start: BytesStart<'a>) -> BytesDecl<'a>343 pub fn from_start(start: BytesStart<'a>) -> BytesDecl<'a> { 344 BytesDecl { element: start } 345 } 346 347 /// Gets xml version, including quotes (' or ") version(&self) -> Result<Cow<[u8]>>348 pub fn version(&self) -> Result<Cow<[u8]>> { 349 // The version *must* be the first thing in the declaration. 350 match self.element.attributes().next() { 351 Some(Err(e)) => Err(e), 352 Some(Ok(Attribute { 353 key: b"version", 354 value: v, 355 })) => Ok(v), 356 Some(Ok(a)) => { 357 let found = from_utf8(a.key).map_err(Error::Utf8)?.to_string(); 358 Err(Error::XmlDeclWithoutVersion(Some(found))) 359 } 360 None => Err(Error::XmlDeclWithoutVersion(None)), 361 } 362 } 363 364 /// Gets xml encoding, including quotes (' or ") encoding(&self) -> Option<Result<Cow<[u8]>>>365 pub fn encoding(&self) -> Option<Result<Cow<[u8]>>> { 366 for a in self.element.attributes() { 367 match a { 368 Err(e) => return Some(Err(e)), 369 Ok(Attribute { 370 key: b"encoding", 371 value: v, 372 }) => return Some(Ok(v)), 373 _ => (), 374 } 375 } 376 None 377 } 378 379 /// Gets xml standalone, including quotes (' or ") standalone(&self) -> Option<Result<Cow<[u8]>>>380 pub fn standalone(&self) -> Option<Result<Cow<[u8]>>> { 381 for a in self.element.attributes() { 382 match a { 383 Err(e) => return Some(Err(e)), 384 Ok(Attribute { 385 key: b"standalone", 386 value: v, 387 }) => return Some(Ok(v)), 388 _ => (), 389 } 390 } 391 None 392 } 393 394 /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`), 395 /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`) 396 /// attribute. 397 /// 398 /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values. 399 /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since 400 /// the double quote character is not allowed in any of the attribute values. new( version: &[u8], encoding: Option<&[u8]>, standalone: Option<&[u8]>, ) -> BytesDecl<'static>401 pub fn new( 402 version: &[u8], 403 encoding: Option<&[u8]>, 404 standalone: Option<&[u8]>, 405 ) -> BytesDecl<'static> { 406 // Compute length of the buffer based on supplied attributes 407 // ' encoding=""' => 12 408 let encoding_attr_len = if let Some(xs) = encoding { 409 12 + xs.len() 410 } else { 411 0 412 }; 413 // ' standalone=""' => 14 414 let standalone_attr_len = if let Some(xs) = standalone { 415 14 + xs.len() 416 } else { 417 0 418 }; 419 // 'xml version=""' => 14 420 let mut buf = Vec::with_capacity(14 + encoding_attr_len + standalone_attr_len); 421 422 buf.extend_from_slice(b"xml version=\""); 423 buf.extend_from_slice(version); 424 425 if let Some(encoding_val) = encoding { 426 buf.extend_from_slice(b"\" encoding=\""); 427 buf.extend_from_slice(encoding_val); 428 } 429 430 if let Some(standalone_val) = standalone { 431 buf.extend_from_slice(b"\" standalone=\""); 432 buf.extend_from_slice(standalone_val); 433 } 434 buf.push(b'"'); 435 436 BytesDecl { 437 element: BytesStart::owned(buf, 3), 438 } 439 } 440 441 /// Gets the decoder struct 442 #[cfg(feature = "encoding_rs")] encoder(&self) -> Option<&'static Encoding>443 pub fn encoder(&self) -> Option<&'static Encoding> { 444 self.encoding() 445 .and_then(|e| e.ok()) 446 .and_then(|e| Encoding::for_label(&*e)) 447 } 448 449 /// Converts the event into an owned event. into_owned(self) -> BytesDecl<'static>450 pub fn into_owned(self) -> BytesDecl<'static> { 451 BytesDecl { 452 element: self.element.into_owned(), 453 } 454 } 455 } 456 457 /// A struct to manage `Event::End` events 458 #[derive(Clone)] 459 pub struct BytesEnd<'a> { 460 name: Cow<'a, [u8]>, 461 } 462 463 impl<'a> BytesEnd<'a> { 464 /// Creates a new `BytesEnd` borrowing a slice 465 #[inline] borrowed(name: &'a [u8]) -> BytesEnd<'a>466 pub fn borrowed(name: &'a [u8]) -> BytesEnd<'a> { 467 BytesEnd { 468 name: Cow::Borrowed(name), 469 } 470 } 471 472 /// Creates a new `BytesEnd` owning its name 473 #[inline] owned(name: Vec<u8>) -> BytesEnd<'static>474 pub fn owned(name: Vec<u8>) -> BytesEnd<'static> { 475 BytesEnd { 476 name: Cow::Owned(name), 477 } 478 } 479 480 /// Converts the event into an owned event. into_owned(self) -> BytesEnd<'static>481 pub fn into_owned(self) -> BytesEnd<'static> { 482 BytesEnd { 483 name: Cow::Owned(self.name.into_owned()), 484 } 485 } 486 487 /// Gets `BytesEnd` event name 488 #[inline] name(&self) -> &[u8]489 pub fn name(&self) -> &[u8] { 490 &*self.name 491 } 492 493 /// local name (excluding namespace) as &[u8] (without eventual attributes) 494 /// returns the name() with any leading namespace removed (all content up to 495 /// and including the first ':' character) 496 #[inline] local_name(&self) -> &[u8]497 pub fn local_name(&self) -> &[u8] { 498 if let Some(i) = self.name().iter().position(|b| *b == b':') { 499 &self.name()[i + 1..] 500 } else { 501 self.name() 502 } 503 } 504 } 505 506 impl<'a> std::fmt::Debug for BytesEnd<'a> { fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result507 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 508 use crate::utils::write_byte_string; 509 510 write!(f, "BytesEnd {{ name: ")?; 511 write_byte_string(f, &self.name)?; 512 write!(f, " }}") 513 } 514 } 515 516 /// Data from various events (most notably, `Event::Text`). 517 #[derive(Clone)] 518 pub struct BytesText<'a> { 519 // Invariant: The content is always escaped. 520 content: Cow<'a, [u8]>, 521 } 522 523 impl<'a> BytesText<'a> { 524 /// Creates a new `BytesText` from an escaped byte sequence. 525 #[inline] from_escaped<C: Into<Cow<'a, [u8]>>>(content: C) -> BytesText<'a>526 pub fn from_escaped<C: Into<Cow<'a, [u8]>>>(content: C) -> BytesText<'a> { 527 BytesText { 528 content: content.into(), 529 } 530 } 531 532 /// Creates a new `BytesText` from a byte sequence. The byte sequence is 533 /// expected not to be escaped. 534 #[inline] from_plain(content: &'a [u8]) -> BytesText<'a>535 pub fn from_plain(content: &'a [u8]) -> BytesText<'a> { 536 BytesText { 537 content: escape(content), 538 } 539 } 540 541 /// Creates a new `BytesText` from an escaped string. 542 #[inline] from_escaped_str<C: Into<Cow<'a, str>>>(content: C) -> BytesText<'a>543 pub fn from_escaped_str<C: Into<Cow<'a, str>>>(content: C) -> BytesText<'a> { 544 Self::from_escaped(match content.into() { 545 Cow::Owned(o) => Cow::Owned(o.into_bytes()), 546 Cow::Borrowed(b) => Cow::Borrowed(b.as_bytes()), 547 }) 548 } 549 550 /// Creates a new `BytesText` from a string. The string is expected not to 551 /// be escaped. 552 #[inline] from_plain_str(content: &'a str) -> BytesText<'a>553 pub fn from_plain_str(content: &'a str) -> BytesText<'a> { 554 Self::from_plain(content.as_bytes()) 555 } 556 557 /// Ensures that all data is owned to extend the object's lifetime if 558 /// necessary. 559 #[inline] into_owned(self) -> BytesText<'static>560 pub fn into_owned(self) -> BytesText<'static> { 561 BytesText { 562 content: self.content.into_owned().into(), 563 } 564 } 565 566 /// Extracts the inner `Cow` from the `BytesText` event container. 567 #[cfg(feature = "serialize")] 568 #[inline] into_inner(self) -> Cow<'a, [u8]>569 pub(crate) fn into_inner(self) -> Cow<'a, [u8]> { 570 self.content 571 } 572 573 /// gets escaped content 574 /// 575 /// Searches for '&' into content and try to escape the coded character if possible 576 /// returns Malformed error with index within element if '&' is not followed by ';' 577 /// 578 /// See also [`unescaped_with_custom_entities()`](#method.unescaped_with_custom_entities) unescaped(&self) -> Result<Cow<[u8]>>579 pub fn unescaped(&self) -> Result<Cow<[u8]>> { 580 self.make_unescaped(None) 581 } 582 583 /// gets escaped content with custom entities 584 /// 585 /// Searches for '&' into content and try to escape the coded character if possible 586 /// returns Malformed error with index within element if '&' is not followed by ';' 587 /// Additional entities can be provided in `custom_entities`. 588 /// 589 /// # Pre-condition 590 /// 591 /// The keys and values of `custom_entities`, if any, must be valid UTF-8. 592 /// 593 /// See also [`unescaped()`](#method.unescaped) unescaped_with_custom_entities<'s>( &'s self, custom_entities: &HashMap<Vec<u8>, Vec<u8>>, ) -> Result<Cow<'s, [u8]>>594 pub fn unescaped_with_custom_entities<'s>( 595 &'s self, 596 custom_entities: &HashMap<Vec<u8>, Vec<u8>>, 597 ) -> Result<Cow<'s, [u8]>> { 598 self.make_unescaped(Some(custom_entities)) 599 } 600 make_unescaped<'s>( &'s self, custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, ) -> Result<Cow<'s, [u8]>>601 fn make_unescaped<'s>( 602 &'s self, 603 custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, 604 ) -> Result<Cow<'s, [u8]>> { 605 do_unescape(self, custom_entities).map_err(Error::EscapeError) 606 } 607 608 /// helper method to unescape then decode self using the reader encoding 609 /// but without BOM (Byte order mark) 610 /// 611 /// for performance reasons (could avoid allocating a `String`), 612 /// it might be wiser to manually use 613 /// 1. BytesText::unescaped() 614 /// 2. Reader::decode(...) 615 #[cfg(feature = "encoding")] unescape_and_decode_without_bom<B: BufRead>( &self, reader: &mut Reader<B>, ) -> Result<String>616 pub fn unescape_and_decode_without_bom<B: BufRead>( 617 &self, 618 reader: &mut Reader<B>, 619 ) -> Result<String> { 620 self.do_unescape_and_decode_without_bom(reader, None) 621 } 622 623 /// helper method to unescape then decode self using the reader encoding 624 /// but without BOM (Byte order mark) 625 /// 626 /// for performance reasons (could avoid allocating a `String`), 627 /// it might be wiser to manually use 628 /// 1. BytesText::unescaped() 629 /// 2. Reader::decode(...) 630 #[cfg(not(feature = "encoding"))] unescape_and_decode_without_bom<B: BufRead>( &self, reader: &Reader<B>, ) -> Result<String>631 pub fn unescape_and_decode_without_bom<B: BufRead>( 632 &self, 633 reader: &Reader<B>, 634 ) -> Result<String> { 635 self.do_unescape_and_decode_without_bom(reader, None) 636 } 637 638 /// helper method to unescape then decode self using the reader encoding with custom entities 639 /// but without BOM (Byte order mark) 640 /// 641 /// for performance reasons (could avoid allocating a `String`), 642 /// it might be wiser to manually use 643 /// 1. BytesText::unescaped() 644 /// 2. Reader::decode(...) 645 /// 646 /// # Pre-condition 647 /// 648 /// The keys and values of `custom_entities`, if any, must be valid UTF-8. 649 #[cfg(feature = "encoding")] unescape_and_decode_without_bom_with_custom_entities<B: BufRead>( &self, reader: &mut Reader<B>, custom_entities: &HashMap<Vec<u8>, Vec<u8>>, ) -> Result<String>650 pub fn unescape_and_decode_without_bom_with_custom_entities<B: BufRead>( 651 &self, 652 reader: &mut Reader<B>, 653 custom_entities: &HashMap<Vec<u8>, Vec<u8>>, 654 ) -> Result<String> { 655 self.do_unescape_and_decode_without_bom(reader, Some(custom_entities)) 656 } 657 658 /// helper method to unescape then decode self using the reader encoding with custom entities 659 /// but without BOM (Byte order mark) 660 /// 661 /// for performance reasons (could avoid allocating a `String`), 662 /// it might be wiser to manually use 663 /// 1. BytesText::unescaped() 664 /// 2. Reader::decode(...) 665 /// 666 /// # Pre-condition 667 /// 668 /// The keys and values of `custom_entities`, if any, must be valid UTF-8. 669 #[cfg(not(feature = "encoding"))] unescape_and_decode_without_bom_with_custom_entities<B: BufRead>( &self, reader: &Reader<B>, custom_entities: &HashMap<Vec<u8>, Vec<u8>>, ) -> Result<String>670 pub fn unescape_and_decode_without_bom_with_custom_entities<B: BufRead>( 671 &self, 672 reader: &Reader<B>, 673 custom_entities: &HashMap<Vec<u8>, Vec<u8>>, 674 ) -> Result<String> { 675 self.do_unescape_and_decode_without_bom(reader, Some(custom_entities)) 676 } 677 678 #[cfg(feature = "encoding")] do_unescape_and_decode_without_bom<B: BufRead>( &self, reader: &mut Reader<B>, custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, ) -> Result<String>679 fn do_unescape_and_decode_without_bom<B: BufRead>( 680 &self, 681 reader: &mut Reader<B>, 682 custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, 683 ) -> Result<String> { 684 let decoded = reader.decode_without_bom(&*self); 685 let unescaped = 686 do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; 687 String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) 688 } 689 690 #[cfg(not(feature = "encoding"))] do_unescape_and_decode_without_bom<B: BufRead>( &self, reader: &Reader<B>, custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, ) -> Result<String>691 fn do_unescape_and_decode_without_bom<B: BufRead>( 692 &self, 693 reader: &Reader<B>, 694 custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, 695 ) -> Result<String> { 696 let decoded = reader.decode_without_bom(&*self)?; 697 let unescaped = 698 do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; 699 String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) 700 } 701 702 /// helper method to unescape then decode self using the reader encoding 703 /// 704 /// for performance reasons (could avoid allocating a `String`), 705 /// it might be wiser to manually use 706 /// 1. BytesText::unescaped() 707 /// 2. Reader::decode(...) unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String>708 pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> { 709 self.do_unescape_and_decode_with_custom_entities(reader, None) 710 } 711 712 /// helper method to unescape then decode self using the reader encoding with custom entities 713 /// 714 /// for performance reasons (could avoid allocating a `String`), 715 /// it might be wiser to manually use 716 /// 1. BytesText::unescaped() 717 /// 2. Reader::decode(...) 718 /// 719 /// # Pre-condition 720 /// 721 /// The keys and values of `custom_entities`, if any, must be valid UTF-8. unescape_and_decode_with_custom_entities<B: BufRead>( &self, reader: &Reader<B>, custom_entities: &HashMap<Vec<u8>, Vec<u8>>, ) -> Result<String>722 pub fn unescape_and_decode_with_custom_entities<B: BufRead>( 723 &self, 724 reader: &Reader<B>, 725 custom_entities: &HashMap<Vec<u8>, Vec<u8>>, 726 ) -> Result<String> { 727 self.do_unescape_and_decode_with_custom_entities(reader, Some(custom_entities)) 728 } 729 730 #[cfg(feature = "encoding")] do_unescape_and_decode_with_custom_entities<B: BufRead>( &self, reader: &Reader<B>, custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, ) -> Result<String>731 fn do_unescape_and_decode_with_custom_entities<B: BufRead>( 732 &self, 733 reader: &Reader<B>, 734 custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, 735 ) -> Result<String> { 736 let decoded = reader.decode(&*self); 737 let unescaped = 738 do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; 739 String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) 740 } 741 742 #[cfg(not(feature = "encoding"))] do_unescape_and_decode_with_custom_entities<B: BufRead>( &self, reader: &Reader<B>, custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, ) -> Result<String>743 fn do_unescape_and_decode_with_custom_entities<B: BufRead>( 744 &self, 745 reader: &Reader<B>, 746 custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, 747 ) -> Result<String> { 748 let decoded = reader.decode(&*self)?; 749 let unescaped = 750 do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; 751 String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) 752 } 753 754 /// Gets escaped content. escaped(&self) -> &[u8]755 pub fn escaped(&self) -> &[u8] { 756 self.content.as_ref() 757 } 758 } 759 760 impl<'a> std::fmt::Debug for BytesText<'a> { fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result761 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 762 use crate::utils::write_byte_string; 763 764 write!(f, "BytesText {{ content: ")?; 765 write_byte_string(f, &self.content)?; 766 write!(f, " }}") 767 } 768 } 769 770 /// Event emitted by [`Reader::read_event`]. 771 /// 772 /// [`Reader::read_event`]: ../reader/struct.Reader.html#method.read_event 773 #[derive(Clone, Debug)] 774 pub enum Event<'a> { 775 /// Start tag (with attributes) `<tag attr="value">`. 776 Start(BytesStart<'a>), 777 /// End tag `</tag>`. 778 End(BytesEnd<'a>), 779 /// Empty element tag (with attributes) `<tag attr="value" />`. 780 Empty(BytesStart<'a>), 781 /// Character data between `Start` and `End` element. 782 Text(BytesText<'a>), 783 /// Comment `<!-- ... -->`. 784 Comment(BytesText<'a>), 785 /// CData `<![CDATA[...]]>`. 786 CData(BytesText<'a>), 787 /// XML declaration `<?xml ...?>`. 788 Decl(BytesDecl<'a>), 789 /// Processing instruction `<?...?>`. 790 PI(BytesText<'a>), 791 /// Doctype `<!DOCTYPE...>`. 792 DocType(BytesText<'a>), 793 /// End of XML document. 794 Eof, 795 } 796 797 impl<'a> Event<'a> { 798 /// Converts the event to an owned version, untied to the lifetime of 799 /// buffer used when reading but incurring a new, seperate allocation. into_owned(self) -> Event<'static>800 pub fn into_owned(self) -> Event<'static> { 801 match self { 802 Event::Start(e) => Event::Start(e.into_owned()), 803 Event::End(e) => Event::End(e.into_owned()), 804 Event::Empty(e) => Event::Empty(e.into_owned()), 805 Event::Text(e) => Event::Text(e.into_owned()), 806 Event::Comment(e) => Event::Comment(e.into_owned()), 807 Event::CData(e) => Event::CData(e.into_owned()), 808 Event::Decl(e) => Event::Decl(e.into_owned()), 809 Event::PI(e) => Event::PI(e.into_owned()), 810 Event::DocType(e) => Event::DocType(e.into_owned()), 811 Event::Eof => Event::Eof, 812 } 813 } 814 } 815 816 impl<'a> Deref for BytesStart<'a> { 817 type Target = [u8]; deref(&self) -> &[u8]818 fn deref(&self) -> &[u8] { 819 &*self.buf 820 } 821 } 822 823 impl<'a> Deref for BytesDecl<'a> { 824 type Target = [u8]; deref(&self) -> &[u8]825 fn deref(&self) -> &[u8] { 826 &*self.element 827 } 828 } 829 830 impl<'a> Deref for BytesEnd<'a> { 831 type Target = [u8]; deref(&self) -> &[u8]832 fn deref(&self) -> &[u8] { 833 &*self.name 834 } 835 } 836 837 impl<'a> Deref for BytesText<'a> { 838 type Target = [u8]; deref(&self) -> &[u8]839 fn deref(&self) -> &[u8] { 840 &*self.content 841 } 842 } 843 844 impl<'a> Deref for Event<'a> { 845 type Target = [u8]; deref(&self) -> &[u8]846 fn deref(&self) -> &[u8] { 847 match *self { 848 Event::Start(ref e) | Event::Empty(ref e) => &*e, 849 Event::End(ref e) => &*e, 850 Event::Text(ref e) => &*e, 851 Event::Decl(ref e) => &*e, 852 Event::PI(ref e) => &*e, 853 Event::CData(ref e) => &*e, 854 Event::Comment(ref e) => &*e, 855 Event::DocType(ref e) => &*e, 856 Event::Eof => &[], 857 } 858 } 859 } 860 861 impl<'a> AsRef<Event<'a>> for Event<'a> { as_ref(&self) -> &Event<'a>862 fn as_ref(&self) -> &Event<'a> { 863 self 864 } 865 } 866 867 #[cfg(test)] 868 mod test { 869 use super::*; 870 871 #[test] local_name()872 fn local_name() { 873 use std::str::from_utf8; 874 let xml = r#" 875 <foo:bus attr='bar'>foobusbar</foo:bus> 876 <foo: attr='bar'>foobusbar</foo:> 877 <:foo attr='bar'>foobusbar</:foo> 878 <foo:bus:baz attr='bar'>foobusbar</foo:bus:baz> 879 "#; 880 let mut rdr = Reader::from_str(xml); 881 let mut buf = Vec::new(); 882 let mut parsed_local_names = Vec::new(); 883 loop { 884 match rdr.read_event(&mut buf).expect("unable to read xml event") { 885 Event::Start(ref e) => parsed_local_names.push( 886 from_utf8(e.local_name()) 887 .expect("unable to build str from local_name") 888 .to_string(), 889 ), 890 Event::End(ref e) => parsed_local_names.push( 891 from_utf8(e.local_name()) 892 .expect("unable to build str from local_name") 893 .to_string(), 894 ), 895 Event::Eof => break, 896 _ => {} 897 } 898 } 899 assert_eq!(parsed_local_names[0], "bus".to_string()); 900 assert_eq!(parsed_local_names[1], "bus".to_string()); 901 assert_eq!(parsed_local_names[2], "".to_string()); 902 assert_eq!(parsed_local_names[3], "".to_string()); 903 assert_eq!(parsed_local_names[4], "foo".to_string()); 904 assert_eq!(parsed_local_names[5], "foo".to_string()); 905 assert_eq!(parsed_local_names[6], "bus:baz".to_string()); 906 assert_eq!(parsed_local_names[7], "bus:baz".to_string()); 907 } 908 909 #[test] bytestart_create()910 fn bytestart_create() { 911 let b = BytesStart::owned_name("test"); 912 assert_eq!(b.len(), 4); 913 assert_eq!(b.name(), b"test"); 914 } 915 916 #[test] bytestart_set_name()917 fn bytestart_set_name() { 918 let mut b = BytesStart::owned_name("test"); 919 assert_eq!(b.len(), 4); 920 assert_eq!(b.name(), b"test"); 921 assert_eq!(b.attributes_raw(), b""); 922 b.push_attribute(("x", "a")); 923 assert_eq!(b.len(), 10); 924 assert_eq!(b.attributes_raw(), b" x=\"a\""); 925 b.set_name(b"g"); 926 assert_eq!(b.len(), 7); 927 assert_eq!(b.name(), b"g"); 928 } 929 930 #[test] bytestart_clear_attributes()931 fn bytestart_clear_attributes() { 932 let mut b = BytesStart::owned_name("test"); 933 b.push_attribute(("x", "y\"z")); 934 b.push_attribute(("x", "y\"z")); 935 b.clear_attributes(); 936 assert!(b.attributes().next().is_none()); 937 assert_eq!(b.len(), 4); 938 assert_eq!(b.name(), b"test"); 939 } 940 } 941