1 //! Defines zero-copy XML events used throughout this library. 2 3 pub mod attributes; 4 5 #[cfg(feature = "encoding_rs")] 6 use encoding_rs::Encoding; 7 use std::borrow::Cow; 8 use std::io::BufRead; 9 use std::ops::Deref; 10 use std::str::from_utf8; 11 12 use self::attributes::{Attribute, Attributes}; 13 use errors::{Error, Result}; 14 use escape::{escape, unescape}; 15 use reader::Reader; 16 17 use memchr; 18 19 /// Opening tag data (`Event::Start`), with optional attributes. 20 /// 21 /// `<name attr="value">`. 22 /// 23 /// The name can be accessed using the [`name`], [`local_name`] or [`unescaped`] methods. An 24 /// iterator over the attributes is returned by the [`attributes`] method. 25 /// 26 /// [`name`]: #method.name 27 /// [`local_name`]: #method.local_name 28 /// [`unescaped`]: #method.unescaped 29 /// [`attributes`]: #method.attributes 30 #[derive(Clone)] 31 pub struct BytesStart<'a> { 32 /// content of the element, before any utf8 conversion 33 buf: Cow<'a, [u8]>, 34 /// end of the element name, the name starts at that the start of `buf` 35 name_len: usize, 36 } 37 38 impl<'a> BytesStart<'a> { 39 /// Creates a new `BytesStart` from the given content (name + attributes). 40 /// 41 /// # Warning 42 /// 43 /// `&content[..name_len]` is not checked to be a valid name 44 #[inline] borrowed(content: &'a [u8], name_len: usize) -> Self45 pub fn borrowed(content: &'a [u8], name_len: usize) -> Self { 46 BytesStart { 47 buf: Cow::Borrowed(content), 48 name_len, 49 } 50 } 51 52 /// Creates a new `BytesStart` from the given name. 53 /// 54 /// # Warning 55 /// 56 /// `&content` is not checked to be a valid name 57 #[inline] borrowed_name(name: &'a [u8]) -> BytesStart<'a>58 pub fn borrowed_name(name: &'a [u8]) -> BytesStart<'a> { 59 Self::borrowed(name, name.len()) 60 } 61 62 /// Creates a new `BytesStart` from the given content (name + attributes) 63 /// 64 /// Owns its contents. 65 #[inline] owned<C: Into<Vec<u8>>>(content: C, name_len: usize) -> BytesStart<'static>66 pub fn owned<C: Into<Vec<u8>>>(content: C, name_len: usize) -> BytesStart<'static> { 67 BytesStart { 68 buf: Cow::Owned(content.into()), 69 name_len, 70 } 71 } 72 73 /// Creates a new `BytesStart` from the given name 74 /// 75 /// Owns its contents. 76 #[inline] owned_name<C: Into<Vec<u8>>>(name: C) -> BytesStart<'static>77 pub fn owned_name<C: Into<Vec<u8>>>(name: C) -> BytesStart<'static> { 78 let content = name.into(); 79 BytesStart { 80 name_len: content.len(), 81 buf: Cow::Owned(content), 82 } 83 } 84 85 /// Converts the event into an owned event. into_owned(self) -> BytesStart<'static>86 pub fn into_owned(self) -> BytesStart<'static> { 87 Self::owned(self.buf.into_owned(), self.name_len) 88 } 89 90 /// Converts the event into an owned event without taking ownership of Event to_owned(&self) -> BytesStart<'static>91 pub fn to_owned(&self) -> BytesStart<'static> { 92 Self::owned(self.buf.to_owned(), self.name_len) 93 } 94 95 /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator. 96 /// 97 /// The yielded items must be convertible to [`Attribute`] using `Into`. 98 /// 99 /// [`Attribute`]: attributes/struct.Attributes.html with_attributes<'b, I>(mut self, attributes: I) -> Self where I: IntoIterator, I::Item: Into<Attribute<'b>>,100 pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self 101 where 102 I: IntoIterator, 103 I::Item: Into<Attribute<'b>>, 104 { 105 self.extend_attributes(attributes); 106 self 107 } 108 109 /// Gets the undecoded raw tag name as a `&[u8]`. 110 #[inline] name(&self) -> &[u8]111 pub fn name(&self) -> &[u8] { 112 &self.buf[..self.name_len] 113 } 114 115 /// Gets the undecoded raw local tag name (excluding namespace) as a `&[u8]`. 116 /// 117 /// All content up to and including the first `:` character is removed from the tag name. 118 #[inline] local_name(&self) -> &[u8]119 pub fn local_name(&self) -> &[u8] { 120 let name = self.name(); 121 memchr::memchr(b':', name).map_or(name, |i| &name[i + 1..]) 122 } 123 124 /// Gets the unescaped tag name. 125 /// 126 /// XML escape sequences like "`<`" will be replaced by their unescaped characters like 127 /// "`<`". 128 #[inline] unescaped(&self) -> Result<Cow<[u8]>>129 pub fn unescaped(&self) -> Result<Cow<[u8]>> { 130 unescape(&*self.buf).map_err(Error::EscapeError) 131 } 132 133 /// Returns an iterator over the attributes of this tag. attributes(&self) -> Attributes134 pub fn attributes(&self) -> Attributes { 135 Attributes::new(self, self.name_len) 136 } 137 138 /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`). html_attributes(&self) -> Attributes139 pub fn html_attributes(&self) -> Attributes { 140 Attributes::html(self, self.name_len) 141 } 142 143 /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`, 144 /// including the whitespace after the tag name if there is any. 145 #[inline] attributes_raw(&self) -> &[u8]146 pub fn attributes_raw(&self) -> &[u8] { 147 &self.buf[self.name_len..] 148 } 149 150 /// Add additional attributes to this tag using an iterator. 151 /// 152 /// The yielded items must be convertible to [`Attribute`] using `Into`. 153 /// 154 /// [`Attribute`]: attributes/struct.Attributes.html extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a> where I: IntoIterator, I::Item: Into<Attribute<'b>>,155 pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a> 156 where 157 I: IntoIterator, 158 I::Item: Into<Attribute<'b>>, 159 { 160 for attr in attributes { 161 self.push_attribute(attr); 162 } 163 self 164 } 165 166 /// Returns the unescaped and decoded string value. 167 /// 168 /// This allocates a `String` in all cases. For performance reasons it might be a better idea to 169 /// instead use one of: 170 /// 171 /// * [`unescaped()`], as it doesn't allocate when no escape sequences are used. 172 /// * [`Reader::decode()`], as it only allocates when the decoding can't be performed otherwise. 173 /// 174 /// [`unescaped()`]: #method.unescaped 175 /// [`Reader::decode()`]: ../reader/struct.Reader.html#method.decode 176 #[cfg(feature = "encoding")] 177 #[inline] unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String>178 pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> { 179 let decoded = reader.decode(&*self); 180 let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?; 181 String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) 182 } 183 184 /// Returns the unescaped and decoded string value. 185 /// 186 /// This allocates a `String` in all cases. For performance reasons it might be a better idea to 187 /// instead use one of: 188 /// 189 /// * [`unescaped()`], as it doesn't allocate when no escape sequences are used. 190 /// * [`Reader::decode()`], as it only allocates when the decoding can't be performed otherwise. 191 /// 192 /// [`unescaped()`]: #method.unescaped 193 /// [`Reader::decode()`]: ../reader/struct.Reader.html#method.decode 194 #[cfg(not(feature = "encoding"))] 195 #[inline] unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String>196 pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> { 197 let decoded = reader.decode(&*self)?; 198 let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?; 199 String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) 200 } 201 202 /// Adds an attribute to this element. push_attribute<'b, A: Into<Attribute<'b>>>(&mut self, attr: A)203 pub fn push_attribute<'b, A: Into<Attribute<'b>>>(&mut self, attr: A) { 204 let a = attr.into(); 205 let bytes = self.buf.to_mut(); 206 bytes.push(b' '); 207 bytes.extend_from_slice(a.key); 208 bytes.extend_from_slice(b"=\""); 209 bytes.extend_from_slice(&*a.value); 210 bytes.push(b'"'); 211 } 212 213 /// Edit the name of the BytesStart in-place 214 /// 215 /// # Warning 216 /// 217 /// `name` is not checked to be a valid name set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a>218 pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> { 219 let bytes = self.buf.to_mut(); 220 bytes.splice(..self.name_len, name.iter().cloned()); 221 self.name_len = name.len(); 222 self 223 } 224 225 /// Remove all attributes from the ByteStart clear_attributes(&mut self) -> &mut BytesStart<'a>226 pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> { 227 self.buf.to_mut().truncate(self.name_len); 228 self 229 } 230 } 231 232 impl<'a> std::fmt::Debug for BytesStart<'a> { fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result233 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 234 use crate::utils::write_byte_string; 235 236 write!(f, "BytesStart {{ buf: ")?; 237 write_byte_string(f, &self.buf)?; 238 write!(f, ", name_len: {} }}", self.name_len) 239 } 240 } 241 242 /// An XML declaration (`Event::Decl`). 243 /// 244 /// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd) 245 #[derive(Clone, Debug)] 246 pub struct BytesDecl<'a> { 247 element: BytesStart<'a>, 248 } 249 250 impl<'a> BytesDecl<'a> { 251 /// Creates a `BytesDecl` from a `BytesStart` from_start(start: BytesStart<'a>) -> BytesDecl<'a>252 pub fn from_start(start: BytesStart<'a>) -> BytesDecl<'a> { 253 BytesDecl { element: start } 254 } 255 256 /// Gets xml version, including quotes (' or ") version(&self) -> Result<Cow<[u8]>>257 pub fn version(&self) -> Result<Cow<[u8]>> { 258 // The version *must* be the first thing in the declaration. 259 match self.element.attributes().next() { 260 Some(Err(e)) => Err(e), 261 Some(Ok(Attribute { 262 key: b"version", 263 value: v, 264 })) => Ok(v), 265 Some(Ok(a)) => { 266 let found = from_utf8(a.key).map_err(Error::Utf8)?.to_string(); 267 Err(Error::XmlDeclWithoutVersion(Some(found))) 268 } 269 None => Err(Error::XmlDeclWithoutVersion(None)), 270 } 271 } 272 273 /// Gets xml encoding, including quotes (' or ") encoding(&self) -> Option<Result<Cow<[u8]>>>274 pub fn encoding(&self) -> Option<Result<Cow<[u8]>>> { 275 for a in self.element.attributes() { 276 match a { 277 Err(e) => return Some(Err(e)), 278 Ok(Attribute { 279 key: b"encoding", 280 value: v, 281 }) => return Some(Ok(v)), 282 _ => (), 283 } 284 } 285 None 286 } 287 288 /// Gets xml standalone, including quotes (' or ") standalone(&self) -> Option<Result<Cow<[u8]>>>289 pub fn standalone(&self) -> Option<Result<Cow<[u8]>>> { 290 for a in self.element.attributes() { 291 match a { 292 Err(e) => return Some(Err(e)), 293 Ok(Attribute { 294 key: b"standalone", 295 value: v, 296 }) => return Some(Ok(v)), 297 _ => (), 298 } 299 } 300 None 301 } 302 303 /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`), 304 /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`) 305 /// attribute. 306 /// 307 /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values. 308 /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since 309 /// the double quote character is not allowed in any of the attribute values. new( version: &[u8], encoding: Option<&[u8]>, standalone: Option<&[u8]>, ) -> BytesDecl<'static>310 pub fn new( 311 version: &[u8], 312 encoding: Option<&[u8]>, 313 standalone: Option<&[u8]>, 314 ) -> BytesDecl<'static> { 315 // Compute length of the buffer based on supplied attributes 316 // ' encoding=""' => 12 317 let encoding_attr_len = if let Some(xs) = encoding { 318 12 + xs.len() 319 } else { 320 0 321 }; 322 // ' standalone=""' => 14 323 let standalone_attr_len = if let Some(xs) = standalone { 324 14 + xs.len() 325 } else { 326 0 327 }; 328 // 'xml version=""' => 14 329 let mut buf = Vec::with_capacity(14 + encoding_attr_len + standalone_attr_len); 330 331 buf.extend_from_slice(b"xml version=\""); 332 buf.extend_from_slice(version); 333 334 if let Some(encoding_val) = encoding { 335 buf.extend_from_slice(b"\" encoding=\""); 336 buf.extend_from_slice(encoding_val); 337 } 338 339 if let Some(standalone_val) = standalone { 340 buf.extend_from_slice(b"\" standalone=\""); 341 buf.extend_from_slice(standalone_val); 342 } 343 buf.push(b'"'); 344 345 BytesDecl { 346 element: BytesStart::owned(buf, 3), 347 } 348 } 349 350 /// Gets the decoder struct 351 #[cfg(feature = "encoding_rs")] encoder(&self) -> Option<&'static Encoding>352 pub fn encoder(&self) -> Option<&'static Encoding> { 353 self.encoding() 354 .and_then(|e| e.ok()) 355 .and_then(|e| Encoding::for_label(&*e)) 356 } 357 358 /// Converts the event into an owned event. into_owned(self) -> BytesDecl<'static>359 pub fn into_owned(self) -> BytesDecl<'static> { 360 BytesDecl { 361 element: self.element.into_owned(), 362 } 363 } 364 } 365 366 /// A struct to manage `Event::End` events 367 #[derive(Clone)] 368 pub struct BytesEnd<'a> { 369 name: Cow<'a, [u8]>, 370 } 371 372 impl<'a> BytesEnd<'a> { 373 /// Creates a new `BytesEnd` borrowing a slice 374 #[inline] borrowed(name: &'a [u8]) -> BytesEnd<'a>375 pub fn borrowed(name: &'a [u8]) -> BytesEnd<'a> { 376 BytesEnd { 377 name: Cow::Borrowed(name), 378 } 379 } 380 381 /// Creates a new `BytesEnd` owning its name 382 #[inline] owned(name: Vec<u8>) -> BytesEnd<'static>383 pub fn owned(name: Vec<u8>) -> BytesEnd<'static> { 384 BytesEnd { 385 name: Cow::Owned(name), 386 } 387 } 388 389 /// Converts the event into an owned event. into_owned(self) -> BytesEnd<'static>390 pub fn into_owned(self) -> BytesEnd<'static> { 391 BytesEnd { 392 name: Cow::Owned(self.name.into_owned()), 393 } 394 } 395 396 /// Gets `BytesEnd` event name 397 #[inline] name(&self) -> &[u8]398 pub fn name(&self) -> &[u8] { 399 &*self.name 400 } 401 402 /// local name (excluding namespace) as &[u8] (without eventual attributes) 403 /// returns the name() with any leading namespace removed (all content up to 404 /// and including the first ':' character) 405 #[inline] local_name(&self) -> &[u8]406 pub fn local_name(&self) -> &[u8] { 407 if let Some(i) = self.name().iter().position(|b| *b == b':') { 408 &self.name()[i + 1..] 409 } else { 410 self.name() 411 } 412 } 413 } 414 415 impl<'a> std::fmt::Debug for BytesEnd<'a> { fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result416 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 417 use crate::utils::write_byte_string; 418 419 write!(f, "BytesEnd {{ name: ")?; 420 write_byte_string(f, &self.name)?; 421 write!(f, " }}") 422 } 423 } 424 425 /// Data from various events (most notably, `Event::Text`). 426 #[derive(Clone)] 427 pub struct BytesText<'a> { 428 // Invariant: The content is always escaped. 429 content: Cow<'a, [u8]>, 430 } 431 432 impl<'a> BytesText<'a> { 433 /// Creates a new `BytesText` from an escaped byte sequence. 434 #[inline] from_escaped<C: Into<Cow<'a, [u8]>>>(content: C) -> BytesText<'a>435 pub fn from_escaped<C: Into<Cow<'a, [u8]>>>(content: C) -> BytesText<'a> { 436 BytesText { 437 content: content.into(), 438 } 439 } 440 441 /// Creates a new `BytesText` from a byte sequence. The byte sequence is 442 /// expected not to be escaped. 443 #[inline] from_plain(content: &'a [u8]) -> BytesText<'a>444 pub fn from_plain(content: &'a [u8]) -> BytesText<'a> { 445 BytesText { 446 content: escape(content), 447 } 448 } 449 450 /// Creates a new `BytesText` from an escaped string. 451 #[inline] from_escaped_str<C: Into<Cow<'a, str>>>(content: C) -> BytesText<'a>452 pub fn from_escaped_str<C: Into<Cow<'a, str>>>(content: C) -> BytesText<'a> { 453 Self::from_escaped(match content.into() { 454 Cow::Owned(o) => Cow::Owned(o.into_bytes()), 455 Cow::Borrowed(b) => Cow::Borrowed(b.as_bytes()), 456 }) 457 } 458 459 /// Creates a new `BytesText` from a string. The string is expected not to 460 /// be escaped. 461 #[inline] from_plain_str(content: &'a str) -> BytesText<'a>462 pub fn from_plain_str(content: &'a str) -> BytesText<'a> { 463 Self::from_plain(content.as_bytes()) 464 } 465 466 /// Ensures that all data is owned to extend the object's lifetime if 467 /// necessary. 468 #[inline] into_owned(self) -> BytesText<'static>469 pub fn into_owned(self) -> BytesText<'static> { 470 BytesText { 471 content: self.content.into_owned().into(), 472 } 473 } 474 475 /// Extracts the inner `Cow` from the `BytesText` event container. 476 #[cfg(feature = "serialize")] 477 #[inline] into_inner(self) -> Cow<'a, [u8]>478 pub(crate) fn into_inner(self) -> Cow<'a, [u8]> { 479 self.content 480 } 481 482 /// gets escaped content 483 /// 484 /// Searches for '&' into content and try to escape the coded character if possible 485 /// returns Malformed error with index within element if '&' is not followed by ';' unescaped(&self) -> Result<Cow<[u8]>>486 pub fn unescaped(&self) -> Result<Cow<[u8]>> { 487 unescape(self).map_err(Error::EscapeError) 488 } 489 490 /// helper method to unescape then decode self using the reader encoding 491 /// but without BOM (Byte order mark) 492 /// 493 /// for performance reasons (could avoid allocating a `String`), 494 /// it might be wiser to manually use 495 /// 1. BytesText::unescaped() 496 /// 2. Reader::decode(...) 497 #[cfg(feature = "encoding")] unescape_and_decode_without_bom<B: BufRead>( &self, reader: &mut Reader<B>, ) -> Result<String>498 pub fn unescape_and_decode_without_bom<B: BufRead>( 499 &self, 500 reader: &mut Reader<B>, 501 ) -> Result<String> { 502 let decoded = reader.decode_without_bom(&*self); 503 let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?; 504 String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) 505 } 506 507 /// helper method to unescape then decode self using the reader encoding 508 /// but without BOM (Byte order mark) 509 /// 510 /// for performance reasons (could avoid allocating a `String`), 511 /// it might be wiser to manually use 512 /// 1. BytesText::unescaped() 513 /// 2. Reader::decode(...) 514 #[cfg(not(feature = "encoding"))] unescape_and_decode_without_bom<B: BufRead>( &self, reader: &Reader<B>, ) -> Result<String>515 pub fn unescape_and_decode_without_bom<B: BufRead>( 516 &self, 517 reader: &Reader<B>, 518 ) -> Result<String> { 519 let decoded = reader.decode_without_bom(&*self)?; 520 let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?; 521 String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) 522 } 523 524 /// helper method to unescape then decode self using the reader encoding 525 /// 526 /// for performance reasons (could avoid allocating a `String`), 527 /// it might be wiser to manually use 528 /// 1. BytesText::unescaped() 529 /// 2. Reader::decode(...) 530 #[cfg(feature = "encoding")] unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String>531 pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> { 532 let decoded = reader.decode(&*self); 533 let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?; 534 String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) 535 } 536 537 /// helper method to unescape then decode self using the reader encoding 538 /// 539 /// for performance reasons (could avoid allocating a `String`), 540 /// it might be wiser to manually use 541 /// 1. BytesText::unescaped() 542 /// 2. Reader::decode(...) 543 #[cfg(not(feature = "encoding"))] unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String>544 pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> { 545 let decoded = reader.decode(&*self)?; 546 let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?; 547 String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) 548 } 549 550 /// Gets escaped content. escaped(&self) -> &[u8]551 pub fn escaped(&self) -> &[u8] { 552 self.content.as_ref() 553 } 554 } 555 556 impl<'a> std::fmt::Debug for BytesText<'a> { fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result557 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 558 use crate::utils::write_byte_string; 559 560 write!(f, "BytesText {{ content: ")?; 561 write_byte_string(f, &self.content)?; 562 write!(f, " }}") 563 } 564 } 565 566 /// Event emitted by [`Reader::read_event`]. 567 /// 568 /// [`Reader::read_event`]: ../reader/struct.Reader.html#method.read_event 569 #[derive(Clone, Debug)] 570 pub enum Event<'a> { 571 /// Start tag (with attributes) `<tag attr="value">`. 572 Start(BytesStart<'a>), 573 /// End tag `</tag>`. 574 End(BytesEnd<'a>), 575 /// Empty element tag (with attributes) `<tag attr="value" />`. 576 Empty(BytesStart<'a>), 577 /// Character data between `Start` and `End` element. 578 Text(BytesText<'a>), 579 /// Comment `<!-- ... -->`. 580 Comment(BytesText<'a>), 581 /// CData `<![CDATA[...]]>`. 582 CData(BytesText<'a>), 583 /// XML declaration `<?xml ...?>`. 584 Decl(BytesDecl<'a>), 585 /// Processing instruction `<?...?>`. 586 PI(BytesText<'a>), 587 /// Doctype `<!DOCTYPE...>`. 588 DocType(BytesText<'a>), 589 /// End of XML document. 590 Eof, 591 } 592 593 impl<'a> Event<'a> { 594 /// Converts the event to an owned version, untied to the lifetime of 595 /// buffer used when reading but incurring a new, seperate allocation. into_owned(self) -> Event<'static>596 pub fn into_owned(self) -> Event<'static> { 597 match self { 598 Event::Start(e) => Event::Start(e.into_owned()), 599 Event::End(e) => Event::End(e.into_owned()), 600 Event::Empty(e) => Event::Empty(e.into_owned()), 601 Event::Text(e) => Event::Text(e.into_owned()), 602 Event::Comment(e) => Event::Comment(e.into_owned()), 603 Event::CData(e) => Event::CData(e.into_owned()), 604 Event::Decl(e) => Event::Decl(e.into_owned()), 605 Event::PI(e) => Event::PI(e.into_owned()), 606 Event::DocType(e) => Event::DocType(e.into_owned()), 607 Event::Eof => Event::Eof, 608 } 609 } 610 } 611 612 impl<'a> Deref for BytesStart<'a> { 613 type Target = [u8]; deref(&self) -> &[u8]614 fn deref(&self) -> &[u8] { 615 &*self.buf 616 } 617 } 618 619 impl<'a> Deref for BytesDecl<'a> { 620 type Target = [u8]; deref(&self) -> &[u8]621 fn deref(&self) -> &[u8] { 622 &*self.element 623 } 624 } 625 626 impl<'a> Deref for BytesEnd<'a> { 627 type Target = [u8]; deref(&self) -> &[u8]628 fn deref(&self) -> &[u8] { 629 &*self.name 630 } 631 } 632 633 impl<'a> Deref for BytesText<'a> { 634 type Target = [u8]; deref(&self) -> &[u8]635 fn deref(&self) -> &[u8] { 636 &*self.content 637 } 638 } 639 640 impl<'a> Deref for Event<'a> { 641 type Target = [u8]; deref(&self) -> &[u8]642 fn deref(&self) -> &[u8] { 643 match *self { 644 Event::Start(ref e) | Event::Empty(ref e) => &*e, 645 Event::End(ref e) => &*e, 646 Event::Text(ref e) => &*e, 647 Event::Decl(ref e) => &*e, 648 Event::PI(ref e) => &*e, 649 Event::CData(ref e) => &*e, 650 Event::Comment(ref e) => &*e, 651 Event::DocType(ref e) => &*e, 652 Event::Eof => &[], 653 } 654 } 655 } 656 657 impl<'a> AsRef<Event<'a>> for Event<'a> { as_ref(&self) -> &Event<'a>658 fn as_ref(&self) -> &Event<'a> { 659 self 660 } 661 } 662 663 #[cfg(test)] 664 mod test { 665 use super::*; 666 667 #[test] local_name()668 fn local_name() { 669 use std::str::from_utf8; 670 let xml = r#" 671 <foo:bus attr='bar'>foobusbar</foo:bus> 672 <foo: attr='bar'>foobusbar</foo:> 673 <:foo attr='bar'>foobusbar</:foo> 674 <foo:bus:baz attr='bar'>foobusbar</foo:bus:baz> 675 "#; 676 let mut rdr = Reader::from_str(xml); 677 let mut buf = Vec::new(); 678 let mut parsed_local_names = Vec::new(); 679 loop { 680 match rdr.read_event(&mut buf).expect("unable to read xml event") { 681 Event::Start(ref e) => parsed_local_names.push( 682 from_utf8(e.local_name()) 683 .expect("unable to build str from local_name") 684 .to_string(), 685 ), 686 Event::End(ref e) => parsed_local_names.push( 687 from_utf8(e.local_name()) 688 .expect("unable to build str from local_name") 689 .to_string(), 690 ), 691 Event::Eof => break, 692 _ => {} 693 } 694 } 695 assert_eq!(parsed_local_names[0], "bus".to_string()); 696 assert_eq!(parsed_local_names[1], "bus".to_string()); 697 assert_eq!(parsed_local_names[2], "".to_string()); 698 assert_eq!(parsed_local_names[3], "".to_string()); 699 assert_eq!(parsed_local_names[4], "foo".to_string()); 700 assert_eq!(parsed_local_names[5], "foo".to_string()); 701 assert_eq!(parsed_local_names[6], "bus:baz".to_string()); 702 assert_eq!(parsed_local_names[7], "bus:baz".to_string()); 703 } 704 705 #[test] bytestart_create()706 fn bytestart_create() { 707 let b = BytesStart::owned_name("test"); 708 assert_eq!(b.len(), 4); 709 assert_eq!(b.name(), b"test"); 710 } 711 712 #[test] bytestart_set_name()713 fn bytestart_set_name() { 714 let mut b = BytesStart::owned_name("test"); 715 assert_eq!(b.len(), 4); 716 assert_eq!(b.name(), b"test"); 717 assert_eq!(b.attributes_raw(), b""); 718 b.push_attribute(("x", "a")); 719 assert_eq!(b.len(), 10); 720 assert_eq!(b.attributes_raw(), b" x=\"a\""); 721 b.set_name(b"g"); 722 assert_eq!(b.len(), 7); 723 assert_eq!(b.name(), b"g"); 724 } 725 726 #[test] bytestart_clear_attributes()727 fn bytestart_clear_attributes() { 728 let mut b = BytesStart::owned_name("test"); 729 b.push_attribute(("x", "y\"z")); 730 b.push_attribute(("x", "y\"z")); 731 b.clear_attributes(); 732 assert!(b.attributes().next().is_none()); 733 assert_eq!(b.len(), 4); 734 assert_eq!(b.name(), b"test"); 735 } 736 } 737