1 //! Defines zero-copy XML events used throughout this library.
2 
3 pub mod attributes;
4 
5 #[cfg(feature = "encoding_rs")]
6 use encoding_rs::Encoding;
7 use std::borrow::Cow;
8 use std::collections::HashMap;
9 use std::io::BufRead;
10 use std::ops::Deref;
11 use std::str::from_utf8;
12 
13 use self::attributes::{Attribute, Attributes};
14 use errors::{Error, Result};
15 use escape::{do_unescape, escape};
16 use reader::Reader;
17 
18 use memchr;
19 
/// Opening tag data (`Event::Start`), with optional attributes.
///
/// `<name attr="value">`.
///
/// The name can be accessed using the [`name`], [`local_name`] or [`unescaped`] methods. An
/// iterator over the attributes is returned by the [`attributes`] method.
///
/// [`name`]: #method.name
/// [`local_name`]: #method.local_name
/// [`unescaped`]: #method.unescaped
/// [`attributes`]: #method.attributes
#[derive(Clone)]
pub struct BytesStart<'a> {
    /// Content of the element (the name followed by the raw attributes), before any UTF-8
    /// conversion.
    buf: Cow<'a, [u8]>,
    /// Length of the element name in bytes. The name always starts at the beginning of `buf`,
    /// so this is also the index in `buf` at which the name ends.
    name_len: usize,
}
38 
39 impl<'a> BytesStart<'a> {
40     /// Creates a new `BytesStart` from the given content (name + attributes).
41     ///
42     /// # Warning
43     ///
44     /// `&content[..name_len]` is not checked to be a valid name
45     #[inline]
borrowed(content: &'a [u8], name_len: usize) -> Self46     pub fn borrowed(content: &'a [u8], name_len: usize) -> Self {
47         BytesStart {
48             buf: Cow::Borrowed(content),
49             name_len,
50         }
51     }
52 
53     /// Creates a new `BytesStart` from the given name.
54     ///
55     /// # Warning
56     ///
57     /// `&content` is not checked to be a valid name
58     #[inline]
borrowed_name(name: &'a [u8]) -> BytesStart<'a>59     pub fn borrowed_name(name: &'a [u8]) -> BytesStart<'a> {
60         Self::borrowed(name, name.len())
61     }
62 
63     /// Creates a new `BytesStart` from the given content (name + attributes)
64     ///
65     /// Owns its contents.
66     #[inline]
owned<C: Into<Vec<u8>>>(content: C, name_len: usize) -> BytesStart<'static>67     pub fn owned<C: Into<Vec<u8>>>(content: C, name_len: usize) -> BytesStart<'static> {
68         BytesStart {
69             buf: Cow::Owned(content.into()),
70             name_len,
71         }
72     }
73 
74     /// Creates a new `BytesStart` from the given name
75     ///
76     /// Owns its contents.
77     #[inline]
owned_name<C: Into<Vec<u8>>>(name: C) -> BytesStart<'static>78     pub fn owned_name<C: Into<Vec<u8>>>(name: C) -> BytesStart<'static> {
79         let content = name.into();
80         BytesStart {
81             name_len: content.len(),
82             buf: Cow::Owned(content),
83         }
84     }
85 
86     /// Converts the event into an owned event.
into_owned(self) -> BytesStart<'static>87     pub fn into_owned(self) -> BytesStart<'static> {
88         Self::owned(self.buf.into_owned(), self.name_len)
89     }
90 
91     /// Converts the event into an owned event without taking ownership of Event
to_owned(&self) -> BytesStart<'static>92     pub fn to_owned(&self) -> BytesStart<'static> {
93         Self::owned(self.buf.to_owned(), self.name_len)
94     }
95 
96     /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
97     ///
98     /// # Example
99     ///
100     /// ```
101     /// # use quick_xml::{Error, Writer};
102     /// use quick_xml::events::{BytesStart, Event};
103     ///
104     /// struct SomeStruct<'a> {
105     ///     attrs: BytesStart<'a>,
106     ///     // ...
107     /// }
108     /// # impl<'a> SomeStruct<'a> {
109     /// # fn example(&self) -> Result<(), Error> {
110     /// # let mut writer = Writer::new(Vec::new());
111     ///
112     /// writer.write_event(Event::Start(self.attrs.to_borrowed()))?;
113     /// // ...
114     /// writer.write_event(Event::End(self.attrs.to_end()))?;
115     /// # Ok(())
116     /// # }}
117     /// ```
118     ///
119     /// [`to_end`]: #method.to_end
to_borrowed(&self) -> BytesStart120     pub fn to_borrowed(&self) -> BytesStart {
121         BytesStart::borrowed(&self.buf, self.name_len)
122     }
123 
124     /// Creates new paired close tag
to_end(&self) -> BytesEnd125     pub fn to_end(&self) -> BytesEnd {
126         BytesEnd::borrowed(self.name())
127     }
128 
129     /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
130     ///
131     /// The yielded items must be convertible to [`Attribute`] using `Into`.
132     ///
133     /// [`Attribute`]: attributes/struct.Attributes.html
with_attributes<'b, I>(mut self, attributes: I) -> Self where I: IntoIterator, I::Item: Into<Attribute<'b>>,134     pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
135     where
136         I: IntoIterator,
137         I::Item: Into<Attribute<'b>>,
138     {
139         self.extend_attributes(attributes);
140         self
141     }
142 
143     /// Gets the undecoded raw tag name as a `&[u8]`.
144     #[inline]
name(&self) -> &[u8]145     pub fn name(&self) -> &[u8] {
146         &self.buf[..self.name_len]
147     }
148 
149     /// Gets the undecoded raw local tag name (excluding namespace) as a `&[u8]`.
150     ///
151     /// All content up to and including the first `:` character is removed from the tag name.
152     #[inline]
local_name(&self) -> &[u8]153     pub fn local_name(&self) -> &[u8] {
154         let name = self.name();
155         memchr::memchr(b':', name).map_or(name, |i| &name[i + 1..])
156     }
157 
158     /// Gets the unescaped tag name.
159     ///
160     /// XML escape sequences like "`&lt;`" will be replaced by their unescaped characters like
161     /// "`<`".
162     ///
163     /// See also [`unescaped_with_custom_entities()`](#method.unescaped_with_custom_entities)
164     #[inline]
unescaped(&self) -> Result<Cow<[u8]>>165     pub fn unescaped(&self) -> Result<Cow<[u8]>> {
166         self.make_unescaped(None)
167     }
168 
169     /// Gets the unescaped tag name, using custom entities.
170     ///
171     /// XML escape sequences like "`&lt;`" will be replaced by their unescaped characters like
172     /// "`<`".
173     /// Additional entities can be provided in `custom_entities`.
174     ///
175     /// # Pre-condition
176     ///
177     /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
178     ///
179     /// See also [`unescaped()`](#method.unescaped)
180     #[inline]
unescaped_with_custom_entities<'s>( &'s self, custom_entities: &HashMap<Vec<u8>, Vec<u8>>, ) -> Result<Cow<'s, [u8]>>181     pub fn unescaped_with_custom_entities<'s>(
182         &'s self,
183         custom_entities: &HashMap<Vec<u8>, Vec<u8>>,
184     ) -> Result<Cow<'s, [u8]>> {
185         self.make_unescaped(Some(custom_entities))
186     }
187 
188     #[inline]
make_unescaped<'s>( &'s self, custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, ) -> Result<Cow<'s, [u8]>>189     fn make_unescaped<'s>(
190         &'s self,
191         custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
192     ) -> Result<Cow<'s, [u8]>> {
193         do_unescape(&*self.buf, custom_entities).map_err(Error::EscapeError)
194     }
195 
196     /// Returns an iterator over the attributes of this tag.
attributes(&self) -> Attributes197     pub fn attributes(&self) -> Attributes {
198         Attributes::new(self, self.name_len)
199     }
200 
201     /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
html_attributes(&self) -> Attributes202     pub fn html_attributes(&self) -> Attributes {
203         Attributes::html(self, self.name_len)
204     }
205 
206     /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
207     /// including the whitespace after the tag name if there is any.
208     #[inline]
attributes_raw(&self) -> &[u8]209     pub fn attributes_raw(&self) -> &[u8] {
210         &self.buf[self.name_len..]
211     }
212 
213     /// Add additional attributes to this tag using an iterator.
214     ///
215     /// The yielded items must be convertible to [`Attribute`] using `Into`.
216     ///
217     /// [`Attribute`]: attributes/struct.Attributes.html
extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a> where I: IntoIterator, I::Item: Into<Attribute<'b>>,218     pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
219     where
220         I: IntoIterator,
221         I::Item: Into<Attribute<'b>>,
222     {
223         for attr in attributes {
224             self.push_attribute(attr);
225         }
226         self
227     }
228 
229     /// Returns the unescaped and decoded string value.
230     ///
231     /// This allocates a `String` in all cases. For performance reasons it might be a better idea to
232     /// instead use one of:
233     ///
234     /// * [`unescaped()`], as it doesn't allocate when no escape sequences are used.
235     /// * [`Reader::decode()`], as it only allocates when the decoding can't be performed otherwise.
236     ///
237     /// [`unescaped()`]: #method.unescaped
238     /// [`Reader::decode()`]: ../reader/struct.Reader.html#method.decode
239     #[inline]
unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String>240     pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> {
241         self.do_unescape_and_decode_with_custom_entities(reader, None)
242     }
243 
244     /// Returns the unescaped and decoded string value with custom entities.
245     ///
246     /// This allocates a `String` in all cases. For performance reasons it might be a better idea to
247     /// instead use one of:
248     ///
249     /// * [`unescaped_with_custom_entities()`], as it doesn't allocate when no escape sequences are used.
250     /// * [`Reader::decode()`], as it only allocates when the decoding can't be performed otherwise.
251     ///
252     /// [`unescaped_with_custom_entities()`]: #method.unescaped_with_custom_entities
253     /// [`Reader::decode()`]: ../reader/struct.Reader.html#method.decode
254     ///
255     /// # Pre-condition
256     ///
257     /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
258     #[inline]
unescape_and_decode_with_custom_entities<B: BufRead>( &self, reader: &Reader<B>, custom_entities: &HashMap<Vec<u8>, Vec<u8>>, ) -> Result<String>259     pub fn unescape_and_decode_with_custom_entities<B: BufRead>(
260         &self,
261         reader: &Reader<B>,
262         custom_entities: &HashMap<Vec<u8>, Vec<u8>>,
263     ) -> Result<String> {
264         self.do_unescape_and_decode_with_custom_entities(reader, Some(custom_entities))
265     }
266 
267     #[cfg(feature = "encoding")]
268     #[inline]
do_unescape_and_decode_with_custom_entities<B: BufRead>( &self, reader: &Reader<B>, custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, ) -> Result<String>269     fn do_unescape_and_decode_with_custom_entities<B: BufRead>(
270         &self,
271         reader: &Reader<B>,
272         custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
273     ) -> Result<String> {
274         let decoded = reader.decode(&*self);
275         let unescaped =
276             do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?;
277         String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
278     }
279 
280     #[cfg(not(feature = "encoding"))]
281     #[inline]
do_unescape_and_decode_with_custom_entities<B: BufRead>( &self, reader: &Reader<B>, custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, ) -> Result<String>282     fn do_unescape_and_decode_with_custom_entities<B: BufRead>(
283         &self,
284         reader: &Reader<B>,
285         custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
286     ) -> Result<String> {
287         let decoded = reader.decode(&*self)?;
288         let unescaped =
289             do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?;
290         String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
291     }
292 
293     /// Adds an attribute to this element.
push_attribute<'b, A: Into<Attribute<'b>>>(&mut self, attr: A)294     pub fn push_attribute<'b, A: Into<Attribute<'b>>>(&mut self, attr: A) {
295         let a = attr.into();
296         let bytes = self.buf.to_mut();
297         bytes.push(b' ');
298         bytes.extend_from_slice(a.key);
299         bytes.extend_from_slice(b"=\"");
300         bytes.extend_from_slice(&*a.value);
301         bytes.push(b'"');
302     }
303 
304     /// Edit the name of the BytesStart in-place
305     ///
306     /// # Warning
307     ///
308     /// `name` is not checked to be a valid name
set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a>309     pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
310         let bytes = self.buf.to_mut();
311         bytes.splice(..self.name_len, name.iter().cloned());
312         self.name_len = name.len();
313         self
314     }
315 
316     /// Remove all attributes from the ByteStart
clear_attributes(&mut self) -> &mut BytesStart<'a>317     pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
318         self.buf.to_mut().truncate(self.name_len);
319         self
320     }
321 }
322 
323 impl<'a> std::fmt::Debug for BytesStart<'a> {
fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result324     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
325         use crate::utils::write_byte_string;
326 
327         write!(f, "BytesStart {{ buf: ")?;
328         write_byte_string(f, &self.buf)?;
329         write!(f, ", name_len: {} }}", self.name_len)
330     }
331 }
332 
/// An XML declaration (`Event::Decl`).
///
/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
#[derive(Clone, Debug)]
pub struct BytesDecl<'a> {
    // The declaration is stored like a start tag, so `version`, `encoding` and `standalone`
    // can be read through the regular attribute iterator of `BytesStart`.
    element: BytesStart<'a>,
}
340 
341 impl<'a> BytesDecl<'a> {
342     /// Creates a `BytesDecl` from a `BytesStart`
from_start(start: BytesStart<'a>) -> BytesDecl<'a>343     pub fn from_start(start: BytesStart<'a>) -> BytesDecl<'a> {
344         BytesDecl { element: start }
345     }
346 
347     /// Gets xml version, including quotes (' or ")
version(&self) -> Result<Cow<[u8]>>348     pub fn version(&self) -> Result<Cow<[u8]>> {
349         // The version *must* be the first thing in the declaration.
350         match self.element.attributes().next() {
351             Some(Err(e)) => Err(e),
352             Some(Ok(Attribute {
353                 key: b"version",
354                 value: v,
355             })) => Ok(v),
356             Some(Ok(a)) => {
357                 let found = from_utf8(a.key).map_err(Error::Utf8)?.to_string();
358                 Err(Error::XmlDeclWithoutVersion(Some(found)))
359             }
360             None => Err(Error::XmlDeclWithoutVersion(None)),
361         }
362     }
363 
364     /// Gets xml encoding, including quotes (' or ")
encoding(&self) -> Option<Result<Cow<[u8]>>>365     pub fn encoding(&self) -> Option<Result<Cow<[u8]>>> {
366         for a in self.element.attributes() {
367             match a {
368                 Err(e) => return Some(Err(e)),
369                 Ok(Attribute {
370                     key: b"encoding",
371                     value: v,
372                 }) => return Some(Ok(v)),
373                 _ => (),
374             }
375         }
376         None
377     }
378 
379     /// Gets xml standalone, including quotes (' or ")
standalone(&self) -> Option<Result<Cow<[u8]>>>380     pub fn standalone(&self) -> Option<Result<Cow<[u8]>>> {
381         for a in self.element.attributes() {
382             match a {
383                 Err(e) => return Some(Err(e)),
384                 Ok(Attribute {
385                     key: b"standalone",
386                     value: v,
387                 }) => return Some(Ok(v)),
388                 _ => (),
389             }
390         }
391         None
392     }
393 
394     /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
395     /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
396     /// attribute.
397     ///
398     /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
399     /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
400     /// the double quote character is not allowed in any of the attribute values.
new( version: &[u8], encoding: Option<&[u8]>, standalone: Option<&[u8]>, ) -> BytesDecl<'static>401     pub fn new(
402         version: &[u8],
403         encoding: Option<&[u8]>,
404         standalone: Option<&[u8]>,
405     ) -> BytesDecl<'static> {
406         // Compute length of the buffer based on supplied attributes
407         // ' encoding=""'   => 12
408         let encoding_attr_len = if let Some(xs) = encoding {
409             12 + xs.len()
410         } else {
411             0
412         };
413         // ' standalone=""' => 14
414         let standalone_attr_len = if let Some(xs) = standalone {
415             14 + xs.len()
416         } else {
417             0
418         };
419         // 'xml version=""' => 14
420         let mut buf = Vec::with_capacity(14 + encoding_attr_len + standalone_attr_len);
421 
422         buf.extend_from_slice(b"xml version=\"");
423         buf.extend_from_slice(version);
424 
425         if let Some(encoding_val) = encoding {
426             buf.extend_from_slice(b"\" encoding=\"");
427             buf.extend_from_slice(encoding_val);
428         }
429 
430         if let Some(standalone_val) = standalone {
431             buf.extend_from_slice(b"\" standalone=\"");
432             buf.extend_from_slice(standalone_val);
433         }
434         buf.push(b'"');
435 
436         BytesDecl {
437             element: BytesStart::owned(buf, 3),
438         }
439     }
440 
441     /// Gets the decoder struct
442     #[cfg(feature = "encoding_rs")]
encoder(&self) -> Option<&'static Encoding>443     pub fn encoder(&self) -> Option<&'static Encoding> {
444         self.encoding()
445             .and_then(|e| e.ok())
446             .and_then(|e| Encoding::for_label(&*e))
447     }
448 
449     /// Converts the event into an owned event.
into_owned(self) -> BytesDecl<'static>450     pub fn into_owned(self) -> BytesDecl<'static> {
451         BytesDecl {
452             element: self.element.into_owned(),
453         }
454     }
455 }
456 
/// Closing tag data (`Event::End`): holds the raw name of the tag being closed.
#[derive(Clone)]
pub struct BytesEnd<'a> {
    /// Raw, undecoded tag name.
    name: Cow<'a, [u8]>,
}

impl<'a> BytesEnd<'a> {
    /// Builds a `BytesEnd` that borrows `name`.
    #[inline]
    pub fn borrowed(name: &'a [u8]) -> BytesEnd<'a> {
        let name = Cow::Borrowed(name);
        BytesEnd { name }
    }

    /// Builds a `BytesEnd` that owns `name`.
    #[inline]
    pub fn owned(name: Vec<u8>) -> BytesEnd<'static> {
        let name = Cow::Owned(name);
        BytesEnd { name }
    }

    /// Consumes the event and returns an equivalent one that owns its name.
    ///
    /// A borrowed name is copied; an already owned name is moved as is.
    pub fn into_owned(self) -> BytesEnd<'static> {
        BytesEnd::owned(self.name.into_owned())
    }

    /// Returns the raw, undecoded name of the closing tag.
    #[inline]
    pub fn name(&self) -> &[u8] {
        self.name.as_ref()
    }

    /// Returns the raw local name (the name without its namespace prefix) as a `&[u8]`.
    ///
    /// Everything up to and including the first `:` is dropped. If the name contains no `:`,
    /// the full name is returned.
    #[inline]
    pub fn local_name(&self) -> &[u8] {
        let full = self.name();
        match full.iter().position(|&b| b == b':') {
            Some(colon) => &full[colon + 1..],
            None => full,
        }
    }
}
505 
506 impl<'a> std::fmt::Debug for BytesEnd<'a> {
fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result507     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
508         use crate::utils::write_byte_string;
509 
510         write!(f, "BytesEnd {{ name: ")?;
511         write_byte_string(f, &self.name)?;
512         write!(f, " }}")
513     }
514 }
515 
/// Data from various events (most notably, `Event::Text`).
///
/// The same type is also the payload of the `Comment`, `CData`, `PI` and `DocType` variants
/// of `Event`.
#[derive(Clone)]
pub struct BytesText<'a> {
    // Invariant: The content is always escaped.
    // (`from_plain` escapes its input; `from_escaped` trusts the caller to pass escaped bytes.)
    content: Cow<'a, [u8]>,
}
522 
523 impl<'a> BytesText<'a> {
524     /// Creates a new `BytesText` from an escaped byte sequence.
525     #[inline]
from_escaped<C: Into<Cow<'a, [u8]>>>(content: C) -> BytesText<'a>526     pub fn from_escaped<C: Into<Cow<'a, [u8]>>>(content: C) -> BytesText<'a> {
527         BytesText {
528             content: content.into(),
529         }
530     }
531 
532     /// Creates a new `BytesText` from a byte sequence. The byte sequence is
533     /// expected not to be escaped.
534     #[inline]
from_plain(content: &'a [u8]) -> BytesText<'a>535     pub fn from_plain(content: &'a [u8]) -> BytesText<'a> {
536         BytesText {
537             content: escape(content),
538         }
539     }
540 
541     /// Creates a new `BytesText` from an escaped string.
542     #[inline]
from_escaped_str<C: Into<Cow<'a, str>>>(content: C) -> BytesText<'a>543     pub fn from_escaped_str<C: Into<Cow<'a, str>>>(content: C) -> BytesText<'a> {
544         Self::from_escaped(match content.into() {
545             Cow::Owned(o) => Cow::Owned(o.into_bytes()),
546             Cow::Borrowed(b) => Cow::Borrowed(b.as_bytes()),
547         })
548     }
549 
550     /// Creates a new `BytesText` from a string. The string is expected not to
551     /// be escaped.
552     #[inline]
from_plain_str(content: &'a str) -> BytesText<'a>553     pub fn from_plain_str(content: &'a str) -> BytesText<'a> {
554         Self::from_plain(content.as_bytes())
555     }
556 
557     /// Ensures that all data is owned to extend the object's lifetime if
558     /// necessary.
559     #[inline]
into_owned(self) -> BytesText<'static>560     pub fn into_owned(self) -> BytesText<'static> {
561         BytesText {
562             content: self.content.into_owned().into(),
563         }
564     }
565 
566     /// Extracts the inner `Cow` from the `BytesText` event container.
567     #[cfg(feature = "serialize")]
568     #[inline]
into_inner(self) -> Cow<'a, [u8]>569     pub(crate) fn into_inner(self) -> Cow<'a, [u8]> {
570         self.content
571     }
572 
573     /// gets escaped content
574     ///
575     /// Searches for '&' into content and try to escape the coded character if possible
576     /// returns Malformed error with index within element if '&' is not followed by ';'
577     ///
578     /// See also [`unescaped_with_custom_entities()`](#method.unescaped_with_custom_entities)
unescaped(&self) -> Result<Cow<[u8]>>579     pub fn unescaped(&self) -> Result<Cow<[u8]>> {
580         self.make_unescaped(None)
581     }
582 
583     /// gets escaped content with custom entities
584     ///
585     /// Searches for '&' into content and try to escape the coded character if possible
586     /// returns Malformed error with index within element if '&' is not followed by ';'
587     /// Additional entities can be provided in `custom_entities`.
588     ///
589     /// # Pre-condition
590     ///
591     /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
592     ///
593     /// See also [`unescaped()`](#method.unescaped)
unescaped_with_custom_entities<'s>( &'s self, custom_entities: &HashMap<Vec<u8>, Vec<u8>>, ) -> Result<Cow<'s, [u8]>>594     pub fn unescaped_with_custom_entities<'s>(
595         &'s self,
596         custom_entities: &HashMap<Vec<u8>, Vec<u8>>,
597     ) -> Result<Cow<'s, [u8]>> {
598         self.make_unescaped(Some(custom_entities))
599     }
600 
make_unescaped<'s>( &'s self, custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, ) -> Result<Cow<'s, [u8]>>601     fn make_unescaped<'s>(
602         &'s self,
603         custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
604     ) -> Result<Cow<'s, [u8]>> {
605         do_unescape(self, custom_entities).map_err(Error::EscapeError)
606     }
607 
608     /// helper method to unescape then decode self using the reader encoding
609     /// but without BOM (Byte order mark)
610     ///
611     /// for performance reasons (could avoid allocating a `String`),
612     /// it might be wiser to manually use
613     /// 1. BytesText::unescaped()
614     /// 2. Reader::decode(...)
615     #[cfg(feature = "encoding")]
unescape_and_decode_without_bom<B: BufRead>( &self, reader: &mut Reader<B>, ) -> Result<String>616     pub fn unescape_and_decode_without_bom<B: BufRead>(
617         &self,
618         reader: &mut Reader<B>,
619     ) -> Result<String> {
620         self.do_unescape_and_decode_without_bom(reader, None)
621     }
622 
623     /// helper method to unescape then decode self using the reader encoding
624     /// but without BOM (Byte order mark)
625     ///
626     /// for performance reasons (could avoid allocating a `String`),
627     /// it might be wiser to manually use
628     /// 1. BytesText::unescaped()
629     /// 2. Reader::decode(...)
630     #[cfg(not(feature = "encoding"))]
unescape_and_decode_without_bom<B: BufRead>( &self, reader: &Reader<B>, ) -> Result<String>631     pub fn unescape_and_decode_without_bom<B: BufRead>(
632         &self,
633         reader: &Reader<B>,
634     ) -> Result<String> {
635         self.do_unescape_and_decode_without_bom(reader, None)
636     }
637 
638     /// helper method to unescape then decode self using the reader encoding with custom entities
639     /// but without BOM (Byte order mark)
640     ///
641     /// for performance reasons (could avoid allocating a `String`),
642     /// it might be wiser to manually use
643     /// 1. BytesText::unescaped()
644     /// 2. Reader::decode(...)
645     ///
646     /// # Pre-condition
647     ///
648     /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
649     #[cfg(feature = "encoding")]
unescape_and_decode_without_bom_with_custom_entities<B: BufRead>( &self, reader: &mut Reader<B>, custom_entities: &HashMap<Vec<u8>, Vec<u8>>, ) -> Result<String>650     pub fn unescape_and_decode_without_bom_with_custom_entities<B: BufRead>(
651         &self,
652         reader: &mut Reader<B>,
653         custom_entities: &HashMap<Vec<u8>, Vec<u8>>,
654     ) -> Result<String> {
655         self.do_unescape_and_decode_without_bom(reader, Some(custom_entities))
656     }
657 
658     /// helper method to unescape then decode self using the reader encoding with custom entities
659     /// but without BOM (Byte order mark)
660     ///
661     /// for performance reasons (could avoid allocating a `String`),
662     /// it might be wiser to manually use
663     /// 1. BytesText::unescaped()
664     /// 2. Reader::decode(...)
665     ///
666     /// # Pre-condition
667     ///
668     /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
669     #[cfg(not(feature = "encoding"))]
unescape_and_decode_without_bom_with_custom_entities<B: BufRead>( &self, reader: &Reader<B>, custom_entities: &HashMap<Vec<u8>, Vec<u8>>, ) -> Result<String>670     pub fn unescape_and_decode_without_bom_with_custom_entities<B: BufRead>(
671         &self,
672         reader: &Reader<B>,
673         custom_entities: &HashMap<Vec<u8>, Vec<u8>>,
674     ) -> Result<String> {
675         self.do_unescape_and_decode_without_bom(reader, Some(custom_entities))
676     }
677 
678     #[cfg(feature = "encoding")]
do_unescape_and_decode_without_bom<B: BufRead>( &self, reader: &mut Reader<B>, custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, ) -> Result<String>679     fn do_unescape_and_decode_without_bom<B: BufRead>(
680         &self,
681         reader: &mut Reader<B>,
682         custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
683     ) -> Result<String> {
684         let decoded = reader.decode_without_bom(&*self);
685         let unescaped =
686             do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?;
687         String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
688     }
689 
690     #[cfg(not(feature = "encoding"))]
do_unescape_and_decode_without_bom<B: BufRead>( &self, reader: &Reader<B>, custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, ) -> Result<String>691     fn do_unescape_and_decode_without_bom<B: BufRead>(
692         &self,
693         reader: &Reader<B>,
694         custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
695     ) -> Result<String> {
696         let decoded = reader.decode_without_bom(&*self)?;
697         let unescaped =
698             do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?;
699         String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
700     }
701 
702     /// helper method to unescape then decode self using the reader encoding
703     ///
704     /// for performance reasons (could avoid allocating a `String`),
705     /// it might be wiser to manually use
706     /// 1. BytesText::unescaped()
707     /// 2. Reader::decode(...)
unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String>708     pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> {
709         self.do_unescape_and_decode_with_custom_entities(reader, None)
710     }
711 
712     /// helper method to unescape then decode self using the reader encoding with custom entities
713     ///
714     /// for performance reasons (could avoid allocating a `String`),
715     /// it might be wiser to manually use
716     /// 1. BytesText::unescaped()
717     /// 2. Reader::decode(...)
718     ///
719     /// # Pre-condition
720     ///
721     /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
unescape_and_decode_with_custom_entities<B: BufRead>( &self, reader: &Reader<B>, custom_entities: &HashMap<Vec<u8>, Vec<u8>>, ) -> Result<String>722     pub fn unescape_and_decode_with_custom_entities<B: BufRead>(
723         &self,
724         reader: &Reader<B>,
725         custom_entities: &HashMap<Vec<u8>, Vec<u8>>,
726     ) -> Result<String> {
727         self.do_unescape_and_decode_with_custom_entities(reader, Some(custom_entities))
728     }
729 
730     #[cfg(feature = "encoding")]
do_unescape_and_decode_with_custom_entities<B: BufRead>( &self, reader: &Reader<B>, custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, ) -> Result<String>731     fn do_unescape_and_decode_with_custom_entities<B: BufRead>(
732         &self,
733         reader: &Reader<B>,
734         custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
735     ) -> Result<String> {
736         let decoded = reader.decode(&*self);
737         let unescaped =
738             do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?;
739         String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
740     }
741 
742     #[cfg(not(feature = "encoding"))]
do_unescape_and_decode_with_custom_entities<B: BufRead>( &self, reader: &Reader<B>, custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>, ) -> Result<String>743     fn do_unescape_and_decode_with_custom_entities<B: BufRead>(
744         &self,
745         reader: &Reader<B>,
746         custom_entities: Option<&HashMap<Vec<u8>, Vec<u8>>>,
747     ) -> Result<String> {
748         let decoded = reader.decode(&*self)?;
749         let unescaped =
750             do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?;
751         String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
752     }
753 
754     /// Gets escaped content.
escaped(&self) -> &[u8]755     pub fn escaped(&self) -> &[u8] {
756         self.content.as_ref()
757     }
758 }
759 
760 impl<'a> std::fmt::Debug for BytesText<'a> {
fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result761     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
762         use crate::utils::write_byte_string;
763 
764         write!(f, "BytesText {{ content: ")?;
765         write_byte_string(f, &self.content)?;
766         write!(f, " }}")
767     }
768 }
769 
/// Event emitted by [`Reader::read_event`].
///
/// Every variant except `Eof` wraps one of the `Bytes*` payload types, which are
/// parameterised by the lifetime `'a`. An event can be turned into an
/// `Event<'static>` with [`into_owned`], and it dereferences to the payload's
/// bytes (an empty slice for `Eof`).
///
/// [`Reader::read_event`]: ../reader/struct.Reader.html#method.read_event
/// [`into_owned`]: #method.into_owned
#[derive(Clone, Debug)]
pub enum Event<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Empty element tag (with attributes) `<tag attr="value" />`.
    ///
    /// Carries the same payload type as `Start`.
    Empty(BytesStart<'a>),
    /// Character data between `Start` and `End` element.
    Text(BytesText<'a>),
    /// Comment `<!-- ... -->`.
    Comment(BytesText<'a>),
    /// CData `<![CDATA[...]]>`.
    CData(BytesText<'a>),
    /// XML declaration `<?xml ...?>`.
    Decl(BytesDecl<'a>),
    /// Processing instruction `<?...?>`.
    PI(BytesText<'a>),
    /// Doctype `<!DOCTYPE...>`.
    DocType(BytesText<'a>),
    /// End of XML document. Carries no payload.
    Eof,
}
796 
797 impl<'a> Event<'a> {
798     /// Converts the event to an owned version, untied to the lifetime of
799     /// buffer used when reading but incurring a new, seperate allocation.
into_owned(self) -> Event<'static>800     pub fn into_owned(self) -> Event<'static> {
801         match self {
802             Event::Start(e) => Event::Start(e.into_owned()),
803             Event::End(e) => Event::End(e.into_owned()),
804             Event::Empty(e) => Event::Empty(e.into_owned()),
805             Event::Text(e) => Event::Text(e.into_owned()),
806             Event::Comment(e) => Event::Comment(e.into_owned()),
807             Event::CData(e) => Event::CData(e.into_owned()),
808             Event::Decl(e) => Event::Decl(e.into_owned()),
809             Event::PI(e) => Event::PI(e.into_owned()),
810             Event::DocType(e) => Event::DocType(e.into_owned()),
811             Event::Eof => Event::Eof,
812         }
813     }
814 }
815 
816 impl<'a> Deref for BytesStart<'a> {
817     type Target = [u8];
deref(&self) -> &[u8]818     fn deref(&self) -> &[u8] {
819         &*self.buf
820     }
821 }
822 
823 impl<'a> Deref for BytesDecl<'a> {
824     type Target = [u8];
deref(&self) -> &[u8]825     fn deref(&self) -> &[u8] {
826         &*self.element
827     }
828 }
829 
830 impl<'a> Deref for BytesEnd<'a> {
831     type Target = [u8];
deref(&self) -> &[u8]832     fn deref(&self) -> &[u8] {
833         &*self.name
834     }
835 }
836 
837 impl<'a> Deref for BytesText<'a> {
838     type Target = [u8];
deref(&self) -> &[u8]839     fn deref(&self) -> &[u8] {
840         &*self.content
841     }
842 }
843 
844 impl<'a> Deref for Event<'a> {
845     type Target = [u8];
deref(&self) -> &[u8]846     fn deref(&self) -> &[u8] {
847         match *self {
848             Event::Start(ref e) | Event::Empty(ref e) => &*e,
849             Event::End(ref e) => &*e,
850             Event::Text(ref e) => &*e,
851             Event::Decl(ref e) => &*e,
852             Event::PI(ref e) => &*e,
853             Event::CData(ref e) => &*e,
854             Event::Comment(ref e) => &*e,
855             Event::DocType(ref e) => &*e,
856             Event::Eof => &[],
857         }
858     }
859 }
860 
861 impl<'a> AsRef<Event<'a>> for Event<'a> {
as_ref(&self) -> &Event<'a>862     fn as_ref(&self) -> &Event<'a> {
863         self
864     }
865 }
866 
#[cfg(test)]
mod test {
    use super::*;

    /// Start and end tags must both report the part of the name that follows the
    /// first `:` (possibly empty), as spelled out by the expectations below.
    #[test]
    fn local_name() {
        use std::str::from_utf8;

        let xml = r#"
            <foo:bus attr='bar'>foobusbar</foo:bus>
            <foo: attr='bar'>foobusbar</foo:>
            <:foo attr='bar'>foobusbar</:foo>
            <foo:bus:baz attr='bar'>foobusbar</foo:bus:baz>
            "#;
        // Shared conversion for both tag kinds.
        let to_name = |raw: &[u8]| {
            from_utf8(raw)
                .expect("unable to build str from local_name")
                .to_string()
        };

        let mut reader = Reader::from_str(xml);
        let mut buf = Vec::new();
        let mut names = Vec::new();
        loop {
            match reader.read_event(&mut buf).expect("unable to read xml event") {
                Event::Start(ref start) => names.push(to_name(start.local_name())),
                Event::End(ref end) => names.push(to_name(end.local_name())),
                Event::Eof => break,
                _ => {}
            }
        }

        // One start and one end tag per element, in document order.
        let expected = ["bus", "bus", "", "", "foo", "foo", "bus:baz", "bus:baz"];
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(names[idx], want.to_string());
        }
    }

    /// A start tag built from a bare name consists of exactly that name.
    #[test]
    fn bytestart_create() {
        let start = BytesStart::owned_name("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), b"test");
    }

    /// Renaming a tag replaces only the name and keeps the attributes pushed
    /// earlier, which shows in the total length.
    #[test]
    fn bytestart_set_name() {
        let mut start = BytesStart::owned_name("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), b"test");
        assert_eq!(start.attributes_raw(), b"");

        start.push_attribute(("x", "a"));
        assert_eq!(start.len(), 10);
        assert_eq!(start.attributes_raw(), b" x=\"a\"");

        start.set_name(b"g");
        assert_eq!(start.len(), 7);
        assert_eq!(start.name(), b"g");
    }

    /// Clearing attributes leaves nothing to iterate over and restores the
    /// name-only length.
    #[test]
    fn bytestart_clear_attributes() {
        let mut start = BytesStart::owned_name("test");
        start.push_attribute(("x", "y\"z"));
        start.push_attribute(("x", "y\"z"));
        start.clear_attributes();

        assert!(start.attributes().next().is_none());
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), b"test");
    }
}
941