1 //! A module to handle `Reader`
2 
3 #[cfg(feature = "encoding")]
4 use std::borrow::Cow;
5 use std::fs::File;
6 use std::io::{self, BufRead, BufReader};
7 use std::path::Path;
8 use std::str::from_utf8;
9 
10 #[cfg(feature = "encoding")]
11 use encoding_rs::{Encoding, UTF_16BE, UTF_16LE};
12 
13 use errors::{Error, Result};
14 use events::{attributes::Attribute, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
15 
16 use memchr;
17 
/// Parser state that `Reader::read_event` dispatches on to decide what to read next.
#[derive(Clone)]
enum TagState {
    /// Set by `read_until_open`; the next `read_event` call parses markup up to `>`.
    Opened,
    /// Set by `read_until_close` (and the initial state); the next `read_event` call
    /// reads text up to the next `<`.
    Closed,
    /// Set when a self-closing tag was emitted as `Start` because empty elements are
    /// expanded; the next `read_event` call emits the matching `End`.
    Empty,
    /// Either Eof or Errored
    Exit,
}
26 
27 /// A low level encoding-agnostic XML event reader.
28 ///
29 /// Consumes a `BufRead` and streams XML `Event`s.
30 ///
31 /// # Examples
32 ///
33 /// ```
34 /// use quick_xml::Reader;
35 /// use quick_xml::events::Event;
36 ///
37 /// let xml = r#"<tag1 att1 = "test">
38 ///                 <tag2><!--Test comment-->Test</tag2>
39 ///                 <tag2>Test 2</tag2>
40 ///             </tag1>"#;
41 /// let mut reader = Reader::from_str(xml);
42 /// reader.trim_text(true);
43 /// let mut count = 0;
44 /// let mut txt = Vec::new();
45 /// let mut buf = Vec::new();
46 /// loop {
47 ///     match reader.read_event(&mut buf) {
48 ///         Ok(Event::Start(ref e)) => {
49 ///             match e.name() {
50 ///                 b"tag1" => println!("attributes values: {:?}",
51 ///                                     e.attributes().map(|a| a.unwrap().value)
52 ///                                     .collect::<Vec<_>>()),
53 ///                 b"tag2" => count += 1,
54 ///                 _ => (),
55 ///             }
56 ///         },
57 ///         Ok(Event::Text(e)) => txt.push(e.unescape_and_decode(&reader).unwrap()),
58 ///         Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
59 ///         Ok(Event::Eof) => break,
60 ///         _ => (),
61 ///     }
62 ///     buf.clear();
63 /// }
64 /// ```
#[derive(Clone)]
pub struct Reader<B: BufRead> {
    /// underlying buffered input the events are parsed from
    reader: B,
    /// current buffer position, useful for debugging errors
    buf_position: usize,
    /// current state Open/Close
    tag_state: TagState,
    /// expand empty element into an opening and closing element
    expand_empty_elements: bool,
    /// trims leading whitespace in Text events, skip the element if text is empty
    trim_text_start: bool,
    /// trims trailing whitespace in Text events.
    trim_text_end: bool,
    /// trims trailing whitespaces from markup names in closing tags `</a >`
    trim_markup_names_in_closing_tags: bool,
    /// check if End nodes match last Start node
    check_end_names: bool,
    /// check if comments contain `--` (false by default)
    check_comments: bool,
    /// names of all currently Started elements which didn't have a matching
    /// End element yet, stored back to back
    opened_buffer: Vec<u8>,
    /// start index in `opened_buffer` of each opened name
    opened_starts: Vec<usize>,
    /// a buffer to manage namespaces
    ns_buffer: NamespaceBufferIndex,
    #[cfg(feature = "encoding")]
    /// the encoding specified in the xml, defaults to utf8
    encoding: &'static Encoding,
    #[cfg(feature = "encoding")]
    /// whether the encoding has been determined (from the XML declaration or a BOM)
    is_encoding_set: bool,
}
99 
100 impl<B: BufRead> Reader<B> {
101     /// Creates a `Reader` that reads from a reader implementing `BufRead`.
from_reader(reader: B) -> Reader<B>102     pub fn from_reader(reader: B) -> Reader<B> {
103         Reader {
104             reader,
105             opened_buffer: Vec::new(),
106             opened_starts: Vec::new(),
107             tag_state: TagState::Closed,
108             expand_empty_elements: false,
109             trim_text_start: false,
110             trim_text_end: false,
111             trim_markup_names_in_closing_tags: true,
112             check_end_names: true,
113             buf_position: 0,
114             check_comments: false,
115             ns_buffer: NamespaceBufferIndex::default(),
116             #[cfg(feature = "encoding")]
117             encoding: ::encoding_rs::UTF_8,
118             #[cfg(feature = "encoding")]
119             is_encoding_set: false,
120         }
121     }
122 
    /// Changes whether empty elements should be split into a `Start` and an `End` event.
    ///
    /// When set to `true`, all [`Empty`] events produced by a self-closing tag like `<tag/>` are
    /// expanded into a [`Start`] event followed by an [`End`] event. When set to `false` (the
    /// default), those tags are represented by an [`Empty`] event instead.
    ///
    /// Returns `self` so that configuration calls can be chained.
    ///
    /// (`false` by default)
    ///
    /// [`Empty`]: events/enum.Event.html#variant.Empty
    /// [`Start`]: events/enum.Event.html#variant.Start
    /// [`End`]: events/enum.Event.html#variant.End
    pub fn expand_empty_elements(&mut self, val: bool) -> &mut Reader<B> {
        self.expand_empty_elements = val;
        self
    }
138 
    /// Changes whether whitespace before and after character data should be removed.
    ///
    /// This sets both the leading and the trailing trim option to `val`.
    ///
    /// When set to `true`, all [`Text`] events are trimmed. If they are empty, no event will be
    /// pushed.
    ///
    /// Returns `self` so that configuration calls can be chained.
    ///
    /// (`false` by default)
    ///
    /// [`Text`]: events/enum.Event.html#variant.Text
    pub fn trim_text(&mut self, val: bool) -> &mut Reader<B> {
        self.trim_text_start = val;
        self.trim_text_end = val;
        self
    }
152 
    /// Changes whether whitespace after character data should be removed.
    ///
    /// When set to `true`, trailing whitespace is trimmed in [`Text`] events. Only the trailing
    /// trim option is touched; the leading trim option keeps its current value.
    ///
    /// Returns `self` so that configuration calls can be chained.
    ///
    /// (`false` by default)
    ///
    /// [`Text`]: events/enum.Event.html#variant.Text
    pub fn trim_text_end(&mut self, val: bool) -> &mut Reader<B> {
        self.trim_text_end = val;
        self
    }
164 
    /// Changes whether trailing whitespaces after the markup name are trimmed in closing tags
    /// `</a >`.
    ///
    /// If `true`, the emitted [`End`] event is stripped of trailing whitespace after the markup
    /// name.
    ///
    /// Note that if set to `false` and `check_end_names` is `true`, the comparison of markup
    /// names is going to fail erroneously if a closing tag contains trailing whitespaces.
    ///
    /// Returns `self` so that configuration calls can be chained.
    ///
    /// (`true` by default)
    ///
    /// [`End`]: events/enum.Event.html#variant.End
    pub fn trim_markup_names_in_closing_tags(&mut self, val: bool) -> &mut Reader<B> {
        self.trim_markup_names_in_closing_tags = val;
        self
    }
180 
    /// Changes whether mismatched closing tag names should be detected.
    ///
    /// When set to `false`, it won't check if a closing tag matches the corresponding opening tag.
    /// For example, `<mytag></different_tag>` will be permitted.
    ///
    /// If the XML is known to be sane (already processed, etc.) this saves extra time.
    ///
    /// Note that the emitted [`End`] event will not be modified if this is disabled, i.e. it will
    /// contain the data of the mismatched end tag.
    ///
    /// Returns `self` so that configuration calls can be chained.
    ///
    /// (`true` by default)
    ///
    /// [`End`]: events/enum.Event.html#variant.End
    pub fn check_end_names(&mut self, val: bool) -> &mut Reader<B> {
        self.check_end_names = val;
        self
    }
198 
    /// Changes whether comments should be validated.
    ///
    /// When set to `true`, every [`Comment`] event will be checked for not containing `--`, which
    /// is not allowed in XML comments. Most of the time we don't want comments at all so we don't
    /// really care about comment correctness, thus the default value is `false` to improve
    /// performance.
    ///
    /// Returns `self` so that configuration calls can be chained.
    ///
    /// (`false` by default)
    ///
    /// [`Comment`]: events/enum.Event.html#variant.Comment
    pub fn check_comments(&mut self, val: bool) -> &mut Reader<B> {
        self.check_comments = val;
        self
    }
213 
214     /// Gets the current byte position in the input data.
215     ///
216     /// Useful when debugging errors.
buffer_position(&self) -> usize217     pub fn buffer_position(&self) -> usize {
218         // when internal state is Opened, we have actually read until '<',
219         // which we don't want to show
220         if let TagState::Opened = self.tag_state {
221             self.buf_position - 1
222         } else {
223             self.buf_position
224         }
225     }
226 
227     /// private function to read until '<' is found
228     /// return a `Text` event
read_until_open<'a, 'b>(&'a mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>>229     fn read_until_open<'a, 'b>(&'a mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
230         self.tag_state = TagState::Opened;
231         let buf_start = buf.len();
232         match read_until(&mut self.reader, b'<', buf, &mut self.buf_position) {
233             Ok(0) => Ok(Event::Eof),
234             Ok(_) => {
235                 let (start, len) = (
236                     buf_start
237                         + if self.trim_text_start {
238                             match buf.iter().skip(buf_start).position(|&b| !is_whitespace(b)) {
239                                 Some(start) => start,
240                                 None => return self.read_event(buf),
241                             }
242                         } else {
243                             0
244                         },
245                     if self.trim_text_end {
246                         buf.iter()
247                             .rposition(|&b| !is_whitespace(b))
248                             .map_or_else(|| buf.len(), |p| p + 1)
249                     } else {
250                         buf.len()
251                     },
252                 );
253                 Ok(Event::Text(BytesText::from_escaped(&buf[start..len])))
254             }
255             Err(e) => Err(e),
256         }
257     }
258 
259     /// private function to read until '>' is found
read_until_close<'a, 'b>(&'a mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>>260     fn read_until_close<'a, 'b>(&'a mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
261         self.tag_state = TagState::Closed;
262 
263         // need to read 1 character to decide whether pay special attention to attribute values
264         let buf_start = buf.len();
265         let start = loop {
266             match self.reader.fill_buf() {
267                 Ok(n) if n.is_empty() => return Ok(Event::Eof),
268                 Ok(n) => {
269                     // We intentionally don't `consume()` the byte, otherwise we would have to
270                     // handle things like '<>' here already.
271                     break n[0];
272                 }
273                 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
274                 Err(e) => return Err(Error::Io(e)),
275             }
276         };
277 
278         if start != b'/' && start != b'!' && start != b'?' {
279             match read_elem_until(&mut self.reader, b'>', buf, &mut self.buf_position) {
280                 Ok(0) => Ok(Event::Eof),
281                 Ok(_) => {
282                     // we already *know* that we are in this case
283                     self.read_start(&buf[buf_start..])
284                 }
285                 Err(e) => Err(e),
286             }
287         } else {
288             match read_until(&mut self.reader, b'>', buf, &mut self.buf_position) {
289                 Ok(0) => Ok(Event::Eof),
290                 Ok(_) => match start {
291                     b'/' => self.read_end(&buf[buf_start..]),
292                     b'!' => self.read_bang(buf_start, buf),
293                     b'?' => self.read_question_mark(&buf[buf_start..]),
294                     _ => unreachable!(
295                         "We checked that `start` must be one of [/!?], was {:?} \
296                              instead.",
297                         start
298                     ),
299                 },
300                 Err(e) => Err(e),
301             }
302         }
303     }
304 
305     /// reads `BytesElement` starting with a `/`,
306     /// if `self.check_end_names`, checks that element matches last opened element
307     /// return `End` event
read_end<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>>308     fn read_end<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>> {
309         // XML standard permits whitespaces after the markup name in closing tags.
310         // Let's strip them from the buffer before comparing tag names.
311         let name = if self.trim_markup_names_in_closing_tags {
312             if let Some(pos_end_name) = buf[1..].iter().rposition(|&b| !b.is_ascii_whitespace()) {
313                 let (name, _) = buf[1..].split_at(pos_end_name + 1);
314                 name
315             } else {
316                 &buf[1..]
317             }
318         } else {
319             &buf[1..]
320         };
321         if self.check_end_names {
322             let mismatch_err = |expected: &[u8], found: &[u8], buf_position: &mut usize| {
323                 *buf_position -= buf.len();
324                 Err(Error::EndEventMismatch {
325                     expected: from_utf8(expected).unwrap_or("").to_owned(),
326                     found: from_utf8(found).unwrap_or("").to_owned(),
327                 })
328             };
329             match self.opened_starts.pop() {
330                 Some(start) => {
331                     if name != &self.opened_buffer[start..] {
332                         let expected = &self.opened_buffer[start..];
333                         mismatch_err(expected, name, &mut self.buf_position)
334                     } else {
335                         self.opened_buffer.truncate(start);
336                         Ok(Event::End(BytesEnd::borrowed(name)))
337                     }
338                 }
339                 None => mismatch_err(b"", &buf[1..], &mut self.buf_position),
340             }
341         } else {
342             Ok(Event::End(BytesEnd::borrowed(name)))
343         }
344     }
345 
346     /// reads `BytesElement` starting with a `!`,
347     /// return `Comment`, `CData` or `DocType` event
348     ///
349     /// Note: depending on the start of the Event, we may need to read more
350     /// data, thus we need a mutable buffer
read_bang<'a, 'b>( &'a mut self, buf_start: usize, buf: &'b mut Vec<u8>, ) -> Result<Event<'b>>351     fn read_bang<'a, 'b>(
352         &'a mut self,
353         buf_start: usize,
354         buf: &'b mut Vec<u8>,
355     ) -> Result<Event<'b>> {
356         if buf[buf_start..].starts_with(b"!--") {
357             while buf.len() < buf_start + 5 || !buf.ends_with(b"--") {
358                 buf.push(b'>');
359                 match read_until(&mut self.reader, b'>', buf, &mut self.buf_position) {
360                     Ok(0) => {
361                         self.buf_position -= buf.len() - buf_start;
362                         return Err(Error::UnexpectedEof("Comment".to_string()));
363                     }
364                     Ok(_) => (),
365                     Err(e) => return Err(e),
366                 }
367             }
368             let len = buf.len();
369             if self.check_comments {
370                 // search if '--' not in comments
371                 if let Some(p) = memchr::memchr_iter(b'-', &buf[buf_start + 3..len - 2])
372                     .position(|p| buf[buf_start + 3 + p + 1] == b'-')
373                 {
374                     self.buf_position -= buf.len() - buf_start + p;
375                     return Err(Error::UnexpectedToken("--".to_string()));
376                 }
377             }
378             Ok(Event::Comment(BytesText::from_escaped(
379                 &buf[buf_start + 3..len - 2],
380             )))
381         } else if buf.len() >= buf_start + 8 {
382             match &buf[buf_start + 1..buf_start + 8] {
383                 b"[CDATA[" => {
384                     while buf.len() < 10 || !buf.ends_with(b"]]") {
385                         buf.push(b'>');
386                         match read_until(&mut self.reader, b'>', buf, &mut self.buf_position) {
387                             Ok(0) => {
388                                 self.buf_position -= buf.len() - buf_start;
389                                 return Err(Error::UnexpectedEof("CData".to_string()));
390                             }
391                             Ok(_) => (),
392                             Err(e) => return Err(e),
393                         }
394                     }
395                     Ok(Event::CData(BytesText::from_plain(
396                         &buf[buf_start + 8..buf.len() - 2],
397                     )))
398                 }
399                 x if x.eq_ignore_ascii_case(b"DOCTYPE") => {
400                     let mut count = buf.iter().skip(buf_start).filter(|&&b| b == b'<').count();
401                     while count > 0 {
402                         buf.push(b'>');
403                         match read_until(&mut self.reader, b'>', buf, &mut self.buf_position) {
404                             Ok(0) => {
405                                 self.buf_position -= buf.len() - buf_start;
406                                 return Err(Error::UnexpectedEof("DOCTYPE".to_string()));
407                             }
408                             Ok(n) => {
409                                 let start = buf.len() - n;
410                                 count += buf.iter().skip(start).filter(|&&b| b == b'<').count();
411                                 count -= 1;
412                             }
413                             Err(e) => return Err(e),
414                         }
415                     }
416                     Ok(Event::DocType(BytesText::from_escaped(
417                         &buf[buf_start + 8..buf.len()],
418                     )))
419                 }
420                 _ => Err(Error::UnexpectedBang),
421             }
422         } else {
423             self.buf_position -= buf.len() - buf_start;
424             Err(Error::UnexpectedBang)
425         }
426     }
427 
428     /// reads `BytesElement` starting with a `?`,
429     /// return `Decl` or `PI` event
430     #[cfg(feature = "encoding")]
read_question_mark<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>>431     fn read_question_mark<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>> {
432         let len = buf.len();
433         if len > 2 && buf[len - 1] == b'?' {
434             if len > 5 && &buf[1..4] == b"xml" && is_whitespace(buf[4]) {
435                 let event = BytesDecl::from_start(BytesStart::borrowed(&buf[1..len - 1], 3));
436                 // Try getting encoding from the declaration event
437                 if let Some(enc) = event.encoder() {
438                     self.encoding = enc;
439                     self.is_encoding_set = true;
440                 }
441                 Ok(Event::Decl(event))
442             } else {
443                 Ok(Event::PI(BytesText::from_escaped(&buf[1..len - 1])))
444             }
445         } else {
446             self.buf_position -= len;
447             Err(Error::UnexpectedEof("XmlDecl".to_string()))
448         }
449     }
450 
451     /// reads `BytesElement` starting with a `?`,
452     /// return `Decl` or `PI` event
453     #[cfg(not(feature = "encoding"))]
read_question_mark<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>>454     fn read_question_mark<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>> {
455         let len = buf.len();
456         if len > 2 && buf[len - 1] == b'?' {
457             if len > 5 && &buf[1..4] == b"xml" && is_whitespace(buf[4]) {
458                 let event = BytesDecl::from_start(BytesStart::borrowed(&buf[1..len - 1], 3));
459                 Ok(Event::Decl(event))
460             } else {
461                 Ok(Event::PI(BytesText::from_escaped(&buf[1..len - 1])))
462             }
463         } else {
464             self.buf_position -= len;
465             Err(Error::UnexpectedEof("XmlDecl".to_string()))
466         }
467     }
468 
469     #[inline]
close_expanded_empty(&mut self) -> Result<Event<'static>>470     fn close_expanded_empty(&mut self) -> Result<Event<'static>> {
471         self.tag_state = TagState::Closed;
472         let name = self
473             .opened_buffer
474             .split_off(self.opened_starts.pop().unwrap());
475         Ok(Event::End(BytesEnd::owned(name)))
476     }
477 
478     /// reads `BytesElement` starting with any character except `/`, `!` or ``?`
479     /// return `Start` or `Empty` event
read_start<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>>480     fn read_start<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>> {
481         // TODO: do this directly when reading bufreader ...
482         let len = buf.len();
483         let name_end = buf.iter().position(|&b| is_whitespace(b)).unwrap_or(len);
484         if let Some(&b'/') = buf.last() {
485             let end = if name_end < len { name_end } else { len - 1 };
486             if self.expand_empty_elements {
487                 self.tag_state = TagState::Empty;
488                 self.opened_starts.push(self.opened_buffer.len());
489                 self.opened_buffer.extend(&buf[..end]);
490                 Ok(Event::Start(BytesStart::borrowed(&buf[..len - 1], end)))
491             } else {
492                 Ok(Event::Empty(BytesStart::borrowed(&buf[..len - 1], end)))
493             }
494         } else {
495             if self.check_end_names {
496                 self.opened_starts.push(self.opened_buffer.len());
497                 self.opened_buffer.extend(&buf[..name_end]);
498             }
499             Ok(Event::Start(BytesStart::borrowed(buf, name_end)))
500         }
501     }
502 
503     /// Reads the next `Event`.
504     ///
505     /// This is the main entry point for reading XML `Event`s.
506     ///
507     /// `Event`s borrow `buf` and can be converted to own their data if needed (uses `Cow`
508     /// internally).
509     ///
510     /// Having the possibility to control the internal buffers gives you some additional benefits
511     /// such as:
512     ///
513     /// - Reduce the number of allocations by reusing the same buffer. For constrained systems,
514     ///   you can call `buf.clear()` once you are done with processing the event (typically at the
515     ///   end of your loop).
516     /// - Reserve the buffer length if you know the file size (using `Vec::with_capacity`).
517     ///
518     /// # Examples
519     ///
520     /// ```
521     /// use quick_xml::Reader;
522     /// use quick_xml::events::Event;
523     ///
524     /// let xml = r#"<tag1 att1 = "test">
525     ///                 <tag2><!--Test comment-->Test</tag2>
526     ///                 <tag2>Test 2</tag2>
527     ///             </tag1>"#;
528     /// let mut reader = Reader::from_str(xml);
529     /// reader.trim_text(true);
530     /// let mut count = 0;
531     /// let mut buf = Vec::new();
532     /// let mut txt = Vec::new();
533     /// loop {
534     ///     match reader.read_event(&mut buf) {
535     ///         Ok(Event::Start(ref e)) => count += 1,
536     ///         Ok(Event::Text(e)) => txt.push(e.unescape_and_decode(&reader).expect("Error!")),
537     ///         Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
538     ///         Ok(Event::Eof) => break,
539     ///         _ => (),
540     ///     }
541     ///     buf.clear();
542     /// }
543     /// println!("Found {} start events", count);
544     /// println!("Text events: {:?}", txt);
545     /// ```
read_event<'a, 'b>(&'a mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>>546     pub fn read_event<'a, 'b>(&'a mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
547         let event = match self.tag_state {
548             TagState::Opened => self.read_until_close(buf),
549             TagState::Closed => self.read_until_open(buf),
550             TagState::Empty => self.close_expanded_empty(),
551             TagState::Exit => return Ok(Event::Eof),
552         };
553         match event {
554             Err(_) | Ok(Event::Eof) => self.tag_state = TagState::Exit,
555             _ => {}
556         }
557         event
558     }
559 
    /// Resolves a potentially qualified **event name** into (namespace name, local name).
    ///
    /// *Qualified* event names have the form `prefix:local-name` where the `prefix` is defined
    /// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix
    /// can be defined on the same element as the name in question.
    ///
    /// *Unqualified* event inherits the current *default namespace*.
    ///
    /// `namespace_buffer` is the buffer that was handed to `read_namespaced_event`; the returned
    /// namespace borrows from it, the returned local name borrows from `qname`.
    #[inline]
    pub fn event_namespace<'a, 'b, 'c>(
        &'a self,
        qname: &'b [u8],
        namespace_buffer: &'c [u8],
    ) -> (Option<&'c [u8]>, &'b [u8]) {
        // NOTE(review): the `true` flag presumably enables the default-namespace fallback
        // described above; confirm against `resolve_namespace`.
        self.ns_buffer
            .resolve_namespace(qname, namespace_buffer, true)
    }
576 
    /// Resolves a potentially qualified **attribute name** into (namespace name, local name).
    ///
    /// *Qualified* attribute names have the form `prefix:local-name` where the `prefix` is defined
    /// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix
    /// can be defined on the same element as the attribute in question.
    ///
    /// *Unqualified* attribute names do *not* inherit the current *default namespace*.
    ///
    /// `namespace_buffer` is the buffer that was handed to `read_namespaced_event`; the returned
    /// namespace borrows from it, the returned local name borrows from `qname`.
    #[inline]
    pub fn attribute_namespace<'a, 'b, 'c>(
        &'a self,
        qname: &'b [u8],
        namespace_buffer: &'c [u8],
    ) -> (Option<&'c [u8]>, &'b [u8]) {
        // NOTE(review): the `false` flag presumably disables the default-namespace fallback
        // described above; confirm against `resolve_namespace`.
        self.ns_buffer
            .resolve_namespace(qname, namespace_buffer, false)
    }
593 
594     /// Reads the next event and resolves its namespace (if applicable).
595     ///
596     /// # Examples
597     ///
598     /// ```
599     /// use std::str::from_utf8;
600     /// use quick_xml::Reader;
601     /// use quick_xml::events::Event;
602     ///
603     /// let xml = r#"<x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
604     ///                 <y:tag2><!--Test comment-->Test</y:tag2>
605     ///                 <y:tag2>Test 2</y:tag2>
606     ///             </x:tag1>"#;
607     /// let mut reader = Reader::from_str(xml);
608     /// reader.trim_text(true);
609     /// let mut count = 0;
610     /// let mut buf = Vec::new();
611     /// let mut ns_buf = Vec::new();
612     /// let mut txt = Vec::new();
613     /// loop {
614     ///     match reader.read_namespaced_event(&mut buf, &mut ns_buf) {
615     ///         Ok((ref ns, Event::Start(ref e))) => {
616     ///             count += 1;
617     ///             match (*ns, e.local_name()) {
618     ///                 (Some(b"www.xxxx"), b"tag1") => (),
619     ///                 (Some(b"www.yyyy"), b"tag2") => (),
620     ///                 (ns, n) => panic!("Namespace and local name mismatch"),
621     ///             }
622     ///             println!("Resolved namespace: {:?}", ns.and_then(|ns| from_utf8(ns).ok()));
623     ///         }
624     ///         Ok((_, Event::Text(e))) => {
625     ///             txt.push(e.unescape_and_decode(&reader).expect("Error!"))
626     ///         },
627     ///         Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
628     ///         Ok((_, Event::Eof)) => break,
629     ///         _ => (),
630     ///     }
631     ///     buf.clear();
632     /// }
633     /// println!("Found {} start events", count);
634     /// println!("Text events: {:?}", txt);
635     /// ```
read_namespaced_event<'a, 'b, 'c>( &'a mut self, buf: &'b mut Vec<u8>, namespace_buffer: &'c mut Vec<u8>, ) -> Result<(Option<&'c [u8]>, Event<'b>)>636     pub fn read_namespaced_event<'a, 'b, 'c>(
637         &'a mut self,
638         buf: &'b mut Vec<u8>,
639         namespace_buffer: &'c mut Vec<u8>,
640     ) -> Result<(Option<&'c [u8]>, Event<'b>)> {
641         self.ns_buffer.pop_empty_namespaces(namespace_buffer);
642         match self.read_event(buf) {
643             Ok(Event::Eof) => Ok((None, Event::Eof)),
644             Ok(Event::Start(e)) => {
645                 self.ns_buffer.push_new_namespaces(&e, namespace_buffer);
646                 Ok((
647                     self.ns_buffer
648                         .find_namespace_value(e.name(), &**namespace_buffer),
649                     Event::Start(e),
650                 ))
651             }
652             Ok(Event::Empty(e)) => {
653                 // For empty elements we need to 'artificially' keep the namespace scope on the
654                 // stack until the next `next()` call occurs.
655                 // Otherwise the caller has no chance to use `resolve` in the context of the
656                 // namespace declarations that are 'in scope' for the empty element alone.
657                 // Ex: <img rdf:nodeID="abc" xmlns:rdf="urn:the-rdf-uri" />
658                 self.ns_buffer.push_new_namespaces(&e, namespace_buffer);
659                 // notify next `read_namespaced_event()` invocation that it needs to pop this
660                 // namespace scope
661                 self.ns_buffer.pending_pop = true;
662                 Ok((
663                     self.ns_buffer
664                         .find_namespace_value(e.name(), &**namespace_buffer),
665                     Event::Empty(e),
666                 ))
667             }
668             Ok(Event::End(e)) => {
669                 // notify next `read_namespaced_event()` invocation that it needs to pop this
670                 // namespace scope
671                 self.ns_buffer.pending_pop = true;
672                 Ok((
673                     self.ns_buffer
674                         .find_namespace_value(e.name(), &**namespace_buffer),
675                     Event::End(e),
676                 ))
677             }
678             Ok(e) => Ok((None, e)),
679             Err(e) => Err(e),
680         }
681     }
682 
    /// Returns the `Reader`'s encoding.
    ///
    /// The encoding starts out as UTF-8 and may change after parsing the XML declaration.
    ///
    /// This encoding will be used by [`decode`].
    ///
    /// [`decode`]: #method.decode
    #[cfg(feature = "encoding")]
    pub fn encoding(&self) -> &'static Encoding {
        self.encoding
    }
694 
    /// Decodes a slice using the encoding specified in the XML declaration.
    ///
    /// Decode `bytes` with BOM sniffing and with malformed sequences replaced with the
    /// `U+FFFD REPLACEMENT CHARACTER`.
    ///
    /// If no encoding is specified, defaults to UTF-8.
    #[inline]
    #[cfg(feature = "encoding")]
    pub fn decode<'b, 'c>(&'b self, bytes: &'c [u8]) -> Cow<'c, str> {
        // `Encoding::decode` returns a tuple; only the decoded text (first item) is returned
        self.encoding.decode(bytes).0
    }
706 
707     /// Decodes a UTF8 slice without BOM (Byte order mark) regardless of XML declaration.
708     ///
709     /// Decode `bytes` without BOM and with malformed sequences replaced with the
710     /// `U+FFFD REPLACEMENT CHARACTER`.
711     ///
712     /// # Note
713     ///
714     /// If you instead want to use XML declared encoding, use the `encoding` feature
715     #[inline]
716     #[cfg(not(feature = "encoding"))]
decode_without_bom<'c>(&self, bytes: &'c [u8]) -> Result<&'c str>717     pub fn decode_without_bom<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> {
718         if bytes.starts_with(b"\xEF\xBB\xBF") {
719             from_utf8(&bytes[3..]).map_err(Error::Utf8)
720         } else {
721             from_utf8(bytes).map_err(Error::Utf8)
722         }
723     }
724 
725     /// Decodes a slice using without BOM (Byte order mark) the encoding specified in the XML declaration.
726     ///
727     /// Decode `bytes` without BOM and with malformed sequences replaced with the
728     /// `U+FFFD REPLACEMENT CHARACTER`.
729     ///
730     /// If no encoding is specified, defaults to UTF-8.
731     #[inline]
732     #[cfg(feature = "encoding")]
decode_without_bom<'b, 'c>(&'b mut self, mut bytes: &'c [u8]) -> Cow<'c, str>733     pub fn decode_without_bom<'b, 'c>(&'b mut self, mut bytes: &'c [u8]) -> Cow<'c, str> {
734         if self.is_encoding_set {
735             return self.encoding.decode_with_bom_removal(bytes).0;
736         }
737         if bytes.starts_with(b"\xEF\xBB\xBF") {
738             self.is_encoding_set = true;
739             bytes = &bytes[3..];
740         } else if bytes.starts_with(b"\xFF\xFE") {
741             self.is_encoding_set = true;
742             self.encoding = UTF_16LE;
743             bytes = &bytes[2..];
744         } else if bytes.starts_with(b"\xFE\xFF") {
745             self.is_encoding_set = true;
746             self.encoding = UTF_16BE;
747             bytes = &bytes[3..];
748         };
749         self.encoding.decode_without_bom_handling(bytes).0
750     }
751 
752     /// Decodes a UTF8 slice regardless of XML declaration.
753     ///
754     /// Decode `bytes` with BOM sniffing and with malformed sequences replaced with the
755     /// `U+FFFD REPLACEMENT CHARACTER`.
756     ///
757     /// # Note
758     ///
759     /// If you instead want to use XML declared encoding, use the `encoding` feature
760     #[inline]
761     #[cfg(not(feature = "encoding"))]
decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str>762     pub fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> {
763         from_utf8(bytes).map_err(Error::Utf8)
764     }
765 
766     /// Get utf8 decoder
767     #[cfg(feature = "encoding")]
decoder(&self) -> Decoder768     pub fn decoder(&self) -> Decoder {
769         Decoder {
770             encoding: self.encoding,
771         }
772     }
773 
774     /// Get utf8 decoder
775     #[cfg(not(feature = "encoding"))]
decoder(&self) -> Decoder776     pub fn decoder(&self) -> Decoder {
777         Decoder
778     }
779 
780     /// Reads until end element is found
781     ///
782     /// Manages nested cases where parent and child elements have the same name
read_to_end<K: AsRef<[u8]>>(&mut self, end: K, buf: &mut Vec<u8>) -> Result<()>783     pub fn read_to_end<K: AsRef<[u8]>>(&mut self, end: K, buf: &mut Vec<u8>) -> Result<()> {
784         let mut depth = 0;
785         let end = end.as_ref();
786         loop {
787             match self.read_event(buf) {
788                 Ok(Event::End(ref e)) if e.name() == end => {
789                     if depth == 0 {
790                         return Ok(());
791                     }
792                     depth -= 1;
793                 }
794                 Ok(Event::Start(ref e)) if e.name() == end => depth += 1,
795                 Err(e) => return Err(e),
796                 Ok(Event::Eof) => {
797                     return Err(Error::UnexpectedEof(format!("</{:?}>", from_utf8(end))));
798                 }
799                 _ => (),
800             }
801             buf.clear();
802         }
803     }
804 
805     /// Reads optional text between start and end tags.
806     ///
807     /// If the next event is a [`Text`] event, returns the decoded and unescaped content as a
808     /// `String`. If the next event is an [`End`] event, returns the empty string. In all other
809     /// cases, returns an error.
810     ///
811     /// Any text will be decoded using the XML encoding specified in the XML declaration (or UTF-8
812     /// if none is specified).
813     ///
814     /// # Examples
815     ///
816     /// ```
817     /// use quick_xml::Reader;
818     /// use quick_xml::events::Event;
819     ///
820     /// let mut xml = Reader::from_reader(b"
821     ///     <a>&lt;b&gt;</a>
822     ///     <a></a>
823     /// " as &[u8]);
824     /// xml.trim_text(true);
825     ///
826     /// let expected = ["<b>", ""];
827     /// for &content in expected.iter() {
828     ///     match xml.read_event(&mut Vec::new()) {
829     ///         Ok(Event::Start(ref e)) => {
830     ///             assert_eq!(&xml.read_text(e.name(), &mut Vec::new()).unwrap(), content);
831     ///         },
832     ///         e => panic!("Expecting Start event, found {:?}", e),
833     ///     }
834     /// }
835     /// ```
836     ///
837     /// [`Text`]: events/enum.Event.html#variant.Text
838     /// [`End`]: events/enum.Event.html#variant.End
read_text<K: AsRef<[u8]>>(&mut self, end: K, buf: &mut Vec<u8>) -> Result<String>839     pub fn read_text<K: AsRef<[u8]>>(&mut self, end: K, buf: &mut Vec<u8>) -> Result<String> {
840         let s = match self.read_event(buf) {
841             Ok(Event::Text(e)) => e.unescape_and_decode(self),
842             Ok(Event::End(ref e)) if e.name() == end.as_ref() => return Ok("".to_string()),
843             Err(e) => return Err(e),
844             Ok(Event::Eof) => return Err(Error::UnexpectedEof("Text".to_string())),
845             _ => return Err(Error::TextNotFound),
846         };
847         self.read_to_end(end, buf)?;
848         s
849     }
850 
851     /// Consumes `Reader` returning the underlying reader
852     ///
853     /// Can be used to compute line and column of a parsing error position
854     ///
855     /// # Examples
856     ///
857     /// ```
858     /// use std::{str, io::Cursor};
859     /// use quick_xml::Reader;
860     /// use quick_xml::events::Event;
861     ///
862     /// let xml = r#"<tag1 att1 = "test">
863     ///                 <tag2><!--Test comment-->Test</tag2>
864     ///                 <tag3>Test 2</tag3>
865     ///             </tag1>"#;
866     /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
867     /// let mut buf = Vec::new();
868     ///
869     /// fn into_line_and_column(reader: Reader<Cursor<&[u8]>>) -> (usize, usize) {
870     ///     let end_pos = reader.buffer_position();
871     ///     let mut cursor = reader.into_underlying_reader();
872     ///     let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned())
873     ///         .expect("can't make a string");
874     ///     let mut line = 1;
875     ///     let mut column = 0;
876     ///     for c in s.chars() {
877     ///         if c == '\n' {
878     ///             line += 1;
879     ///             column = 0;
880     ///         } else {
881     ///             column += 1;
882     ///         }
883     ///     }
884     ///     (line, column)
885     /// }
886     ///
887     /// loop {
888     ///     match reader.read_event(&mut buf) {
889     ///         Ok(Event::Start(ref e)) => match e.name() {
890     ///             b"tag1" | b"tag2" => (),
891     ///             tag => {
892     ///                 assert_eq!(b"tag3", tag);
893     ///                 assert_eq!((3, 22), into_line_and_column(reader));
894     ///                 break;
895     ///             }
896     ///         },
897     ///         Ok(Event::Eof) => unreachable!(),
898     ///         _ => (),
899     ///     }
900     ///     buf.clear();
901     /// }
902     /// ```
into_underlying_reader(self) -> B903     pub fn into_underlying_reader(self) -> B {
904         self.reader
905     }
906 }
907 
908 impl Reader<BufReader<File>> {
909     /// Creates an XML reader from a file path.
from_file<P: AsRef<Path>>(path: P) -> Result<Reader<BufReader<File>>>910     pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Reader<BufReader<File>>> {
911         let file = File::open(path).map_err(Error::Io)?;
912         let reader = BufReader::new(file);
913         Ok(Reader::from_reader(reader))
914     }
915 }
916 
917 impl<'a> Reader<&'a [u8]> {
918     /// Creates an XML reader from a string slice.
from_str(s: &'a str) -> Reader<&'a [u8]>919     pub fn from_str(s: &'a str) -> Reader<&'a [u8]> {
920         Reader::from_reader(s.as_bytes())
921     }
922 }
923 
924 /// read until `byte` is found or end of file
925 /// return the position of byte
926 #[inline]
read_until<R: BufRead>( r: &mut R, byte: u8, buf: &mut Vec<u8>, position: &mut usize, ) -> Result<usize>927 fn read_until<R: BufRead>(
928     r: &mut R,
929     byte: u8,
930     buf: &mut Vec<u8>,
931     position: &mut usize,
932 ) -> Result<usize> {
933     let mut read = 0;
934     let mut done = false;
935     while !done {
936         let used = {
937             let available = match r.fill_buf() {
938                 Ok(n) if n.is_empty() => break,
939                 Ok(n) => n,
940                 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
941                 Err(e) => {
942                     *position += read;
943                     return Err(Error::Io(e));
944                 }
945             };
946 
947             match memchr::memchr(byte, available) {
948                 Some(i) => {
949                     buf.extend_from_slice(&available[..i]);
950                     done = true;
951                     i + 1
952                 }
953                 None => {
954                     buf.extend_from_slice(available);
955                     available.len()
956                 }
957             }
958         };
959         r.consume(used);
960         read += used;
961     }
962     *position += read;
963     Ok(read)
964 }
965 
966 /// Derived from `read_until`, but modified to handle XML attributes using a minimal state machine.
967 /// [W3C Extensible Markup Language (XML) 1.1 (2006)](https://www.w3.org/TR/xml11)
968 ///
969 /// Attribute values are defined as follows:
970 /// ```plain
971 /// AttValue := '"' (([^<&"]) | Reference)* '"'
972 ///           | "'" (([^<&']) | Reference)* "'"
973 /// ```
974 /// (`Reference` is something like `&quot;`, but we don't care about escaped characters at this
975 /// level)
976 #[inline]
read_elem_until<R: BufRead>( r: &mut R, end_byte: u8, buf: &mut Vec<u8>, position: &mut usize, ) -> Result<usize>977 fn read_elem_until<R: BufRead>(
978     r: &mut R,
979     end_byte: u8,
980     buf: &mut Vec<u8>,
981     position: &mut usize,
982 ) -> Result<usize> {
983     #[derive(Clone, Copy)]
984     enum State {
985         /// The initial state (inside element, but outside of attribute value)
986         Elem,
987         /// Inside a single-quoted attribute value
988         SingleQ,
989         /// Inside a double-quoted attribute value
990         DoubleQ,
991     }
992     let mut state = State::Elem;
993     let mut read = 0;
994     let mut done = false;
995     while !done {
996         let used = {
997             let available = match r.fill_buf() {
998                 Ok(n) if n.is_empty() => return Ok(read),
999                 Ok(n) => n,
1000                 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1001                 Err(e) => {
1002                     *position += read;
1003                     return Err(Error::Io(e));
1004                 }
1005             };
1006 
1007             let mut memiter = memchr::memchr3_iter(end_byte, b'\'', b'"', available);
1008             let used: usize;
1009             loop {
1010                 match memiter.next() {
1011                     Some(i) => {
1012                         state = match (state, available[i]) {
1013                             (State::Elem, b) if b == end_byte => {
1014                                 // only allowed to match `end_byte` while we are in state `Elem`
1015                                 buf.extend_from_slice(&available[..i]);
1016                                 done = true;
1017                                 used = i + 1;
1018                                 break;
1019                             }
1020                             (State::Elem, b'\'') => State::SingleQ,
1021                             (State::Elem, b'\"') => State::DoubleQ,
1022 
1023                             // the only end_byte that gets us out if the same character
1024                             (State::SingleQ, b'\'') | (State::DoubleQ, b'\"') => State::Elem,
1025 
1026                             // all other bytes: no state change
1027                             _ => state,
1028                         };
1029                     }
1030                     None => {
1031                         buf.extend_from_slice(available);
1032                         used = available.len();
1033                         break;
1034                     }
1035                 }
1036             }
1037             used
1038         };
1039         r.consume(used);
1040         read += used;
1041     }
1042     *position += read;
1043     Ok(read)
1044 }
1045 
/// Tells whether `b` is one of the four whitespace bytes: blank (`' '`), carriage return
/// (`'\r'`), new line (`'\n'`) or tab (`'\t'`).
#[inline]
pub(crate) fn is_whitespace(b: u8) -> bool {
    b == b' ' || b == b'\r' || b == b'\n' || b == b'\t'
}
1054 
/// A namespace declaration. Can either bind a namespace to a prefix or define the current default
/// namespace.
///
/// The declaration does not own its bytes: it only records offsets into a namespace buffer in
/// which the prefix bytes (if any) are immediately followed by the value bytes.
#[derive(Debug, Clone)]
struct Namespace {
    /// Offset in the namespace buffer at which the bytes of this declaration begin
    start: usize,
    /// Length of the prefix
    /// * if greater than zero, binds this namespace to the prefix stored at
    ///   `start..start + prefix_len` of the buffer.
    /// * if zero, defines the current default namespace.
    prefix_len: usize,
    /// Length of the namespace name (the URI) of this namespace declaration; its bytes are
    /// stored right after the prefix.
    ///
    /// The XML standard specifies that an empty namespace value 'removes' a namespace declaration
    /// for the extent of its scope. For prefix declarations that's not very interesting, but it is
    /// vital for default namespace declarations. With `xmlns=""` you can revert back to the default
    /// behaviour of leaving unqualified element names unqualified.
    ///
    /// Such an empty value is represented by a length of zero.
    value_len: usize,
    /// Level of nesting at which this namespace was declared. The declaring element is included,
    /// i.e., a declaration on the document root has `level = 1`.
    /// This is used to pop the namespace when the element gets closed.
    level: i32,
}
1077 
1078 impl Namespace {
1079     /// Gets the value slice out of namespace buffer
1080     ///
1081     /// Returns `None` if `value_len == 0`
1082     #[inline]
opt_value<'a, 'b>(&'a self, ns_buffer: &'b [u8]) -> Option<&'b [u8]>1083     fn opt_value<'a, 'b>(&'a self, ns_buffer: &'b [u8]) -> Option<&'b [u8]> {
1084         if self.value_len == 0 {
1085             None
1086         } else {
1087             let start = self.start + self.prefix_len;
1088             Some(&ns_buffer[start..start + self.value_len])
1089         }
1090     }
1091 
1092     /// Check if the namespace matches the potentially qualified name
1093     #[inline]
is_match(&self, ns_buffer: &[u8], qname: &[u8]) -> bool1094     fn is_match(&self, ns_buffer: &[u8], qname: &[u8]) -> bool {
1095         if self.prefix_len == 0 {
1096             !qname.contains(&b':')
1097         } else {
1098             qname.get(self.prefix_len).map_or(false, |n| *n == b':')
1099                 && qname.starts_with(&ns_buffer[self.start..self.start + self.prefix_len])
1100         }
1101     }
1102 }
1103 
/// A namespace management buffer.
///
/// Holds all internal logic to push/pop namespaces with their levels. The bytes of the
/// declarations live in a separate buffer that is passed to each method; this index only keeps
/// the ranges.
#[derive(Debug, Default, Clone)]
struct NamespaceBufferIndex {
    /// a buffer of namespace ranges, in declaration order (new declarations are appended, so
    /// the most deeply nested ones come last)
    slices: Vec<Namespace>,
    /// The number of open tags at the moment. We need to keep track of this to know which namespace
    /// declarations to remove when we encounter an `End` event.
    nesting_level: i32,
    /// Set after `Empty` and `End` events to keep the 'scope' of the element on the stack
    /// artificially. That way, the consumer has a chance to resolve names in the context of that
    /// element. The deferred pop is performed (and the flag cleared) by `pop_empty_namespaces`.
    pending_pop: bool,
}
1119 
1120 impl NamespaceBufferIndex {
1121     #[inline]
find_namespace_value<'a, 'b, 'c>( &'a self, element_name: &'b [u8], buffer: &'c [u8], ) -> Option<&'c [u8]>1122     fn find_namespace_value<'a, 'b, 'c>(
1123         &'a self,
1124         element_name: &'b [u8],
1125         buffer: &'c [u8],
1126     ) -> Option<&'c [u8]> {
1127         self.slices
1128             .iter()
1129             .rfind(|n| n.is_match(buffer, element_name))
1130             .and_then(|n| n.opt_value(buffer))
1131     }
1132 
pop_empty_namespaces(&mut self, buffer: &mut Vec<u8>)1133     fn pop_empty_namespaces(&mut self, buffer: &mut Vec<u8>) {
1134         if !self.pending_pop {
1135             return;
1136         }
1137         self.pending_pop = false;
1138         self.nesting_level -= 1;
1139         let current_level = self.nesting_level;
1140         // from the back (most deeply nested scope), look for the first scope that is still valid
1141         match self.slices.iter().rposition(|n| n.level <= current_level) {
1142             // none of the namespaces are valid, remove all of them
1143             None => {
1144                 buffer.clear();
1145                 self.slices.clear();
1146             }
1147             // drop all namespaces past the last valid namespace
1148             Some(last_valid_pos) => {
1149                 if let Some(len) = self.slices.get(last_valid_pos + 1).map(|n| n.start) {
1150                     buffer.truncate(len);
1151                     self.slices.truncate(last_valid_pos + 1);
1152                 }
1153             }
1154         }
1155     }
1156 
push_new_namespaces(&mut self, e: &BytesStart, buffer: &mut Vec<u8>)1157     fn push_new_namespaces(&mut self, e: &BytesStart, buffer: &mut Vec<u8>) {
1158         self.nesting_level += 1;
1159         let level = self.nesting_level;
1160         // adds new namespaces for attributes starting with 'xmlns:' and for the 'xmlns'
1161         // (default namespace) attribute.
1162         for a in e.attributes().with_checks(false) {
1163             if let Ok(Attribute { key: k, value: v }) = a {
1164                 if k.starts_with(b"xmlns") {
1165                     match k.get(5) {
1166                         None => {
1167                             let start = buffer.len();
1168                             buffer.extend_from_slice(&*v);
1169                             self.slices.push(Namespace {
1170                                 start,
1171                                 prefix_len: 0,
1172                                 value_len: v.len(),
1173                                 level,
1174                             });
1175                         }
1176                         Some(&b':') => {
1177                             let start = buffer.len();
1178                             buffer.extend_from_slice(&k[6..]);
1179                             buffer.extend_from_slice(&*v);
1180                             self.slices.push(Namespace {
1181                                 start,
1182                                 prefix_len: k.len() - 6,
1183                                 value_len: v.len(),
1184                                 level,
1185                             });
1186                         }
1187                         _ => break,
1188                     }
1189                 }
1190             } else {
1191                 break;
1192             }
1193         }
1194     }
1195 
1196     /// Resolves a potentially qualified **attribute name** into (namespace name, local name).
1197     ///
1198     /// *Qualified* attribute names have the form `prefix:local-name` where the`prefix` is defined
1199     /// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix
1200     /// can be defined on the same element as the attribute in question.
1201     ///
1202     /// *Unqualified* attribute names do *not* inherit the current *default namespace*.
1203     #[inline]
resolve_namespace<'a, 'b, 'c>( &'a self, qname: &'b [u8], buffer: &'c [u8], use_default: bool, ) -> (Option<&'c [u8]>, &'b [u8])1204     fn resolve_namespace<'a, 'b, 'c>(
1205         &'a self,
1206         qname: &'b [u8],
1207         buffer: &'c [u8],
1208         use_default: bool,
1209     ) -> (Option<&'c [u8]>, &'b [u8]) {
1210         self.slices
1211             .iter()
1212             .rfind(|n| n.is_match(buffer, qname))
1213             .map_or((None, qname), |n| {
1214                 let len = n.prefix_len;
1215                 if len > 0 {
1216                     (n.opt_value(buffer), &qname[len + 1..])
1217                 } else if use_default {
1218                     (n.opt_value(buffer), qname)
1219                 } else {
1220                     (None, qname)
1221                 }
1222             })
1223     }
1224 }
1225 
/// Utf8 Decoder
///
/// Variant compiled when the `encoding` feature is disabled. It has no fields; its `decode`
/// method validates the input as UTF-8.
#[cfg(not(feature = "encoding"))]
#[derive(Clone, Copy)]
pub struct Decoder;
1230 
/// Decoder bound to an `Encoding`
///
/// Variant compiled when the `encoding` feature is enabled. Its `decode` method decodes with
/// the wrapped encoding, which is not necessarily UTF-8.
#[cfg(feature = "encoding")]
#[derive(Clone, Copy)]
pub struct Decoder {
    /// Encoding applied by `decode`
    encoding: &'static Encoding,
}
1237 
1238 impl Decoder {
1239     #[cfg(not(feature = "encoding"))]
decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str>1240     pub fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> {
1241         from_utf8(bytes).map_err(Error::Utf8)
1242     }
1243 
1244     #[cfg(feature = "encoding")]
decode<'c>(&self, bytes: &'c [u8]) -> Cow<'c, str>1245     pub fn decode<'c>(&self, bytes: &'c [u8]) -> Cow<'c, str> {
1246         self.encoding.decode(bytes).0
1247     }
1248 }
1249