1 //! A module to handle `Reader`
2
3 #[cfg(feature = "encoding")]
4 use std::borrow::Cow;
5 use std::fs::File;
6 use std::io::{self, BufRead, BufReader};
7 use std::path::Path;
8 use std::str::from_utf8;
9
10 #[cfg(feature = "encoding")]
11 use encoding_rs::{Encoding, UTF_16BE, UTF_16LE};
12
13 use errors::{Error, Result};
14 use events::{attributes::Attribute, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
15
16 use memchr;
17
/// Internal parser state that decides which read routine `read_event` runs next.
#[derive(Clone)]
enum TagState {
    /// Text was just read up to a `<`; the next read parses markup up to `>`.
    Opened,
    /// Markup was just read (or nothing yet); the next read collects text up to `<`.
    Closed,
    /// A self-closing tag was reported as `Start` because `expand_empty_elements`
    /// is on; the next read emits the matching synthetic `End`.
    Empty,
    /// Either Eof or Errored
    Exit,
}
26
27 /// A low level encoding-agnostic XML event reader.
28 ///
29 /// Consumes a `BufRead` and streams XML `Event`s.
30 ///
31 /// # Examples
32 ///
33 /// ```
34 /// use quick_xml::Reader;
35 /// use quick_xml::events::Event;
36 ///
37 /// let xml = r#"<tag1 att1 = "test">
38 /// <tag2><!--Test comment-->Test</tag2>
39 /// <tag2>Test 2</tag2>
40 /// </tag1>"#;
41 /// let mut reader = Reader::from_str(xml);
42 /// reader.trim_text(true);
43 /// let mut count = 0;
44 /// let mut txt = Vec::new();
45 /// let mut buf = Vec::new();
46 /// loop {
47 /// match reader.read_event(&mut buf) {
48 /// Ok(Event::Start(ref e)) => {
49 /// match e.name() {
50 /// b"tag1" => println!("attributes values: {:?}",
51 /// e.attributes().map(|a| a.unwrap().value)
52 /// .collect::<Vec<_>>()),
53 /// b"tag2" => count += 1,
54 /// _ => (),
55 /// }
56 /// },
57 /// Ok(Event::Text(e)) => txt.push(e.unescape_and_decode(&reader).unwrap()),
58 /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
59 /// Ok(Event::Eof) => break,
60 /// _ => (),
61 /// }
62 /// buf.clear();
63 /// }
64 /// ```
65 #[derive(Clone)]
pub struct Reader<B: BufRead> {
    /// underlying buffered reader the XML bytes are pulled from
    reader: B,
    /// number of bytes consumed from `reader` so far, useful for debugging errors
    buf_position: usize,
    /// current parser state (see `TagState`)
    tag_state: TagState,
    /// expand empty element into an opening and closing element
    expand_empty_elements: bool,
    /// trims leading whitespace in Text events, skip the element if text is empty
    trim_text_start: bool,
    /// trims trailing whitespace in Text events.
    trim_text_end: bool,
    /// trims trailing whitespaces from markup names in closing tags `</a >`
    trim_markup_names_in_closing_tags: bool,
    /// check if End nodes match last Start node
    check_end_names: bool,
    /// check if comments contains `--` (false per default)
    check_comments: bool,
    /// concatenated names of all currently Started elements which didn't have
    /// a matching End element yet
    opened_buffer: Vec<u8>,
    /// start index in `opened_buffer` of each opened element name
    opened_starts: Vec<usize>,
    /// a buffer to manage namespaces
    ns_buffer: NamespaceBufferIndex,
    #[cfg(feature = "encoding")]
    /// the encoding specified in the xml, defaults to utf8
    encoding: &'static Encoding,
    #[cfg(feature = "encoding")]
    /// whether the encoding has been determined, either from the XML
    /// declaration or from a byte order mark
    is_encoding_set: bool,
}
99
100 impl<B: BufRead> Reader<B> {
101 /// Creates a `Reader` that reads from a reader implementing `BufRead`.
from_reader(reader: B) -> Reader<B>102 pub fn from_reader(reader: B) -> Reader<B> {
103 Reader {
104 reader,
105 opened_buffer: Vec::new(),
106 opened_starts: Vec::new(),
107 tag_state: TagState::Closed,
108 expand_empty_elements: false,
109 trim_text_start: false,
110 trim_text_end: false,
111 trim_markup_names_in_closing_tags: true,
112 check_end_names: true,
113 buf_position: 0,
114 check_comments: false,
115 ns_buffer: NamespaceBufferIndex::default(),
116 #[cfg(feature = "encoding")]
117 encoding: ::encoding_rs::UTF_8,
118 #[cfg(feature = "encoding")]
119 is_encoding_set: false,
120 }
121 }
122
/// Changes whether empty elements should be split into a `Start` and an `End` event.
///
/// When set to `true`, all [`Empty`] events produced by a self-closing tag like `<tag/>` are
/// expanded into a [`Start`] event followed by an [`End`] event. When set to `false` (the
/// default), those tags are represented by an [`Empty`] event instead.
///
/// Returns the reader itself so that option setters can be chained.
///
/// (`false` by default)
///
/// [`Empty`]: events/enum.Event.html#variant.Empty
/// [`Start`]: events/enum.Event.html#variant.Start
/// [`End`]: events/enum.Event.html#variant.End
pub fn expand_empty_elements(&mut self, val: bool) -> &mut Reader<B> {
    self.expand_empty_elements = val;
    self
}
138
/// Changes whether whitespace before and after character data should be removed.
///
/// When set to `true`, all [`Text`] events are trimmed. If they are empty, no event will be
/// pushed.
///
/// This sets both the leading and the trailing trim option to `val`.
///
/// (`false` by default)
///
/// [`Text`]: events/enum.Event.html#variant.Text
pub fn trim_text(&mut self, val: bool) -> &mut Reader<B> {
    self.trim_text_start = val;
    self.trim_text_end = val;
    self
}
152
/// Changes whether whitespace after character data should be removed.
///
/// When set to `true`, trailing whitespace is trimmed in [`Text`] events.
/// Leading whitespace handling is left untouched by this setter.
///
/// (`false` by default)
///
/// [`Text`]: events/enum.Event.html#variant.Text
pub fn trim_text_end(&mut self, val: bool) -> &mut Reader<B> {
    self.trim_text_end = val;
    self
}
164
/// Changes whether trailing whitespaces after the markup name are trimmed in closing tags
/// `</a >`.
///
/// If `true`, the emitted [`End`] event is stripped of trailing whitespace after the markup
/// name.
///
/// Note that if set to `false` while `check_end_names` is `true`, the comparison of markup
/// names is going to fail erroneously if a closing tag contains trailing whitespaces.
///
/// (`true` by default)
///
/// [`End`]: events/enum.Event.html#variant.End
pub fn trim_markup_names_in_closing_tags(&mut self, val: bool) -> &mut Reader<B> {
    self.trim_markup_names_in_closing_tags = val;
    self
}
180
/// Changes whether mismatched closing tag names should be detected.
///
/// When set to `false`, it won't check if a closing tag matches the corresponding opening tag.
/// For example, `<mytag></different_tag>` will be permitted.
///
/// If the XML is known to be sane (already processed, etc.) this saves extra time.
///
/// Note that the emitted [`End`] event will not be modified if this is disabled, i.e. it will
/// contain the data of the mismatched end tag.
///
/// (`true` by default)
///
/// [`End`]: events/enum.Event.html#variant.End
pub fn check_end_names(&mut self, val: bool) -> &mut Reader<B> {
    self.check_end_names = val;
    self
}
198
/// Changes whether comments should be validated.
///
/// When set to `true`, every [`Comment`] event will be checked for not containing `--`, which
/// is not allowed in XML comments. Most of the time we don't want comments at all so we don't
/// really care about comment correctness, thus the default value is `false` to improve
/// performance.
///
/// (`false` by default)
///
/// [`Comment`]: events/enum.Event.html#variant.Comment
pub fn check_comments(&mut self, val: bool) -> &mut Reader<B> {
    self.check_comments = val;
    self
}
213
214 /// Gets the current byte position in the input data.
215 ///
216 /// Useful when debugging errors.
buffer_position(&self) -> usize217 pub fn buffer_position(&self) -> usize {
218 // when internal state is Opened, we have actually read until '<',
219 // which we don't want to show
220 if let TagState::Opened = self.tag_state {
221 self.buf_position - 1
222 } else {
223 self.buf_position
224 }
225 }
226
227 /// private function to read until '<' is found
228 /// return a `Text` event
read_until_open<'a, 'b>(&'a mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>>229 fn read_until_open<'a, 'b>(&'a mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
230 self.tag_state = TagState::Opened;
231 let buf_start = buf.len();
232 match read_until(&mut self.reader, b'<', buf, &mut self.buf_position) {
233 Ok(0) => Ok(Event::Eof),
234 Ok(_) => {
235 let (start, len) = (
236 buf_start
237 + if self.trim_text_start {
238 match buf.iter().skip(buf_start).position(|&b| !is_whitespace(b)) {
239 Some(start) => start,
240 None => return self.read_event(buf),
241 }
242 } else {
243 0
244 },
245 if self.trim_text_end {
246 buf.iter()
247 .rposition(|&b| !is_whitespace(b))
248 .map_or_else(|| buf.len(), |p| p + 1)
249 } else {
250 buf.len()
251 },
252 );
253 Ok(Event::Text(BytesText::from_escaped(&buf[start..len])))
254 }
255 Err(e) => Err(e),
256 }
257 }
258
259 /// private function to read until '>' is found
read_until_close<'a, 'b>(&'a mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>>260 fn read_until_close<'a, 'b>(&'a mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
261 self.tag_state = TagState::Closed;
262
263 // need to read 1 character to decide whether pay special attention to attribute values
264 let buf_start = buf.len();
265 let start = loop {
266 match self.reader.fill_buf() {
267 Ok(n) if n.is_empty() => return Ok(Event::Eof),
268 Ok(n) => {
269 // We intentionally don't `consume()` the byte, otherwise we would have to
270 // handle things like '<>' here already.
271 break n[0];
272 }
273 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
274 Err(e) => return Err(Error::Io(e)),
275 }
276 };
277
278 if start != b'/' && start != b'!' && start != b'?' {
279 match read_elem_until(&mut self.reader, b'>', buf, &mut self.buf_position) {
280 Ok(0) => Ok(Event::Eof),
281 Ok(_) => {
282 // we already *know* that we are in this case
283 self.read_start(&buf[buf_start..])
284 }
285 Err(e) => Err(e),
286 }
287 } else {
288 match read_until(&mut self.reader, b'>', buf, &mut self.buf_position) {
289 Ok(0) => Ok(Event::Eof),
290 Ok(_) => match start {
291 b'/' => self.read_end(&buf[buf_start..]),
292 b'!' => self.read_bang(buf_start, buf),
293 b'?' => self.read_question_mark(&buf[buf_start..]),
294 _ => unreachable!(
295 "We checked that `start` must be one of [/!?], was {:?} \
296 instead.",
297 start
298 ),
299 },
300 Err(e) => Err(e),
301 }
302 }
303 }
304
305 /// reads `BytesElement` starting with a `/`,
306 /// if `self.check_end_names`, checks that element matches last opened element
307 /// return `End` event
read_end<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>>308 fn read_end<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>> {
309 // XML standard permits whitespaces after the markup name in closing tags.
310 // Let's strip them from the buffer before comparing tag names.
311 let name = if self.trim_markup_names_in_closing_tags {
312 if let Some(pos_end_name) = buf[1..].iter().rposition(|&b| !b.is_ascii_whitespace()) {
313 let (name, _) = buf[1..].split_at(pos_end_name + 1);
314 name
315 } else {
316 &buf[1..]
317 }
318 } else {
319 &buf[1..]
320 };
321 if self.check_end_names {
322 let mismatch_err = |expected: &[u8], found: &[u8], buf_position: &mut usize| {
323 *buf_position -= buf.len();
324 Err(Error::EndEventMismatch {
325 expected: from_utf8(expected).unwrap_or("").to_owned(),
326 found: from_utf8(found).unwrap_or("").to_owned(),
327 })
328 };
329 match self.opened_starts.pop() {
330 Some(start) => {
331 if name != &self.opened_buffer[start..] {
332 let expected = &self.opened_buffer[start..];
333 mismatch_err(expected, name, &mut self.buf_position)
334 } else {
335 self.opened_buffer.truncate(start);
336 Ok(Event::End(BytesEnd::borrowed(name)))
337 }
338 }
339 None => mismatch_err(b"", &buf[1..], &mut self.buf_position),
340 }
341 } else {
342 Ok(Event::End(BytesEnd::borrowed(name)))
343 }
344 }
345
346 /// reads `BytesElement` starting with a `!`,
347 /// return `Comment`, `CData` or `DocType` event
348 ///
349 /// Note: depending on the start of the Event, we may need to read more
350 /// data, thus we need a mutable buffer
read_bang<'a, 'b>( &'a mut self, buf_start: usize, buf: &'b mut Vec<u8>, ) -> Result<Event<'b>>351 fn read_bang<'a, 'b>(
352 &'a mut self,
353 buf_start: usize,
354 buf: &'b mut Vec<u8>,
355 ) -> Result<Event<'b>> {
356 if buf[buf_start..].starts_with(b"!--") {
357 while buf.len() < buf_start + 5 || !buf.ends_with(b"--") {
358 buf.push(b'>');
359 match read_until(&mut self.reader, b'>', buf, &mut self.buf_position) {
360 Ok(0) => {
361 self.buf_position -= buf.len() - buf_start;
362 return Err(Error::UnexpectedEof("Comment".to_string()));
363 }
364 Ok(_) => (),
365 Err(e) => return Err(e),
366 }
367 }
368 let len = buf.len();
369 if self.check_comments {
370 // search if '--' not in comments
371 if let Some(p) = memchr::memchr_iter(b'-', &buf[buf_start + 3..len - 2])
372 .position(|p| buf[buf_start + 3 + p + 1] == b'-')
373 {
374 self.buf_position -= buf.len() - buf_start + p;
375 return Err(Error::UnexpectedToken("--".to_string()));
376 }
377 }
378 Ok(Event::Comment(BytesText::from_escaped(
379 &buf[buf_start + 3..len - 2],
380 )))
381 } else if buf.len() >= buf_start + 8 {
382 match &buf[buf_start + 1..buf_start + 8] {
383 b"[CDATA[" => {
384 while buf.len() < 10 || !buf.ends_with(b"]]") {
385 buf.push(b'>');
386 match read_until(&mut self.reader, b'>', buf, &mut self.buf_position) {
387 Ok(0) => {
388 self.buf_position -= buf.len() - buf_start;
389 return Err(Error::UnexpectedEof("CData".to_string()));
390 }
391 Ok(_) => (),
392 Err(e) => return Err(e),
393 }
394 }
395 Ok(Event::CData(BytesText::from_plain(
396 &buf[buf_start + 8..buf.len() - 2],
397 )))
398 }
399 x if x.eq_ignore_ascii_case(b"DOCTYPE") => {
400 let mut count = buf.iter().skip(buf_start).filter(|&&b| b == b'<').count();
401 while count > 0 {
402 buf.push(b'>');
403 match read_until(&mut self.reader, b'>', buf, &mut self.buf_position) {
404 Ok(0) => {
405 self.buf_position -= buf.len() - buf_start;
406 return Err(Error::UnexpectedEof("DOCTYPE".to_string()));
407 }
408 Ok(n) => {
409 let start = buf.len() - n;
410 count += buf.iter().skip(start).filter(|&&b| b == b'<').count();
411 count -= 1;
412 }
413 Err(e) => return Err(e),
414 }
415 }
416 Ok(Event::DocType(BytesText::from_escaped(
417 &buf[buf_start + 8..buf.len()],
418 )))
419 }
420 _ => Err(Error::UnexpectedBang),
421 }
422 } else {
423 self.buf_position -= buf.len() - buf_start;
424 Err(Error::UnexpectedBang)
425 }
426 }
427
/// Parses markup starting with `?`; `buf` holds the content between `<` and `>`.
///
/// Returns a `Decl` event for `<?xml ...?>` (also recording the declared
/// encoding, if any) and a `PI` event for any other processing instruction.
/// Content that does not end with `?` yields `Error::UnexpectedEof`.
#[cfg(feature = "encoding")]
fn read_question_mark<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>> {
    let len = buf.len();
    if len <= 2 || buf[len - 1] != b'?' {
        self.buf_position -= len;
        return Err(Error::UnexpectedEof("XmlDecl".to_string()));
    }

    let is_xml_decl = len > 5 && &buf[1..4] == b"xml" && is_whitespace(buf[4]);
    if !is_xml_decl {
        return Ok(Event::PI(BytesText::from_escaped(&buf[1..len - 1])));
    }

    let event = BytesDecl::from_start(BytesStart::borrowed(&buf[1..len - 1], 3));
    // Try getting encoding from the declaration event
    if let Some(enc) = event.encoder() {
        self.encoding = enc;
        self.is_encoding_set = true;
    }
    Ok(Event::Decl(event))
}
450
451 /// reads `BytesElement` starting with a `?`,
452 /// return `Decl` or `PI` event
453 #[cfg(not(feature = "encoding"))]
read_question_mark<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>>454 fn read_question_mark<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>> {
455 let len = buf.len();
456 if len > 2 && buf[len - 1] == b'?' {
457 if len > 5 && &buf[1..4] == b"xml" && is_whitespace(buf[4]) {
458 let event = BytesDecl::from_start(BytesStart::borrowed(&buf[1..len - 1], 3));
459 Ok(Event::Decl(event))
460 } else {
461 Ok(Event::PI(BytesText::from_escaped(&buf[1..len - 1])))
462 }
463 } else {
464 self.buf_position -= len;
465 Err(Error::UnexpectedEof("XmlDecl".to_string()))
466 }
467 }
468
469 #[inline]
close_expanded_empty(&mut self) -> Result<Event<'static>>470 fn close_expanded_empty(&mut self) -> Result<Event<'static>> {
471 self.tag_state = TagState::Closed;
472 let name = self
473 .opened_buffer
474 .split_off(self.opened_starts.pop().unwrap());
475 Ok(Event::End(BytesEnd::owned(name)))
476 }
477
478 /// reads `BytesElement` starting with any character except `/`, `!` or ``?`
479 /// return `Start` or `Empty` event
read_start<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>>480 fn read_start<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>> {
481 // TODO: do this directly when reading bufreader ...
482 let len = buf.len();
483 let name_end = buf.iter().position(|&b| is_whitespace(b)).unwrap_or(len);
484 if let Some(&b'/') = buf.last() {
485 let end = if name_end < len { name_end } else { len - 1 };
486 if self.expand_empty_elements {
487 self.tag_state = TagState::Empty;
488 self.opened_starts.push(self.opened_buffer.len());
489 self.opened_buffer.extend(&buf[..end]);
490 Ok(Event::Start(BytesStart::borrowed(&buf[..len - 1], end)))
491 } else {
492 Ok(Event::Empty(BytesStart::borrowed(&buf[..len - 1], end)))
493 }
494 } else {
495 if self.check_end_names {
496 self.opened_starts.push(self.opened_buffer.len());
497 self.opened_buffer.extend(&buf[..name_end]);
498 }
499 Ok(Event::Start(BytesStart::borrowed(buf, name_end)))
500 }
501 }
502
503 /// Reads the next `Event`.
504 ///
505 /// This is the main entry point for reading XML `Event`s.
506 ///
507 /// `Event`s borrow `buf` and can be converted to own their data if needed (uses `Cow`
508 /// internally).
509 ///
510 /// Having the possibility to control the internal buffers gives you some additional benefits
511 /// such as:
512 ///
513 /// - Reduce the number of allocations by reusing the same buffer. For constrained systems,
514 /// you can call `buf.clear()` once you are done with processing the event (typically at the
515 /// end of your loop).
516 /// - Reserve the buffer length if you know the file size (using `Vec::with_capacity`).
517 ///
518 /// # Examples
519 ///
520 /// ```
521 /// use quick_xml::Reader;
522 /// use quick_xml::events::Event;
523 ///
524 /// let xml = r#"<tag1 att1 = "test">
525 /// <tag2><!--Test comment-->Test</tag2>
526 /// <tag2>Test 2</tag2>
527 /// </tag1>"#;
528 /// let mut reader = Reader::from_str(xml);
529 /// reader.trim_text(true);
530 /// let mut count = 0;
531 /// let mut buf = Vec::new();
532 /// let mut txt = Vec::new();
533 /// loop {
534 /// match reader.read_event(&mut buf) {
535 /// Ok(Event::Start(ref e)) => count += 1,
536 /// Ok(Event::Text(e)) => txt.push(e.unescape_and_decode(&reader).expect("Error!")),
537 /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
538 /// Ok(Event::Eof) => break,
539 /// _ => (),
540 /// }
541 /// buf.clear();
542 /// }
543 /// println!("Found {} start events", count);
544 /// println!("Text events: {:?}", txt);
545 /// ```
read_event<'a, 'b>(&'a mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>>546 pub fn read_event<'a, 'b>(&'a mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
547 let event = match self.tag_state {
548 TagState::Opened => self.read_until_close(buf),
549 TagState::Closed => self.read_until_open(buf),
550 TagState::Empty => self.close_expanded_empty(),
551 TagState::Exit => return Ok(Event::Eof),
552 };
553 match event {
554 Err(_) | Ok(Event::Eof) => self.tag_state = TagState::Exit,
555 _ => {}
556 }
557 event
558 }
559
/// Resolves a potentially qualified **event name** into (namespace name, local name).
///
/// *Qualified* event names have the form `prefix:local-name` where the `prefix` is defined
/// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix
/// can be defined on the same element as the event in question.
///
/// *Unqualified* event inherits the current *default namespace*.
///
/// `namespace_buffer` must be the buffer that was passed to `read_namespaced_event`.
#[inline]
pub fn event_namespace<'a, 'b, 'c>(
    &'a self,
    qname: &'b [u8],
    namespace_buffer: &'c [u8],
) -> (Option<&'c [u8]>, &'b [u8]) {
    // `true`: presumably enables the default-namespace fallback — TODO confirm
    // against `resolve_namespace`.
    self.ns_buffer
        .resolve_namespace(qname, namespace_buffer, true)
}
576
/// Resolves a potentially qualified **attribute name** into (namespace name, local name).
///
/// *Qualified* attribute names have the form `prefix:local-name` where the `prefix` is defined
/// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix
/// can be defined on the same element as the attribute in question.
///
/// *Unqualified* attribute names do *not* inherit the current *default namespace*.
///
/// `namespace_buffer` must be the buffer that was passed to `read_namespaced_event`.
#[inline]
pub fn attribute_namespace<'a, 'b, 'c>(
    &'a self,
    qname: &'b [u8],
    namespace_buffer: &'c [u8],
) -> (Option<&'c [u8]>, &'b [u8]) {
    // `false`: presumably disables the default-namespace fallback — TODO
    // confirm against `resolve_namespace`.
    self.ns_buffer
        .resolve_namespace(qname, namespace_buffer, false)
}
593
594 /// Reads the next event and resolves its namespace (if applicable).
595 ///
596 /// # Examples
597 ///
598 /// ```
599 /// use std::str::from_utf8;
600 /// use quick_xml::Reader;
601 /// use quick_xml::events::Event;
602 ///
603 /// let xml = r#"<x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
604 /// <y:tag2><!--Test comment-->Test</y:tag2>
605 /// <y:tag2>Test 2</y:tag2>
606 /// </x:tag1>"#;
607 /// let mut reader = Reader::from_str(xml);
608 /// reader.trim_text(true);
609 /// let mut count = 0;
610 /// let mut buf = Vec::new();
611 /// let mut ns_buf = Vec::new();
612 /// let mut txt = Vec::new();
613 /// loop {
614 /// match reader.read_namespaced_event(&mut buf, &mut ns_buf) {
615 /// Ok((ref ns, Event::Start(ref e))) => {
616 /// count += 1;
617 /// match (*ns, e.local_name()) {
618 /// (Some(b"www.xxxx"), b"tag1") => (),
619 /// (Some(b"www.yyyy"), b"tag2") => (),
620 /// (ns, n) => panic!("Namespace and local name mismatch"),
621 /// }
622 /// println!("Resolved namespace: {:?}", ns.and_then(|ns| from_utf8(ns).ok()));
623 /// }
624 /// Ok((_, Event::Text(e))) => {
625 /// txt.push(e.unescape_and_decode(&reader).expect("Error!"))
626 /// },
627 /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
628 /// Ok((_, Event::Eof)) => break,
629 /// _ => (),
630 /// }
631 /// buf.clear();
632 /// }
633 /// println!("Found {} start events", count);
634 /// println!("Text events: {:?}", txt);
635 /// ```
read_namespaced_event<'a, 'b, 'c>( &'a mut self, buf: &'b mut Vec<u8>, namespace_buffer: &'c mut Vec<u8>, ) -> Result<(Option<&'c [u8]>, Event<'b>)>636 pub fn read_namespaced_event<'a, 'b, 'c>(
637 &'a mut self,
638 buf: &'b mut Vec<u8>,
639 namespace_buffer: &'c mut Vec<u8>,
640 ) -> Result<(Option<&'c [u8]>, Event<'b>)> {
641 self.ns_buffer.pop_empty_namespaces(namespace_buffer);
642 match self.read_event(buf) {
643 Ok(Event::Eof) => Ok((None, Event::Eof)),
644 Ok(Event::Start(e)) => {
645 self.ns_buffer.push_new_namespaces(&e, namespace_buffer);
646 Ok((
647 self.ns_buffer
648 .find_namespace_value(e.name(), &**namespace_buffer),
649 Event::Start(e),
650 ))
651 }
652 Ok(Event::Empty(e)) => {
653 // For empty elements we need to 'artificially' keep the namespace scope on the
654 // stack until the next `next()` call occurs.
655 // Otherwise the caller has no chance to use `resolve` in the context of the
656 // namespace declarations that are 'in scope' for the empty element alone.
657 // Ex: <img rdf:nodeID="abc" xmlns:rdf="urn:the-rdf-uri" />
658 self.ns_buffer.push_new_namespaces(&e, namespace_buffer);
659 // notify next `read_namespaced_event()` invocation that it needs to pop this
660 // namespace scope
661 self.ns_buffer.pending_pop = true;
662 Ok((
663 self.ns_buffer
664 .find_namespace_value(e.name(), &**namespace_buffer),
665 Event::Empty(e),
666 ))
667 }
668 Ok(Event::End(e)) => {
669 // notify next `read_namespaced_event()` invocation that it needs to pop this
670 // namespace scope
671 self.ns_buffer.pending_pop = true;
672 Ok((
673 self.ns_buffer
674 .find_namespace_value(e.name(), &**namespace_buffer),
675 Event::End(e),
676 ))
677 }
678 Ok(e) => Ok((None, e)),
679 Err(e) => Err(e),
680 }
681 }
682
/// Returns the `Reader`'s current encoding.
///
/// The used encoding may change after parsing the XML declaration or after a
/// byte order mark has been detected by [`decode_without_bom`].
///
/// This encoding will be used by [`decode`].
///
/// [`decode`]: #method.decode
/// [`decode_without_bom`]: #method.decode_without_bom
#[cfg(feature = "encoding")]
pub fn encoding(&self) -> &'static Encoding {
    self.encoding
}
694
/// Decodes a slice using the encoding specified in the XML declaration.
///
/// Decode `bytes` with BOM sniffing and with malformed sequences replaced with the
/// `U+FFFD REPLACEMENT CHARACTER`.
///
/// If no encoding is specified, defaults to UTF-8.
#[inline]
#[cfg(feature = "encoding")]
pub fn decode<'b, 'c>(&'b self, bytes: &'c [u8]) -> Cow<'c, str> {
    // Only the decoded text is returned; the other values reported by
    // `Encoding::decode` are discarded.
    self.encoding.decode(bytes).0
}
706
707 /// Decodes a UTF8 slice without BOM (Byte order mark) regardless of XML declaration.
708 ///
709 /// Decode `bytes` without BOM and with malformed sequences replaced with the
710 /// `U+FFFD REPLACEMENT CHARACTER`.
711 ///
712 /// # Note
713 ///
714 /// If you instead want to use XML declared encoding, use the `encoding` feature
715 #[inline]
716 #[cfg(not(feature = "encoding"))]
decode_without_bom<'c>(&self, bytes: &'c [u8]) -> Result<&'c str>717 pub fn decode_without_bom<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> {
718 if bytes.starts_with(b"\xEF\xBB\xBF") {
719 from_utf8(&bytes[3..]).map_err(Error::Utf8)
720 } else {
721 from_utf8(bytes).map_err(Error::Utf8)
722 }
723 }
724
/// Decodes a slice, without its BOM (byte order mark), using the reader's encoding.
///
/// If the encoding is not known yet, a leading BOM is sniffed: a UTF-8 BOM
/// keeps the current encoding, a UTF-16LE/BE BOM switches the reader to that
/// encoding. In every case the BOM bytes are skipped and the encoding is
/// marked as set. Malformed sequences are replaced with the
/// `U+FFFD REPLACEMENT CHARACTER`.
///
/// If no encoding is specified, defaults to UTF-8.
#[inline]
#[cfg(feature = "encoding")]
pub fn decode_without_bom<'b, 'c>(&'b mut self, mut bytes: &'c [u8]) -> Cow<'c, str> {
    if self.is_encoding_set {
        return self.encoding.decode_with_bom_removal(bytes).0;
    }
    if bytes.starts_with(b"\xEF\xBB\xBF") {
        // UTF-8 BOM: 3 bytes
        self.is_encoding_set = true;
        bytes = &bytes[3..];
    } else if bytes.starts_with(b"\xFF\xFE") {
        // UTF-16LE BOM: 2 bytes
        self.is_encoding_set = true;
        self.encoding = UTF_16LE;
        bytes = &bytes[2..];
    } else if bytes.starts_with(b"\xFE\xFF") {
        // UTF-16BE BOM: also 2 bytes. Skipping 3 would drop the first data
        // byte and panic on an input consisting of the BOM alone.
        self.is_encoding_set = true;
        self.encoding = UTF_16BE;
        bytes = &bytes[2..];
    };
    self.encoding.decode_without_bom_handling(bytes).0
}
751
/// Decodes a UTF8 slice regardless of XML declaration.
///
/// The bytes are validated as UTF-8 as-is: a leading BOM is not removed (see
/// `decode_without_bom` for that) and malformed sequences are not replaced.
///
/// # Errors
///
/// Returns `Error::Utf8` when `bytes` is not valid UTF-8.
///
/// # Note
///
/// If you instead want to use XML declared encoding, use the `encoding` feature
#[inline]
#[cfg(not(feature = "encoding"))]
pub fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> {
    from_utf8(bytes).map_err(Error::Utf8)
}
765
/// Returns a `Decoder` carrying the reader's current encoding.
///
/// The decoder holds a copy of the encoding as of this call; an encoding
/// detected later by the reader is not reflected in it.
#[cfg(feature = "encoding")]
pub fn decoder(&self) -> Decoder {
    Decoder {
        encoding: self.encoding,
    }
}
773
/// Returns a `Decoder`; without the `encoding` feature it carries no state.
#[cfg(not(feature = "encoding"))]
pub fn decoder(&self) -> Decoder {
    Decoder
}
779
780 /// Reads until end element is found
781 ///
782 /// Manages nested cases where parent and child elements have the same name
read_to_end<K: AsRef<[u8]>>(&mut self, end: K, buf: &mut Vec<u8>) -> Result<()>783 pub fn read_to_end<K: AsRef<[u8]>>(&mut self, end: K, buf: &mut Vec<u8>) -> Result<()> {
784 let mut depth = 0;
785 let end = end.as_ref();
786 loop {
787 match self.read_event(buf) {
788 Ok(Event::End(ref e)) if e.name() == end => {
789 if depth == 0 {
790 return Ok(());
791 }
792 depth -= 1;
793 }
794 Ok(Event::Start(ref e)) if e.name() == end => depth += 1,
795 Err(e) => return Err(e),
796 Ok(Event::Eof) => {
797 return Err(Error::UnexpectedEof(format!("</{:?}>", from_utf8(end))));
798 }
799 _ => (),
800 }
801 buf.clear();
802 }
803 }
804
805 /// Reads optional text between start and end tags.
806 ///
807 /// If the next event is a [`Text`] event, returns the decoded and unescaped content as a
808 /// `String`. If the next event is an [`End`] event, returns the empty string. In all other
809 /// cases, returns an error.
810 ///
811 /// Any text will be decoded using the XML encoding specified in the XML declaration (or UTF-8
812 /// if none is specified).
813 ///
814 /// # Examples
815 ///
816 /// ```
817 /// use quick_xml::Reader;
818 /// use quick_xml::events::Event;
819 ///
820 /// let mut xml = Reader::from_reader(b"
821 /// <a><b></a>
822 /// <a></a>
823 /// " as &[u8]);
824 /// xml.trim_text(true);
825 ///
826 /// let expected = ["<b>", ""];
827 /// for &content in expected.iter() {
828 /// match xml.read_event(&mut Vec::new()) {
829 /// Ok(Event::Start(ref e)) => {
830 /// assert_eq!(&xml.read_text(e.name(), &mut Vec::new()).unwrap(), content);
831 /// },
832 /// e => panic!("Expecting Start event, found {:?}", e),
833 /// }
834 /// }
835 /// ```
836 ///
837 /// [`Text`]: events/enum.Event.html#variant.Text
838 /// [`End`]: events/enum.Event.html#variant.End
read_text<K: AsRef<[u8]>>(&mut self, end: K, buf: &mut Vec<u8>) -> Result<String>839 pub fn read_text<K: AsRef<[u8]>>(&mut self, end: K, buf: &mut Vec<u8>) -> Result<String> {
840 let s = match self.read_event(buf) {
841 Ok(Event::Text(e)) => e.unescape_and_decode(self),
842 Ok(Event::End(ref e)) if e.name() == end.as_ref() => return Ok("".to_string()),
843 Err(e) => return Err(e),
844 Ok(Event::Eof) => return Err(Error::UnexpectedEof("Text".to_string())),
845 _ => return Err(Error::TextNotFound),
846 };
847 self.read_to_end(end, buf)?;
848 s
849 }
850
851 /// Consumes `Reader` returning the underlying reader
852 ///
853 /// Can be used to compute line and column of a parsing error position
854 ///
855 /// # Examples
856 ///
857 /// ```
858 /// use std::{str, io::Cursor};
859 /// use quick_xml::Reader;
860 /// use quick_xml::events::Event;
861 ///
862 /// let xml = r#"<tag1 att1 = "test">
863 /// <tag2><!--Test comment-->Test</tag2>
864 /// <tag3>Test 2</tag3>
865 /// </tag1>"#;
866 /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
867 /// let mut buf = Vec::new();
868 ///
869 /// fn into_line_and_column(reader: Reader<Cursor<&[u8]>>) -> (usize, usize) {
870 /// let end_pos = reader.buffer_position();
871 /// let mut cursor = reader.into_underlying_reader();
872 /// let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned())
873 /// .expect("can't make a string");
874 /// let mut line = 1;
875 /// let mut column = 0;
876 /// for c in s.chars() {
877 /// if c == '\n' {
878 /// line += 1;
879 /// column = 0;
880 /// } else {
881 /// column += 1;
882 /// }
883 /// }
884 /// (line, column)
885 /// }
886 ///
887 /// loop {
888 /// match reader.read_event(&mut buf) {
889 /// Ok(Event::Start(ref e)) => match e.name() {
890 /// b"tag1" | b"tag2" => (),
891 /// tag => {
892 /// assert_eq!(b"tag3", tag);
893 /// assert_eq!((3, 22), into_line_and_column(reader));
894 /// break;
895 /// }
896 /// },
897 /// Ok(Event::Eof) => unreachable!(),
898 /// _ => (),
899 /// }
900 /// buf.clear();
901 /// }
902 /// ```
pub fn into_underlying_reader(self) -> B {
    // Consumes the `Reader`, dropping its parser state, and hands back the
    // wrapped reader.
    self.reader
}
906 }
907
908 impl Reader<BufReader<File>> {
909 /// Creates an XML reader from a file path.
from_file<P: AsRef<Path>>(path: P) -> Result<Reader<BufReader<File>>>910 pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Reader<BufReader<File>>> {
911 let file = File::open(path).map_err(Error::Io)?;
912 let reader = BufReader::new(file);
913 Ok(Reader::from_reader(reader))
914 }
915 }
916
917 impl<'a> Reader<&'a [u8]> {
918 /// Creates an XML reader from a string slice.
from_str(s: &'a str) -> Reader<&'a [u8]>919 pub fn from_str(s: &'a str) -> Reader<&'a [u8]> {
920 Reader::from_reader(s.as_bytes())
921 }
922 }
923
924 /// read until `byte` is found or end of file
925 /// return the position of byte
926 #[inline]
read_until<R: BufRead>( r: &mut R, byte: u8, buf: &mut Vec<u8>, position: &mut usize, ) -> Result<usize>927 fn read_until<R: BufRead>(
928 r: &mut R,
929 byte: u8,
930 buf: &mut Vec<u8>,
931 position: &mut usize,
932 ) -> Result<usize> {
933 let mut read = 0;
934 let mut done = false;
935 while !done {
936 let used = {
937 let available = match r.fill_buf() {
938 Ok(n) if n.is_empty() => break,
939 Ok(n) => n,
940 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
941 Err(e) => {
942 *position += read;
943 return Err(Error::Io(e));
944 }
945 };
946
947 match memchr::memchr(byte, available) {
948 Some(i) => {
949 buf.extend_from_slice(&available[..i]);
950 done = true;
951 i + 1
952 }
953 None => {
954 buf.extend_from_slice(available);
955 available.len()
956 }
957 }
958 };
959 r.consume(used);
960 read += used;
961 }
962 *position += read;
963 Ok(read)
964 }
965
966 /// Derived from `read_until`, but modified to handle XML attributes using a minimal state machine.
967 /// [W3C Extensible Markup Language (XML) 1.1 (2006)](https://www.w3.org/TR/xml11)
968 ///
969 /// Attribute values are defined as follows:
970 /// ```plain
971 /// AttValue := '"' (([^<&"]) | Reference)* '"'
972 /// | "'" (([^<&']) | Reference)* "'"
973 /// ```
974 /// (`Reference` is something like `"`, but we don't care about escaped characters at this
975 /// level)
976 #[inline]
read_elem_until<R: BufRead>( r: &mut R, end_byte: u8, buf: &mut Vec<u8>, position: &mut usize, ) -> Result<usize>977 fn read_elem_until<R: BufRead>(
978 r: &mut R,
979 end_byte: u8,
980 buf: &mut Vec<u8>,
981 position: &mut usize,
982 ) -> Result<usize> {
983 #[derive(Clone, Copy)]
984 enum State {
985 /// The initial state (inside element, but outside of attribute value)
986 Elem,
987 /// Inside a single-quoted attribute value
988 SingleQ,
989 /// Inside a double-quoted attribute value
990 DoubleQ,
991 }
992 let mut state = State::Elem;
993 let mut read = 0;
994 let mut done = false;
995 while !done {
996 let used = {
997 let available = match r.fill_buf() {
998 Ok(n) if n.is_empty() => return Ok(read),
999 Ok(n) => n,
1000 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
1001 Err(e) => {
1002 *position += read;
1003 return Err(Error::Io(e));
1004 }
1005 };
1006
1007 let mut memiter = memchr::memchr3_iter(end_byte, b'\'', b'"', available);
1008 let used: usize;
1009 loop {
1010 match memiter.next() {
1011 Some(i) => {
1012 state = match (state, available[i]) {
1013 (State::Elem, b) if b == end_byte => {
1014 // only allowed to match `end_byte` while we are in state `Elem`
1015 buf.extend_from_slice(&available[..i]);
1016 done = true;
1017 used = i + 1;
1018 break;
1019 }
1020 (State::Elem, b'\'') => State::SingleQ,
1021 (State::Elem, b'\"') => State::DoubleQ,
1022
1023 // the only end_byte that gets us out if the same character
1024 (State::SingleQ, b'\'') | (State::DoubleQ, b'\"') => State::Elem,
1025
1026 // all other bytes: no state change
1027 _ => state,
1028 };
1029 }
1030 None => {
1031 buf.extend_from_slice(available);
1032 used = available.len();
1033 break;
1034 }
1035 }
1036 }
1037 used
1038 };
1039 r.consume(used);
1040 read += used;
1041 }
1042 *position += read;
1043 Ok(read)
1044 }
1045
/// Tells whether `b` is one of the four whitespace bytes recognised here: space, carriage
/// return, line feed or horizontal tab.
#[inline]
pub(crate) fn is_whitespace(b: u8) -> bool {
    b == b' ' || b == b'\r' || b == b'\n' || b == b'\t'
}
1054
/// A namespace declaration. Can either bind a namespace to a prefix or define the current default
/// namespace.
///
/// The declaration's bytes live in a separate namespace buffer: the prefix (if any) is stored
/// at `start` and is immediately followed by the namespace value.
#[derive(Debug, Clone)]
struct Namespace {
    /// Index in the namespace buffer at which this declaration's bytes begin
    start: usize,
    /// Length of the prefix stored at `start`
    /// * if greater than zero, binds this namespace to the corresponding prefix slice.
    /// * if zero, defines the current default namespace.
    prefix_len: usize,
    /// Length of the namespace name (the URI) of this namespace declaration, stored right after
    /// the prefix in the buffer.
    ///
    /// The XML standard specifies that an empty namespace value 'removes' a namespace declaration
    /// for the extent of its scope. For prefix declarations that's not very interesting, but it is
    /// vital for default namespace declarations. With `xmlns=""` you can revert back to the default
    /// behaviour of leaving unqualified element names unqualified.
    value_len: usize,
    /// Level of nesting at which this namespace was declared. The declaring element is included,
    /// i.e., a declaration on the document root has `level = 1`.
    /// This is used to pop the namespace when the element gets closed.
    level: i32,
}
1077
1078 impl Namespace {
1079 /// Gets the value slice out of namespace buffer
1080 ///
1081 /// Returns `None` if `value_len == 0`
1082 #[inline]
opt_value<'a, 'b>(&'a self, ns_buffer: &'b [u8]) -> Option<&'b [u8]>1083 fn opt_value<'a, 'b>(&'a self, ns_buffer: &'b [u8]) -> Option<&'b [u8]> {
1084 if self.value_len == 0 {
1085 None
1086 } else {
1087 let start = self.start + self.prefix_len;
1088 Some(&ns_buffer[start..start + self.value_len])
1089 }
1090 }
1091
1092 /// Check if the namespace matches the potentially qualified name
1093 #[inline]
is_match(&self, ns_buffer: &[u8], qname: &[u8]) -> bool1094 fn is_match(&self, ns_buffer: &[u8], qname: &[u8]) -> bool {
1095 if self.prefix_len == 0 {
1096 !qname.contains(&b':')
1097 } else {
1098 qname.get(self.prefix_len).map_or(false, |n| *n == b':')
1099 && qname.starts_with(&ns_buffer[self.start..self.start + self.prefix_len])
1100 }
1101 }
1102 }
1103
/// A namespace management buffer.
///
/// Holds all internal logic to push/pop namespaces with their levels. The bytes of the
/// declarations themselves are kept in a separate buffer that is passed to each method; this
/// index only records where each declaration lives in that buffer.
#[derive(Debug, Default, Clone)]
struct NamespaceBufferIndex {
    /// a buffer of namespace ranges, in the order in which the declarations were pushed
    slices: Vec<Namespace>,
    /// The number of open tags at the moment. We need to keep track of this to know which namespace
    /// declarations to remove when we encounter an `End` event.
    nesting_level: i32,
    /// For `Empty` events keep the 'scope' of the element on the stack artificially. That way, the
    /// consumer has a chance to use `resolve` in the context of the empty element. We perform the
    /// pop as the first operation in the next `next()` call.
    ///
    /// While this flag is set, `pop_empty_namespaces` removes that scope and clears the flag.
    pending_pop: bool,
}
1119
1120 impl NamespaceBufferIndex {
1121 #[inline]
find_namespace_value<'a, 'b, 'c>( &'a self, element_name: &'b [u8], buffer: &'c [u8], ) -> Option<&'c [u8]>1122 fn find_namespace_value<'a, 'b, 'c>(
1123 &'a self,
1124 element_name: &'b [u8],
1125 buffer: &'c [u8],
1126 ) -> Option<&'c [u8]> {
1127 self.slices
1128 .iter()
1129 .rfind(|n| n.is_match(buffer, element_name))
1130 .and_then(|n| n.opt_value(buffer))
1131 }
1132
pop_empty_namespaces(&mut self, buffer: &mut Vec<u8>)1133 fn pop_empty_namespaces(&mut self, buffer: &mut Vec<u8>) {
1134 if !self.pending_pop {
1135 return;
1136 }
1137 self.pending_pop = false;
1138 self.nesting_level -= 1;
1139 let current_level = self.nesting_level;
1140 // from the back (most deeply nested scope), look for the first scope that is still valid
1141 match self.slices.iter().rposition(|n| n.level <= current_level) {
1142 // none of the namespaces are valid, remove all of them
1143 None => {
1144 buffer.clear();
1145 self.slices.clear();
1146 }
1147 // drop all namespaces past the last valid namespace
1148 Some(last_valid_pos) => {
1149 if let Some(len) = self.slices.get(last_valid_pos + 1).map(|n| n.start) {
1150 buffer.truncate(len);
1151 self.slices.truncate(last_valid_pos + 1);
1152 }
1153 }
1154 }
1155 }
1156
push_new_namespaces(&mut self, e: &BytesStart, buffer: &mut Vec<u8>)1157 fn push_new_namespaces(&mut self, e: &BytesStart, buffer: &mut Vec<u8>) {
1158 self.nesting_level += 1;
1159 let level = self.nesting_level;
1160 // adds new namespaces for attributes starting with 'xmlns:' and for the 'xmlns'
1161 // (default namespace) attribute.
1162 for a in e.attributes().with_checks(false) {
1163 if let Ok(Attribute { key: k, value: v }) = a {
1164 if k.starts_with(b"xmlns") {
1165 match k.get(5) {
1166 None => {
1167 let start = buffer.len();
1168 buffer.extend_from_slice(&*v);
1169 self.slices.push(Namespace {
1170 start,
1171 prefix_len: 0,
1172 value_len: v.len(),
1173 level,
1174 });
1175 }
1176 Some(&b':') => {
1177 let start = buffer.len();
1178 buffer.extend_from_slice(&k[6..]);
1179 buffer.extend_from_slice(&*v);
1180 self.slices.push(Namespace {
1181 start,
1182 prefix_len: k.len() - 6,
1183 value_len: v.len(),
1184 level,
1185 });
1186 }
1187 _ => break,
1188 }
1189 }
1190 } else {
1191 break;
1192 }
1193 }
1194 }
1195
1196 /// Resolves a potentially qualified **attribute name** into (namespace name, local name).
1197 ///
1198 /// *Qualified* attribute names have the form `prefix:local-name` where the`prefix` is defined
1199 /// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix
1200 /// can be defined on the same element as the attribute in question.
1201 ///
1202 /// *Unqualified* attribute names do *not* inherit the current *default namespace*.
1203 #[inline]
resolve_namespace<'a, 'b, 'c>( &'a self, qname: &'b [u8], buffer: &'c [u8], use_default: bool, ) -> (Option<&'c [u8]>, &'b [u8])1204 fn resolve_namespace<'a, 'b, 'c>(
1205 &'a self,
1206 qname: &'b [u8],
1207 buffer: &'c [u8],
1208 use_default: bool,
1209 ) -> (Option<&'c [u8]>, &'b [u8]) {
1210 self.slices
1211 .iter()
1212 .rfind(|n| n.is_match(buffer, qname))
1213 .map_or((None, qname), |n| {
1214 let len = n.prefix_len;
1215 if len > 0 {
1216 (n.opt_value(buffer), &qname[len + 1..])
1217 } else if use_default {
1218 (n.opt_value(buffer), qname)
1219 } else {
1220 (None, qname)
1221 }
1222 })
1223 }
1224 }
1225
/// Utf8 Decoder
///
/// This is the definition used when the `encoding` feature is disabled. It carries no state;
/// its `decode` method interprets the bytes as UTF-8.
#[cfg(not(feature = "encoding"))]
#[derive(Clone, Copy)]
pub struct Decoder;
1230
/// Encoding-aware decoder
///
/// This is the definition used when the `encoding` feature is enabled. Its `decode` method
/// converts bytes to text using the stored `encoding`.
#[cfg(feature = "encoding")]
#[derive(Clone, Copy)]
pub struct Decoder {
    /// The `encoding_rs` encoding that `decode` applies to the input bytes.
    encoding: &'static Encoding,
}
1237
1238 impl Decoder {
1239 #[cfg(not(feature = "encoding"))]
decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str>1240 pub fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> {
1241 from_utf8(bytes).map_err(Error::Utf8)
1242 }
1243
1244 #[cfg(feature = "encoding")]
decode<'c>(&self, bytes: &'c [u8]) -> Cow<'c, str>1245 pub fn decode<'c>(&self, bytes: &'c [u8]) -> Cow<'c, str> {
1246 self.encoding.decode(bytes).0
1247 }
1248 }
1249