1 //! A module to handle `Reader`
2
3 #[cfg(feature = "encoding")]
4 use std::borrow::Cow;
5 use std::fs::File;
6 use std::io::{self, BufRead, BufReader};
7 use std::path::Path;
8 use std::str::from_utf8;
9
10 #[cfg(feature = "encoding")]
11 use encoding_rs::{Encoding, UTF_16BE, UTF_16LE};
12
13 use errors::{Error, Result};
14 use events::{attributes::Attribute, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
15
16 use memchr;
17
/// Where the parser currently stands relative to markup; `read_event` dispatches on it.
#[derive(Clone)]
enum TagState {
    /// A `<` has just been consumed, so the next read parses markup up to `>`.
    Opened,
    /// Outside of markup, so the next read collects text up to the next `<`.
    Closed,
    /// A self-closing tag was reported as `Start` (empty-element expansion) and the
    /// synthetic `End` event still has to be emitted.
    Empty,
    /// Either Eof or Errored
    Exit,
}
26
/// A low level encoding-agnostic XML event reader.
///
/// Consumes a `BufRead` and streams XML `Event`s.
///
/// # Examples
///
/// ```
/// use quick_xml::Reader;
/// use quick_xml::events::Event;
///
/// let xml = r#"<tag1 att1 = "test">
///                 <tag2><!--Test comment-->Test</tag2>
///                 <tag2>Test 2</tag2>
///             </tag1>"#;
/// let mut reader = Reader::from_str(xml);
/// reader.trim_text(true);
/// let mut count = 0;
/// let mut txt = Vec::new();
/// let mut buf = Vec::new();
/// loop {
///     match reader.read_event(&mut buf) {
///         Ok(Event::Start(ref e)) => {
///             match e.name() {
///                 b"tag1" => println!("attributes values: {:?}",
///                                     e.attributes().map(|a| a.unwrap().value)
///                                     .collect::<Vec<_>>()),
///                 b"tag2" => count += 1,
///                 _ => (),
///             }
///         },
///         Ok(Event::Text(e)) => txt.push(e.unescape_and_decode(&reader).unwrap()),
///         Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
///         Ok(Event::Eof) => break,
///         _ => (),
///     }
///     buf.clear();
/// }
/// ```
#[derive(Clone)]
pub struct Reader<B: BufRead> {
    /// the underlying buffered source the XML bytes are pulled from
    reader: B,
    /// number of input bytes consumed so far, useful for debugging errors
    buf_position: usize,
    /// current parser state (inside markup, outside markup, pending synthetic end, finished)
    tag_state: TagState,
    /// expand empty elements into an opening and a closing element
    expand_empty_elements: bool,
    /// trim `Text` events, skipping the event entirely if the text is whitespace only
    trim_text: bool,
    /// trim trailing whitespace from markup names in closing tags `</a >`
    trim_markup_names_in_closing_tags: bool,
    /// check that each `End` name matches the most recent unclosed `Start` name
    check_end_names: bool,
    /// check that comments do not contain `--` (false by default)
    check_comments: bool,
    /// concatenated names of all currently started elements which didn't have a matching
    /// End element yet
    opened_buffer: Vec<u8>,
    /// start index in `opened_buffer` of each opened name
    opened_starts: Vec<usize>,
    /// a buffer to manage namespaces
    ns_buffer: NamespaceBufferIndex,
    #[cfg(feature = "encoding")]
    /// the encoding specified in the xml, defaults to utf8
    encoding: &'static Encoding,
    #[cfg(feature = "encoding")]
    /// whether the encoding has already been determined (set when an XML declaration
    /// names an encoding or when a byte order mark is recognised)
    is_encoding_set: bool,
}
97
98 impl<B: BufRead> Reader<B> {
99 /// Creates a `Reader` that reads from a reader implementing `BufRead`.
from_reader(reader: B) -> Reader<B>100 pub fn from_reader(reader: B) -> Reader<B> {
101 Reader {
102 reader,
103 opened_buffer: Vec::new(),
104 opened_starts: Vec::new(),
105 tag_state: TagState::Closed,
106 expand_empty_elements: false,
107 trim_text: false,
108 trim_markup_names_in_closing_tags: true,
109 check_end_names: true,
110 buf_position: 0,
111 check_comments: false,
112 ns_buffer: NamespaceBufferIndex::default(),
113 #[cfg(feature = "encoding")]
114 encoding: ::encoding_rs::UTF_8,
115 #[cfg(feature = "encoding")]
116 is_encoding_set: false,
117 }
118 }
119
/// Changes whether empty elements should be split into a `Start` and an `End` event.
///
/// When set to `true`, all [`Empty`] events produced by a self-closing tag like `<tag/>` are
/// expanded into a [`Start`] event followed by an [`End`] event. When set to `false` (the
/// default), those tags are represented by an [`Empty`] event instead.
///
/// Returns `self` so that option setters can be chained.
///
/// (`false` by default)
///
/// [`Empty`]: events/enum.Event.html#variant.Empty
/// [`Start`]: events/enum.Event.html#variant.Start
/// [`End`]: events/enum.Event.html#variant.End
pub fn expand_empty_elements(&mut self, val: bool) -> &mut Reader<B> {
    self.expand_empty_elements = val;
    self
}
135
/// Changes whether whitespace before and after character data should be removed.
///
/// When set to `true`, all [`Text`] events are trimmed. If the trimmed text is empty, no
/// event is emitted for it.
///
/// Returns `self` so that option setters can be chained.
///
/// (`false` by default)
///
/// [`Text`]: events/enum.Event.html#variant.Text
pub fn trim_text(&mut self, val: bool) -> &mut Reader<B> {
    self.trim_text = val;
    self
}
148
/// Changes whether trailing whitespace after the markup name is trimmed in closing tags
/// `</a >`.
///
/// If `true`, the emitted [`End`] event is stripped of trailing whitespace after the markup
/// name.
///
/// Note that if set to `false` while `check_end_names` is `true`, the comparison of markup
/// names will fail erroneously when a closing tag contains trailing whitespace.
///
/// Returns `self` so that option setters can be chained.
///
/// (`true` by default)
///
/// [`End`]: events/enum.Event.html#variant.End
pub fn trim_markup_names_in_closing_tags(&mut self, val: bool) -> &mut Reader<B> {
    self.trim_markup_names_in_closing_tags = val;
    self
}
164
/// Changes whether mismatched closing tag names should be detected.
///
/// When set to `false`, it won't check if a closing tag matches the corresponding opening tag.
/// For example, `<mytag></different_tag>` will be permitted.
///
/// If the XML is known to be sane (already processed, etc.) this saves extra time.
///
/// Note that the emitted [`End`] event will not be modified if this is disabled, i.e. it will
/// contain the data of the mismatched end tag.
///
/// Returns `self` so that option setters can be chained.
///
/// (`true` by default)
///
/// [`End`]: events/enum.Event.html#variant.End
pub fn check_end_names(&mut self, val: bool) -> &mut Reader<B> {
    self.check_end_names = val;
    self
}
182
/// Changes whether comments should be validated.
///
/// When set to `true`, every [`Comment`] event will be checked for not containing `--`, which
/// is not allowed in XML comments. Most of the time we don't want comments at all so we don't
/// really care about comment correctness, thus the default value is `false` to improve
/// performance.
///
/// Returns `self` so that option setters can be chained.
///
/// (`false` by default)
///
/// [`Comment`]: events/enum.Event.html#variant.Comment
pub fn check_comments(&mut self, val: bool) -> &mut Reader<B> {
    self.check_comments = val;
    self
}
197
198 /// Gets the current byte position in the input data.
199 ///
200 /// Useful when debugging errors.
buffer_position(&self) -> usize201 pub fn buffer_position(&self) -> usize {
202 // when internal state is Opened, we have actually read until '<',
203 // which we don't want to show
204 if let TagState::Opened = self.tag_state {
205 self.buf_position - 1
206 } else {
207 self.buf_position
208 }
209 }
210
211 /// private function to read until '<' is found
212 /// return a `Text` event
read_until_open<'a, 'b>(&'a mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>>213 fn read_until_open<'a, 'b>(&'a mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
214 self.tag_state = TagState::Opened;
215 let buf_start = buf.len();
216 match read_until(&mut self.reader, b'<', buf, &mut self.buf_position) {
217 Ok(0) => Ok(Event::Eof),
218 Ok(_) => {
219 let (start, len) = if self.trim_text {
220 match buf.iter().skip(buf_start).position(|&b| !is_whitespace(b)) {
221 Some(start) => (
222 buf_start + start,
223 buf.iter()
224 .rposition(|&b| !is_whitespace(b))
225 .map_or_else(|| buf.len(), |p| p + 1),
226 ),
227 None => return self.read_event(buf),
228 }
229 } else {
230 (buf_start, buf.len())
231 };
232 Ok(Event::Text(BytesText::from_escaped(&buf[start..len])))
233 }
234 Err(e) => Err(e),
235 }
236 }
237
238 /// private function to read until '>' is found
read_until_close<'a, 'b>(&'a mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>>239 fn read_until_close<'a, 'b>(&'a mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
240 self.tag_state = TagState::Closed;
241
242 // need to read 1 character to decide whether pay special attention to attribute values
243 let buf_start = buf.len();
244 let start = loop {
245 match self.reader.fill_buf() {
246 Ok(n) if n.is_empty() => return Ok(Event::Eof),
247 Ok(n) => {
248 // We intentionally don't `consume()` the byte, otherwise we would have to
249 // handle things like '<>' here already.
250 break n[0];
251 }
252 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
253 Err(e) => return Err(Error::Io(e)),
254 }
255 };
256
257 if start != b'/' && start != b'!' && start != b'?' {
258 match read_elem_until(&mut self.reader, b'>', buf, &mut self.buf_position) {
259 Ok(0) => Ok(Event::Eof),
260 Ok(_) => {
261 // we already *know* that we are in this case
262 self.read_start(&buf[buf_start..])
263 }
264 Err(e) => Err(e),
265 }
266 } else {
267 match read_until(&mut self.reader, b'>', buf, &mut self.buf_position) {
268 Ok(0) => Ok(Event::Eof),
269 Ok(_) => match start {
270 b'/' => self.read_end(&buf[buf_start..]),
271 b'!' => self.read_bang(buf_start, buf),
272 b'?' => self.read_question_mark(&buf[buf_start..]),
273 _ => unreachable!(
274 "We checked that `start` must be one of [/!?], was {:?} \
275 instead.",
276 start
277 ),
278 },
279 Err(e) => Err(e),
280 }
281 }
282 }
283
284 /// reads `BytesElement` starting with a `/`,
285 /// if `self.check_end_names`, checks that element matches last opened element
286 /// return `End` event
read_end<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>>287 fn read_end<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>> {
288 // XML standard permits whitespaces after the markup name in closing tags.
289 // Let's strip them from the buffer before comparing tag names.
290 let name = if self.trim_markup_names_in_closing_tags {
291 if let Some(pos_end_name) = buf[1..].iter().rposition(|&b| !b.is_ascii_whitespace()) {
292 let (name, _) = buf[1..].split_at(pos_end_name + 1);
293 name
294 } else {
295 &buf[1..]
296 }
297 } else {
298 &buf[1..]
299 };
300 if self.check_end_names {
301 let mismatch_err = |expected: &[u8], found: &[u8], buf_position: &mut usize| {
302 *buf_position -= buf.len();
303 Err(Error::EndEventMismatch {
304 expected: from_utf8(expected).unwrap_or("").to_owned(),
305 found: from_utf8(found).unwrap_or("").to_owned(),
306 })
307 };
308 match self.opened_starts.pop() {
309 Some(start) => {
310 if name != &self.opened_buffer[start..] {
311 let expected = &self.opened_buffer[start..];
312 mismatch_err(expected, name, &mut self.buf_position)
313 } else {
314 self.opened_buffer.truncate(start);
315 Ok(Event::End(BytesEnd::borrowed(name)))
316 }
317 }
318 None => mismatch_err(b"", &buf[1..], &mut self.buf_position),
319 }
320 } else {
321 Ok(Event::End(BytesEnd::borrowed(name)))
322 }
323 }
324
325 /// reads `BytesElement` starting with a `!`,
326 /// return `Comment`, `CData` or `DocType` event
327 ///
328 /// Note: depending on the start of the Event, we may need to read more
329 /// data, thus we need a mutable buffer
read_bang<'a, 'b>( &'a mut self, buf_start: usize, buf: &'b mut Vec<u8>, ) -> Result<Event<'b>>330 fn read_bang<'a, 'b>(
331 &'a mut self,
332 buf_start: usize,
333 buf: &'b mut Vec<u8>,
334 ) -> Result<Event<'b>> {
335 if buf[buf_start..].starts_with(b"!--") {
336 while buf.len() < buf_start + 5 || !buf.ends_with(b"--") {
337 buf.push(b'>');
338 match read_until(&mut self.reader, b'>', buf, &mut self.buf_position) {
339 Ok(0) => {
340 self.buf_position -= buf.len() - buf_start;
341 return Err(Error::UnexpectedEof("Comment".to_string()));
342 }
343 Ok(_) => (),
344 Err(e) => return Err(e),
345 }
346 }
347 let len = buf.len();
348 if self.check_comments {
349 // search if '--' not in comments
350 if let Some(p) = memchr::memchr_iter(b'-', &buf[buf_start + 3..len - 2])
351 .position(|p| buf[buf_start + 3 + p + 1] == b'-')
352 {
353 self.buf_position -= buf.len() - buf_start + p;
354 return Err(Error::UnexpectedToken("--".to_string()));
355 }
356 }
357 Ok(Event::Comment(BytesText::from_escaped(
358 &buf[buf_start + 3..len - 2],
359 )))
360 } else if buf.len() >= buf_start + 8 {
361 match &buf[buf_start + 1..buf_start + 8] {
362 b"[CDATA[" => {
363 while buf.len() < 10 || !buf.ends_with(b"]]") {
364 buf.push(b'>');
365 match read_until(&mut self.reader, b'>', buf, &mut self.buf_position) {
366 Ok(0) => {
367 self.buf_position -= buf.len() - buf_start;
368 return Err(Error::UnexpectedEof("CData".to_string()));
369 }
370 Ok(_) => (),
371 Err(e) => return Err(e),
372 }
373 }
374 Ok(Event::CData(BytesText::from_escaped(
375 &buf[buf_start + 8..buf.len() - 2],
376 )))
377 }
378 b"DOCTYPE" => {
379 let mut count = buf.iter().skip(buf_start).filter(|&&b| b == b'<').count();
380 while count > 0 {
381 buf.push(b'>');
382 match read_until(&mut self.reader, b'>', buf, &mut self.buf_position) {
383 Ok(0) => {
384 self.buf_position -= buf.len() - buf_start;
385 return Err(Error::UnexpectedEof("DOCTYPE".to_string()));
386 }
387 Ok(n) => {
388 let start = buf.len() - n;
389 count += buf.iter().skip(start).filter(|&&b| b == b'<').count();
390 count -= 1;
391 }
392 Err(e) => return Err(e),
393 }
394 }
395 Ok(Event::DocType(BytesText::from_escaped(
396 &buf[buf_start + 8..buf.len()],
397 )))
398 }
399 _ => Err(Error::UnexpectedBang),
400 }
401 } else {
402 self.buf_position -= buf.len() - buf_start;
403 Err(Error::UnexpectedBang)
404 }
405 }
406
/// Turns markup starting with `?` into a `Decl` or `PI` event.
///
/// `buf` is expected to start and end with `?`. Content beginning with `xml` followed by
/// whitespace is an XML declaration; if it names an encoding, that encoding becomes the
/// reader's encoding. Anything else is a processing instruction.
///
/// Returns `UnexpectedEof` (after rewinding the position by the markup length) when the
/// trailing `?` is missing.
#[cfg(feature = "encoding")]
fn read_question_mark<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>> {
    let len = buf.len();
    if len <= 2 || buf[len - 1] != b'?' {
        self.buf_position -= len;
        return Err(Error::UnexpectedEof("XmlDecl".to_string()));
    }

    // Everything between the leading and the trailing `?`.
    let content = &buf[1..len - 1];
    let is_declaration = len > 5 && buf[1..].starts_with(b"xml") && is_whitespace(buf[4]);
    if !is_declaration {
        return Ok(Event::PI(BytesText::from_escaped(content)));
    }

    let event = BytesDecl::from_start(BytesStart::borrowed(content, 3));
    // Try getting the encoding from the declaration event.
    if let Some(enc) = event.encoder() {
        self.encoding = enc;
        self.is_encoding_set = true;
    }
    Ok(Event::Decl(event))
}
429
430 /// reads `BytesElement` starting with a `?`,
431 /// return `Decl` or `PI` event
432 #[cfg(not(feature = "encoding"))]
read_question_mark<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>>433 fn read_question_mark<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>> {
434 let len = buf.len();
435 if len > 2 && buf[len - 1] == b'?' {
436 if len > 5 && &buf[1..4] == b"xml" && is_whitespace(buf[4]) {
437 let event = BytesDecl::from_start(BytesStart::borrowed(&buf[1..len - 1], 3));
438 Ok(Event::Decl(event))
439 } else {
440 Ok(Event::PI(BytesText::from_escaped(&buf[1..len - 1])))
441 }
442 } else {
443 self.buf_position -= len;
444 Err(Error::UnexpectedEof("XmlDecl".to_string()))
445 }
446 }
447
448 #[inline]
close_expanded_empty(&mut self) -> Result<Event<'static>>449 fn close_expanded_empty(&mut self) -> Result<Event<'static>> {
450 self.tag_state = TagState::Closed;
451 let name = self
452 .opened_buffer
453 .split_off(self.opened_starts.pop().unwrap());
454 Ok(Event::End(BytesEnd::owned(name)))
455 }
456
457 /// reads `BytesElement` starting with any character except `/`, `!` or ``?`
458 /// return `Start` or `Empty` event
read_start<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>>459 fn read_start<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>> {
460 // TODO: do this directly when reading bufreader ...
461 let len = buf.len();
462 let name_end = buf.iter().position(|&b| is_whitespace(b)).unwrap_or(len);
463 if let Some(&b'/') = buf.last() {
464 let end = if name_end < len { name_end } else { len - 1 };
465 if self.expand_empty_elements {
466 self.tag_state = TagState::Empty;
467 self.opened_starts.push(self.opened_buffer.len());
468 self.opened_buffer.extend(&buf[..end]);
469 Ok(Event::Start(BytesStart::borrowed(&buf[..len - 1], end)))
470 } else {
471 Ok(Event::Empty(BytesStart::borrowed(&buf[..len - 1], end)))
472 }
473 } else {
474 if self.check_end_names {
475 self.opened_starts.push(self.opened_buffer.len());
476 self.opened_buffer.extend(&buf[..name_end]);
477 }
478 Ok(Event::Start(BytesStart::borrowed(buf, name_end)))
479 }
480 }
481
482 /// Reads the next `Event`.
483 ///
484 /// This is the main entry point for reading XML `Event`s.
485 ///
486 /// `Event`s borrow `buf` and can be converted to own their data if needed (uses `Cow`
487 /// internally).
488 ///
489 /// Having the possibility to control the internal buffers gives you some additional benefits
490 /// such as:
491 ///
492 /// - Reduce the number of allocations by reusing the same buffer. For constrained systems,
493 /// you can call `buf.clear()` once you are done with processing the event (typically at the
494 /// end of your loop).
495 /// - Reserve the buffer length if you know the file size (using `Vec::with_capacity`).
496 ///
497 /// # Examples
498 ///
499 /// ```
500 /// use quick_xml::Reader;
501 /// use quick_xml::events::Event;
502 ///
503 /// let xml = r#"<tag1 att1 = "test">
504 /// <tag2><!--Test comment-->Test</tag2>
505 /// <tag2>Test 2</tag2>
506 /// </tag1>"#;
507 /// let mut reader = Reader::from_str(xml);
508 /// reader.trim_text(true);
509 /// let mut count = 0;
510 /// let mut buf = Vec::new();
511 /// let mut txt = Vec::new();
512 /// loop {
513 /// match reader.read_event(&mut buf) {
514 /// Ok(Event::Start(ref e)) => count += 1,
515 /// Ok(Event::Text(e)) => txt.push(e.unescape_and_decode(&reader).expect("Error!")),
516 /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
517 /// Ok(Event::Eof) => break,
518 /// _ => (),
519 /// }
520 /// buf.clear();
521 /// }
522 /// println!("Found {} start events", count);
523 /// println!("Text events: {:?}", txt);
524 /// ```
read_event<'a, 'b>(&'a mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>>525 pub fn read_event<'a, 'b>(&'a mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
526 let event = match self.tag_state {
527 TagState::Opened => self.read_until_close(buf),
528 TagState::Closed => self.read_until_open(buf),
529 TagState::Empty => self.close_expanded_empty(),
530 TagState::Exit => return Ok(Event::Eof),
531 };
532 match event {
533 Err(_) | Ok(Event::Eof) => self.tag_state = TagState::Exit,
534 _ => {}
535 }
536 event
537 }
538
539 /// Resolves a potentially qualified **event name** into (namespace name, local name).
540 ///
541 /// *Qualified* attribute names have the form `prefix:local-name` where the`prefix` is defined
542 /// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix
543 /// can be defined on the same element as the attribute in question.
544 ///
545 /// *Unqualified* event inherits the current *default namespace*.
546 #[inline]
event_namespace<'a, 'b, 'c>( &'a self, qname: &'b [u8], namespace_buffer: &'c [u8], ) -> (Option<&'c [u8]>, &'b [u8])547 pub fn event_namespace<'a, 'b, 'c>(
548 &'a self,
549 qname: &'b [u8],
550 namespace_buffer: &'c [u8],
551 ) -> (Option<&'c [u8]>, &'b [u8]) {
552 self.ns_buffer
553 .resolve_namespace(qname, namespace_buffer, true)
554 }
555
556 /// Resolves a potentially qualified **attribute name** into (namespace name, local name).
557 ///
558 /// *Qualified* attribute names have the form `prefix:local-name` where the`prefix` is defined
559 /// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix
560 /// can be defined on the same element as the attribute in question.
561 ///
562 /// *Unqualified* attribute names do *not* inherit the current *default namespace*.
563 #[inline]
attribute_namespace<'a, 'b, 'c>( &'a self, qname: &'b [u8], namespace_buffer: &'c [u8], ) -> (Option<&'c [u8]>, &'b [u8])564 pub fn attribute_namespace<'a, 'b, 'c>(
565 &'a self,
566 qname: &'b [u8],
567 namespace_buffer: &'c [u8],
568 ) -> (Option<&'c [u8]>, &'b [u8]) {
569 self.ns_buffer
570 .resolve_namespace(qname, namespace_buffer, false)
571 }
572
573 /// Reads the next event and resolves its namespace (if applicable).
574 ///
575 /// # Examples
576 ///
577 /// ```
578 /// use std::str::from_utf8;
579 /// use quick_xml::Reader;
580 /// use quick_xml::events::Event;
581 ///
582 /// let xml = r#"<x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
583 /// <y:tag2><!--Test comment-->Test</y:tag2>
584 /// <y:tag2>Test 2</y:tag2>
585 /// </x:tag1>"#;
586 /// let mut reader = Reader::from_str(xml);
587 /// reader.trim_text(true);
588 /// let mut count = 0;
589 /// let mut buf = Vec::new();
590 /// let mut ns_buf = Vec::new();
591 /// let mut txt = Vec::new();
592 /// loop {
593 /// match reader.read_namespaced_event(&mut buf, &mut ns_buf) {
594 /// Ok((ref ns, Event::Start(ref e))) => {
595 /// count += 1;
596 /// match (*ns, e.local_name()) {
597 /// (Some(b"www.xxxx"), b"tag1") => (),
598 /// (Some(b"www.yyyy"), b"tag2") => (),
599 /// (ns, n) => panic!("Namespace and local name mismatch"),
600 /// }
601 /// println!("Resolved namespace: {:?}", ns.and_then(|ns| from_utf8(ns).ok()));
602 /// }
603 /// Ok((_, Event::Text(e))) => {
604 /// txt.push(e.unescape_and_decode(&reader).expect("Error!"))
605 /// },
606 /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
607 /// Ok((_, Event::Eof)) => break,
608 /// _ => (),
609 /// }
610 /// buf.clear();
611 /// }
612 /// println!("Found {} start events", count);
613 /// println!("Text events: {:?}", txt);
614 /// ```
read_namespaced_event<'a, 'b, 'c>( &'a mut self, buf: &'b mut Vec<u8>, namespace_buffer: &'c mut Vec<u8>, ) -> Result<(Option<&'c [u8]>, Event<'b>)>615 pub fn read_namespaced_event<'a, 'b, 'c>(
616 &'a mut self,
617 buf: &'b mut Vec<u8>,
618 namespace_buffer: &'c mut Vec<u8>,
619 ) -> Result<(Option<&'c [u8]>, Event<'b>)> {
620 self.ns_buffer.pop_empty_namespaces(namespace_buffer);
621 match self.read_event(buf) {
622 Ok(Event::Eof) => Ok((None, Event::Eof)),
623 Ok(Event::Start(e)) => {
624 self.ns_buffer.push_new_namespaces(&e, namespace_buffer);
625 Ok((
626 self.ns_buffer
627 .find_namespace_value(e.name(), &**namespace_buffer),
628 Event::Start(e),
629 ))
630 }
631 Ok(Event::Empty(e)) => {
632 // For empty elements we need to 'artificially' keep the namespace scope on the
633 // stack until the next `next()` call occurs.
634 // Otherwise the caller has no chance to use `resolve` in the context of the
635 // namespace declarations that are 'in scope' for the empty element alone.
636 // Ex: <img rdf:nodeID="abc" xmlns:rdf="urn:the-rdf-uri" />
637 self.ns_buffer.push_new_namespaces(&e, namespace_buffer);
638 // notify next `read_namespaced_event()` invocation that it needs to pop this
639 // namespace scope
640 self.ns_buffer.pending_pop = true;
641 Ok((
642 self.ns_buffer
643 .find_namespace_value(e.name(), &**namespace_buffer),
644 Event::Empty(e),
645 ))
646 }
647 Ok(Event::End(e)) => {
648 // notify next `read_namespaced_event()` invocation that it needs to pop this
649 // namespace scope
650 self.ns_buffer.pending_pop = true;
651 Ok((
652 self.ns_buffer
653 .find_namespace_value(e.name(), &**namespace_buffer),
654 Event::End(e),
655 ))
656 }
657 Ok(e) => Ok((None, e)),
658 Err(e) => Err(e),
659 }
660 }
661
/// Returns the `Reader`'s current encoding.
///
/// The encoding starts out as UTF-8 and may change after parsing an XML declaration that
/// names an encoding, or after a byte order mark is recognised by
/// [`decode_without_bom`].
///
/// This encoding will be used by [`decode`].
///
/// [`decode`]: #method.decode
/// [`decode_without_bom`]: #method.decode_without_bom
#[cfg(feature = "encoding")]
pub fn encoding(&self) -> &'static Encoding {
    self.encoding
}
673
/// Decodes a slice using the reader's current encoding (see [`encoding`]).
///
/// Decode `bytes` with BOM sniffing and with malformed sequences replaced with the
/// `U+FFFD REPLACEMENT CHARACTER`.
///
/// If no encoding is specified, defaults to UTF-8.
///
/// [`encoding`]: #method.encoding
#[inline]
#[cfg(feature = "encoding")]
pub fn decode<'b, 'c>(&'b self, bytes: &'c [u8]) -> Cow<'c, str> {
    // Only the decoded text is of interest; the remaining tuple fields are dropped.
    let (text, _, _) = self.encoding.decode(bytes);
    text
}
685
686 /// Decodes a UTF8 slice without BOM (Byte order mark) regardless of XML declaration.
687 ///
688 /// Decode `bytes` without BOM and with malformed sequences replaced with the
689 /// `U+FFFD REPLACEMENT CHARACTER`.
690 ///
691 /// # Note
692 ///
693 /// If you instead want to use XML declared encoding, use the `encoding` feature
694 #[inline]
695 #[cfg(not(feature = "encoding"))]
decode_without_bom<'c>(&self, bytes: &'c [u8]) -> Result<&'c str>696 pub fn decode_without_bom<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> {
697 if bytes.starts_with(b"\xEF\xBB\xBF") {
698 from_utf8(&bytes[3..]).map_err(Error::Utf8)
699 } else {
700 from_utf8(bytes).map_err(Error::Utf8)
701 }
702 }
703
/// Decodes a slice without its BOM (Byte order mark), using the reader's current encoding.
///
/// Decode `bytes` without BOM and with malformed sequences replaced with the
/// `U+FFFD REPLACEMENT CHARACTER`.
///
/// While the encoding has not been determined yet, a leading BOM is used to detect it: a
/// UTF-8 BOM keeps the current encoding, a UTF-16LE or UTF-16BE BOM switches the reader to
/// that encoding. In every case the BOM is skipped and the encoding is marked as determined.
///
/// If no encoding is specified, defaults to UTF-8.
#[inline]
#[cfg(feature = "encoding")]
pub fn decode_without_bom<'b, 'c>(&'b mut self, mut bytes: &'c [u8]) -> Cow<'c, str> {
    if self.is_encoding_set {
        return self.encoding.decode_with_bom_removal(bytes).0;
    }
    if bytes.starts_with(b"\xEF\xBB\xBF") {
        self.is_encoding_set = true;
        bytes = &bytes[3..];
    } else if bytes.starts_with(b"\xFF\xFE") {
        self.is_encoding_set = true;
        self.encoding = UTF_16LE;
        bytes = &bytes[2..];
    } else if bytes.starts_with(b"\xFE\xFF") {
        self.is_encoding_set = true;
        self.encoding = UTF_16BE;
        // The UTF-16 BOM is two bytes long. Skipping three would drop the first payload
        // byte and panic on an input consisting of the BOM alone.
        bytes = &bytes[2..];
    }
    self.encoding.decode_without_bom_handling(bytes).0
}
730
731 /// Decodes a UTF8 slice regardless of XML declaration.
732 ///
733 /// Decode `bytes` with BOM sniffing and with malformed sequences replaced with the
734 /// `U+FFFD REPLACEMENT CHARACTER`.
735 ///
736 /// # Note
737 ///
738 /// If you instead want to use XML declared encoding, use the `encoding` feature
739 #[inline]
740 #[cfg(not(feature = "encoding"))]
decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str>741 pub fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> {
742 from_utf8(bytes).map_err(Error::Utf8)
743 }
744
/// Returns a `Decoder` carrying the reader's current encoding.
#[cfg(feature = "encoding")]
pub fn decoder(&self) -> Decoder {
    let encoding = self.encoding;
    Decoder { encoding }
}
752
/// Get utf8 decoder
///
/// Without the `encoding` feature the returned `Decoder` value carries no state.
#[cfg(not(feature = "encoding"))]
pub fn decoder(&self) -> Decoder {
    Decoder
}
758
759 /// Reads until end element is found
760 ///
761 /// Manages nested cases where parent and child elements have the same name
read_to_end<K: AsRef<[u8]>>(&mut self, end: K, buf: &mut Vec<u8>) -> Result<()>762 pub fn read_to_end<K: AsRef<[u8]>>(&mut self, end: K, buf: &mut Vec<u8>) -> Result<()> {
763 let mut depth = 0;
764 let end = end.as_ref();
765 loop {
766 match self.read_event(buf) {
767 Ok(Event::End(ref e)) if e.name() == end => {
768 if depth == 0 {
769 return Ok(());
770 }
771 depth -= 1;
772 }
773 Ok(Event::Start(ref e)) if e.name() == end => depth += 1,
774 Err(e) => return Err(e),
775 Ok(Event::Eof) => {
776 return Err(Error::UnexpectedEof(format!("</{:?}>", from_utf8(end))));
777 }
778 _ => (),
779 }
780 buf.clear();
781 }
782 }
783
784 /// Reads optional text between start and end tags.
785 ///
786 /// If the next event is a [`Text`] event, returns the decoded and unescaped content as a
787 /// `String`. If the next event is an [`End`] event, returns the empty string. In all other
788 /// cases, returns an error.
789 ///
790 /// Any text will be decoded using the XML encoding specified in the XML declaration (or UTF-8
791 /// if none is specified).
792 ///
793 /// # Examples
794 ///
795 /// ```
796 /// use quick_xml::Reader;
797 /// use quick_xml::events::Event;
798 ///
799 /// let mut xml = Reader::from_reader(b"
800 /// <a><b></a>
801 /// <a></a>
802 /// " as &[u8]);
803 /// xml.trim_text(true);
804 ///
805 /// let expected = ["<b>", ""];
806 /// for &content in expected.iter() {
807 /// match xml.read_event(&mut Vec::new()) {
808 /// Ok(Event::Start(ref e)) => {
809 /// assert_eq!(&xml.read_text(e.name(), &mut Vec::new()).unwrap(), content);
810 /// },
811 /// e => panic!("Expecting Start event, found {:?}", e),
812 /// }
813 /// }
814 /// ```
815 ///
816 /// [`Text`]: events/enum.Event.html#variant.Text
817 /// [`End`]: events/enum.Event.html#variant.End
read_text<K: AsRef<[u8]>>(&mut self, end: K, buf: &mut Vec<u8>) -> Result<String>818 pub fn read_text<K: AsRef<[u8]>>(&mut self, end: K, buf: &mut Vec<u8>) -> Result<String> {
819 let s = match self.read_event(buf) {
820 Ok(Event::Text(e)) => e.unescape_and_decode(self),
821 Ok(Event::End(ref e)) if e.name() == end.as_ref() => return Ok("".to_string()),
822 Err(e) => return Err(e),
823 Ok(Event::Eof) => return Err(Error::UnexpectedEof("Text".to_string())),
824 _ => return Err(Error::TextNotFound),
825 };
826 self.read_to_end(end, buf)?;
827 s
828 }
829
830 /// Consumes `Reader` returning the underlying reader
831 ///
832 /// Can be used to compute line and column of a parsing error position
833 ///
834 /// # Examples
835 ///
836 /// ```
837 /// use std::{str, io::Cursor};
838 /// use quick_xml::Reader;
839 /// use quick_xml::events::Event;
840 ///
841 /// let xml = r#"<tag1 att1 = "test">
842 /// <tag2><!--Test comment-->Test</tag2>
843 /// <tag3>Test 2</tag3>
844 /// </tag1>"#;
845 /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
846 /// let mut buf = Vec::new();
847 ///
848 /// fn into_line_and_column(reader: Reader<Cursor<&[u8]>>) -> (usize, usize) {
849 /// let end_pos = reader.buffer_position();
850 /// let mut cursor = reader.into_underlying_reader();
851 /// let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned())
852 /// .expect("can't make a string");
853 /// let mut line = 1;
854 /// let mut column = 0;
855 /// for c in s.chars() {
856 /// if c == '\n' {
857 /// line += 1;
858 /// column = 0;
859 /// } else {
860 /// column += 1;
861 /// }
862 /// }
863 /// (line, column)
864 /// }
865 ///
866 /// loop {
867 /// match reader.read_event(&mut buf) {
868 /// Ok(Event::Start(ref e)) => match e.name() {
869 /// b"tag1" | b"tag2" => (),
870 /// tag => {
871 /// assert_eq!(b"tag3", tag);
872 /// assert_eq!((3, 22), into_line_and_column(reader));
873 /// break;
874 /// }
875 /// },
876 /// Ok(Event::Eof) => unreachable!(),
877 /// _ => (),
878 /// }
879 /// buf.clear();
880 /// }
881 /// ```
into_underlying_reader(self) -> B882 pub fn into_underlying_reader(self) -> B {
883 self.reader
884 }
885 }
886
887 impl Reader<BufReader<File>> {
888 /// Creates an XML reader from a file path.
from_file<P: AsRef<Path>>(path: P) -> Result<Reader<BufReader<File>>>889 pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Reader<BufReader<File>>> {
890 let file = File::open(path).map_err(Error::Io)?;
891 let reader = BufReader::new(file);
892 Ok(Reader::from_reader(reader))
893 }
894 }
895
896 impl<'a> Reader<&'a [u8]> {
897 /// Creates an XML reader from a string slice.
from_str(s: &'a str) -> Reader<&'a [u8]>898 pub fn from_str(s: &'a str) -> Reader<&'a [u8]> {
899 Reader::from_reader(s.as_bytes())
900 }
901 }
902
903 /// read until `byte` is found or end of file
904 /// return the position of byte
905 #[inline]
read_until<R: BufRead>( r: &mut R, byte: u8, buf: &mut Vec<u8>, position: &mut usize, ) -> Result<usize>906 fn read_until<R: BufRead>(
907 r: &mut R,
908 byte: u8,
909 buf: &mut Vec<u8>,
910 position: &mut usize,
911 ) -> Result<usize> {
912 let mut read = 0;
913 let mut done = false;
914 while !done {
915 let used = {
916 let available = match r.fill_buf() {
917 Ok(n) if n.is_empty() => break,
918 Ok(n) => n,
919 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
920 Err(e) => {
921 *position += read;
922 return Err(Error::Io(e));
923 }
924 };
925
926 match memchr::memchr(byte, available) {
927 Some(i) => {
928 buf.extend_from_slice(&available[..i]);
929 done = true;
930 i + 1
931 }
932 None => {
933 buf.extend_from_slice(available);
934 available.len()
935 }
936 }
937 };
938 r.consume(used);
939 read += used;
940 }
941 *position += read;
942 Ok(read)
943 }
944
945 /// Derived from `read_until`, but modified to handle XML attributes using a minimal state machine.
946 /// [W3C Extensible Markup Language (XML) 1.1 (2006)](https://www.w3.org/TR/xml11)
947 ///
948 /// Attribute values are defined as follows:
949 /// ```plain
950 /// AttValue := '"' (([^<&"]) | Reference)* '"'
951 /// | "'" (([^<&']) | Reference)* "'"
952 /// ```
953 /// (`Reference` is something like `"`, but we don't care about escaped characters at this
954 /// level)
955 #[inline]
read_elem_until<R: BufRead>( r: &mut R, end_byte: u8, buf: &mut Vec<u8>, position: &mut usize, ) -> Result<usize>956 fn read_elem_until<R: BufRead>(
957 r: &mut R,
958 end_byte: u8,
959 buf: &mut Vec<u8>,
960 position: &mut usize,
961 ) -> Result<usize> {
962 #[derive(Clone, Copy)]
963 enum State {
964 /// The initial state (inside element, but outside of attribute value)
965 Elem,
966 /// Inside a single-quoted attribute value
967 SingleQ,
968 /// Inside a double-quoted attribute value
969 DoubleQ,
970 }
971 let mut state = State::Elem;
972 let mut read = 0;
973 let mut done = false;
974 while !done {
975 let used = {
976 let available = match r.fill_buf() {
977 Ok(n) if n.is_empty() => return Ok(read),
978 Ok(n) => n,
979 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
980 Err(e) => {
981 *position += read;
982 return Err(Error::Io(e));
983 }
984 };
985
986 let mut memiter = memchr::memchr3_iter(end_byte, b'\'', b'"', available);
987 let used: usize;
988 loop {
989 match memiter.next() {
990 Some(i) => {
991 state = match (state, available[i]) {
992 (State::Elem, b) if b == end_byte => {
993 // only allowed to match `end_byte` while we are in state `Elem`
994 buf.extend_from_slice(&available[..i]);
995 done = true;
996 used = i + 1;
997 break;
998 }
999 (State::Elem, b'\'') => State::SingleQ,
1000 (State::Elem, b'\"') => State::DoubleQ,
1001
1002 // the only end_byte that gets us out if the same character
1003 (State::SingleQ, b'\'') | (State::DoubleQ, b'\"') => State::Elem,
1004
1005 // all other bytes: no state change
1006 _ => state,
1007 };
1008 }
1009 None => {
1010 buf.extend_from_slice(available);
1011 used = available.len();
1012 break;
1013 }
1014 }
1015 }
1016 used
1017 };
1018 r.consume(used);
1019 read += used;
1020 }
1021 *position += read;
1022 Ok(read)
1023 }
1024
/// Returns `true` if `b` is one of the four whitespace bytes: space, carriage return, line feed
/// or horizontal tab.
#[inline]
pub(crate) fn is_whitespace(b: u8) -> bool {
    b" \r\n\t".contains(&b)
}
1033
/// A single namespace declaration: either a prefix binding or a default namespace definition.
///
/// The declaration does not own its data; it only records offsets into a shared namespace
/// buffer, where the prefix bytes (if any) are immediately followed by the value bytes.
#[derive(Debug, Clone)]
struct Namespace {
    /// Offset in the namespace buffer at which this declaration's bytes begin.
    start: usize,
    /// Length of the prefix
    /// * if greater than zero, this declaration binds the namespace to that prefix.
    /// * if zero, this declaration defines the current default namespace.
    prefix_len: usize,
    /// Length of the namespace name (the URI) of this declaration.
    ///
    /// The XML standard specifies that an empty namespace value 'removes' a namespace declaration
    /// for the extent of its scope. For prefix declarations that's not very interesting, but it is
    /// vital for default namespace declarations. With `xmlns=""` you can revert back to the default
    /// behaviour of leaving unqualified element names unqualified.
    value_len: usize,
    /// Level of nesting at which this namespace was declared. The declaring element is included,
    /// i.e., a declaration on the document root has `level = 1`.
    /// This is used to pop the namespace when the element gets closed.
    level: i32,
}

impl Namespace {
    /// Borrows the namespace value out of the namespace buffer.
    ///
    /// Returns `None` if `value_len == 0`
    #[inline]
    fn opt_value<'a, 'b>(&'a self, ns_buffer: &'b [u8]) -> Option<&'b [u8]> {
        match self.value_len {
            0 => None,
            len => {
                // the value is stored right behind the prefix
                let from = self.start + self.prefix_len;
                Some(&ns_buffer[from..from + len])
            }
        }
    }

    /// Tells whether this declaration applies to the potentially qualified name `qname`.
    ///
    /// A default namespace applies to every name without a `:`; a prefix binding applies to
    /// names of the form `prefix:...` with exactly this prefix.
    #[inline]
    fn is_match(&self, ns_buffer: &[u8], qname: &[u8]) -> bool {
        if self.prefix_len == 0 {
            return qname.iter().all(|&b| b != b':');
        }
        match qname.get(self.prefix_len) {
            // the separator sits where expected, now compare the prefix bytes themselves
            Some(&b':') => {
                qname.starts_with(&ns_buffer[self.start..self.start + self.prefix_len])
            }
            _ => false,
        }
    }
}
1082
/// A namespace management buffer.
///
/// Holds all internal logic to push/pop namespaces with their levels. The namespace bytes
/// themselves live in a separate buffer that is passed to each method; this type only stores
/// ranges into it.
#[derive(Debug, Default, Clone)]
struct NamespaceBufferIndex {
    /// The namespace declarations currently recorded, in declaration order. Each entry describes
    /// a range of the namespace buffer.
    slices: Vec<Namespace>,
    /// The number of open tags at the moment. We need to keep track of this to know which namespace
    /// declarations to remove when we encounter an `End` event.
    nesting_level: i32,
    /// For `Empty` events keep the 'scope' of the element on the stack artificially. That way, the
    /// consumer has a chance to use `resolve` in the context of the empty element. We perform the
    /// pop as the first operation in the next `next()` call.
    pending_pop: bool,
}
1098
1099 impl NamespaceBufferIndex {
1100 #[inline]
find_namespace_value<'a, 'b, 'c>( &'a self, element_name: &'b [u8], buffer: &'c [u8], ) -> Option<&'c [u8]>1101 fn find_namespace_value<'a, 'b, 'c>(
1102 &'a self,
1103 element_name: &'b [u8],
1104 buffer: &'c [u8],
1105 ) -> Option<&'c [u8]> {
1106 self.slices
1107 .iter()
1108 .rfind(|n| n.is_match(buffer, element_name))
1109 .and_then(|n| n.opt_value(buffer))
1110 }
1111
pop_empty_namespaces(&mut self, buffer: &mut Vec<u8>)1112 fn pop_empty_namespaces(&mut self, buffer: &mut Vec<u8>) {
1113 if !self.pending_pop {
1114 return;
1115 }
1116 self.pending_pop = false;
1117 self.nesting_level -= 1;
1118 let current_level = self.nesting_level;
1119 // from the back (most deeply nested scope), look for the first scope that is still valid
1120 match self.slices.iter().rposition(|n| n.level <= current_level) {
1121 // none of the namespaces are valid, remove all of them
1122 None => {
1123 buffer.clear();
1124 self.slices.clear();
1125 }
1126 // drop all namespaces past the last valid namespace
1127 Some(last_valid_pos) => {
1128 if let Some(len) = self.slices.get(last_valid_pos + 1).map(|n| n.start) {
1129 buffer.truncate(len);
1130 self.slices.truncate(last_valid_pos + 1);
1131 }
1132 }
1133 }
1134 }
1135
push_new_namespaces(&mut self, e: &BytesStart, buffer: &mut Vec<u8>)1136 fn push_new_namespaces(&mut self, e: &BytesStart, buffer: &mut Vec<u8>) {
1137 self.nesting_level += 1;
1138 let level = self.nesting_level;
1139 // adds new namespaces for attributes starting with 'xmlns:' and for the 'xmlns'
1140 // (default namespace) attribute.
1141 for a in e.attributes().with_checks(false) {
1142 if let Ok(Attribute { key: k, value: v }) = a {
1143 if k.starts_with(b"xmlns") {
1144 match k.get(5) {
1145 None => {
1146 let start = buffer.len();
1147 buffer.extend_from_slice(&*v);
1148 self.slices.push(Namespace {
1149 start,
1150 prefix_len: 0,
1151 value_len: v.len(),
1152 level,
1153 });
1154 }
1155 Some(&b':') => {
1156 let start = buffer.len();
1157 buffer.extend_from_slice(&k[6..]);
1158 buffer.extend_from_slice(&*v);
1159 self.slices.push(Namespace {
1160 start,
1161 prefix_len: k.len() - 6,
1162 value_len: v.len(),
1163 level,
1164 });
1165 }
1166 _ => break,
1167 }
1168 }
1169 } else {
1170 break;
1171 }
1172 }
1173 }
1174
1175 /// Resolves a potentially qualified **attribute name** into (namespace name, local name).
1176 ///
1177 /// *Qualified* attribute names have the form `prefix:local-name` where the`prefix` is defined
1178 /// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix
1179 /// can be defined on the same element as the attribute in question.
1180 ///
1181 /// *Unqualified* attribute names do *not* inherit the current *default namespace*.
1182 #[inline]
resolve_namespace<'a, 'b, 'c>( &'a self, qname: &'b [u8], buffer: &'c [u8], use_default: bool, ) -> (Option<&'c [u8]>, &'b [u8])1183 fn resolve_namespace<'a, 'b, 'c>(
1184 &'a self,
1185 qname: &'b [u8],
1186 buffer: &'c [u8],
1187 use_default: bool,
1188 ) -> (Option<&'c [u8]>, &'b [u8]) {
1189 self.slices
1190 .iter()
1191 .rfind(|n| n.is_match(buffer, qname))
1192 .map_or((None, qname), |n| {
1193 let len = n.prefix_len;
1194 if len > 0 {
1195 (n.opt_value(buffer), &qname[len + 1..])
1196 } else if use_default {
1197 (n.opt_value(buffer), qname)
1198 } else {
1199 (None, qname)
1200 }
1201 })
1202 }
1203 }
1204
1205 /// Utf8 Decoder
1206 #[cfg(not(feature = "encoding"))]
1207 #[derive(Clone, Copy)]
1208 pub struct Decoder;
1209
1210 /// Utf8 Decoder
1211 #[cfg(feature = "encoding")]
1212 #[derive(Clone, Copy)]
1213 pub struct Decoder {
1214 encoding: &'static Encoding,
1215 }
1216
1217 impl Decoder {
1218 #[cfg(not(feature = "encoding"))]
decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str>1219 pub fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> {
1220 from_utf8(bytes).map_err(Error::Utf8)
1221 }
1222
1223 #[cfg(feature = "encoding")]
decode<'c>(&self, bytes: &'c [u8]) -> Cow<'c, str>1224 pub fn decode<'c>(&self, bytes: &'c [u8]) -> Cow<'c, str> {
1225 self.encoding.decode(bytes).0
1226 }
1227 }
1228