1 use crate::error::{Error, ErrorCode, Result};
2 use crate::lib::ops::Deref;
3 use crate::lib::*;
4
5 #[cfg(feature = "std")]
6 use crate::io;
7 #[cfg(feature = "std")]
8 use crate::iter::LineColIterator;
9
10 #[cfg(feature = "raw_value")]
11 use crate::raw::BorrowedRawDeserializer;
12 #[cfg(all(feature = "raw_value", feature = "std"))]
13 use crate::raw::OwnedRawDeserializer;
14 #[cfg(feature = "raw_value")]
15 use serde::de::Visitor;
16
/// Trait used by the deserializer for iterating over input. This is manually
/// "specialized" for iterating over `&[u8]`. Once `feature(specialization)` is
/// stable we can use actual specialization.
///
/// This trait is sealed and cannot be implemented for types outside of
/// `serde_json`.
pub trait Read<'de>: private::Sealed {
    /// Consumes and returns the next byte of input, or `Ok(None)` once the
    /// input is exhausted.
    #[doc(hidden)]
    fn next(&mut self) -> Result<Option<u8>>;
    /// Returns the next byte of input without consuming it, or `Ok(None)` once
    /// the input is exhausted.
    #[doc(hidden)]
    fn peek(&mut self) -> Result<Option<u8>>;

    /// Only valid after a call to peek(). Discards the peeked byte.
    #[doc(hidden)]
    fn discard(&mut self);

    /// Position of the most recent call to next().
    ///
    /// The most recent call was probably next() and not peek(), but this method
    /// should try to return a sensible result if the most recent call was
    /// actually peek() because we don't always know.
    ///
    /// Only called in case of an error, so performance is not important.
    #[doc(hidden)]
    fn position(&self) -> Position;

    /// Position of the most recent call to peek().
    ///
    /// The most recent call was probably peek() and not next(), but this method
    /// should try to return a sensible result if the most recent call was
    /// actually next() because we don't always know.
    ///
    /// Only called in case of an error, so performance is not important.
    #[doc(hidden)]
    fn peek_position(&self) -> Position;

    /// Offset from the beginning of the input to the next byte that would be
    /// returned by next() or peek().
    #[doc(hidden)]
    fn byte_offset(&self) -> usize;

    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
    /// string until the next quotation mark using the given scratch space if
    /// necessary. The scratch space is initially empty.
    ///
    /// The result either borrows from the input (lifetime `'de`) or from the
    /// reader/scratch space (lifetime `'s`).
    #[doc(hidden)]
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>>;

    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
    /// string until the next quotation mark using the given scratch space if
    /// necessary. The scratch space is initially empty.
    ///
    /// This function returns the raw bytes in the string with escape sequences
    /// expanded but without performing unicode validation.
    #[doc(hidden)]
    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'de, 's, [u8]>>;

    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
    /// string until the next quotation mark but discards the data.
    #[doc(hidden)]
    fn ignore_str(&mut self) -> Result<()>;

    /// Assumes the previous bytes were the start of a hex escape sequence
    /// (`\u`) in a string. Parses the four hexadecimal digits that follow and
    /// returns their value.
    #[doc(hidden)]
    fn decode_hex_escape(&mut self) -> Result<u16>;

    /// Switch raw buffering mode on.
    ///
    /// This is used when deserializing `RawValue`.
    #[cfg(feature = "raw_value")]
    #[doc(hidden)]
    fn begin_raw_buffering(&mut self);

    /// Switch raw buffering mode off and provides the raw buffered data to the
    /// given visitor.
    #[cfg(feature = "raw_value")]
    #[doc(hidden)]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>;

    /// Whether StreamDeserializer::next needs to check the failed flag. True
    /// for IoRead, false for StrRead and SliceRead which can track failure by
    /// truncating their input slice to avoid the extra check on every next
    /// call.
    #[doc(hidden)]
    const should_early_return_if_failed: bool;

    /// Mark a persistent failure of StreamDeserializer, either by setting the
    /// flag or by truncating the input data.
    #[doc(hidden)]
    fn set_failed(&mut self, failed: &mut bool);
}
113
/// A line/column location in the input, used when building syntax errors.
pub struct Position {
    /// Line number. The slice-based reader starts counting at 1.
    pub line: usize,
    /// Column within the line. The slice-based reader counts the bytes seen
    /// since the most recent `\n` (so it restarts at 0 on every new line).
    pub column: usize,
}
118
/// Result of parsing a string: a reference to `T` that is tied to one of two
/// different lifetimes, depending on where the data ended up living.
pub enum Reference<'b, 'c, T>
where
    T: ?Sized + 'static,
{
    /// The data is referenced with the longer-lived `'b` lifetime (readers in
    /// this file use it for slices taken directly from the input).
    Borrowed(&'b T),
    /// The data is referenced with the `'c` lifetime (readers in this file use
    /// it for data that was copied into the scratch buffer).
    Copied(&'c T),
}

/// Both variants wrap a plain reference, so dereferencing simply hands back
/// whichever one is stored.
impl<'b, 'c, T> Deref for Reference<'b, 'c, T>
where
    T: ?Sized + 'static,
{
    type Target = T;

    fn deref(&self) -> &T {
        match self {
            Reference::Borrowed(borrowed) => *borrowed,
            Reference::Copied(copied) => *copied,
        }
    }
}
140
/// JSON input source that reads from a std::io input stream.
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
pub struct IoRead<R>
where
    R: io::Read,
{
    /// Byte iterator over the underlying reader, wrapped so that the current
    /// line, column and byte offset can be queried.
    iter: LineColIterator<io::Bytes<R>>,
    /// Temporary storage of peeked byte.
    ch: Option<u8>,
    /// While raw buffering is on this is `Some`, and every byte consumed
    /// through next() or discard() is appended to it.
    #[cfg(feature = "raw_value")]
    raw_buffer: Option<Vec<u8>>,
}
154
/// JSON input source that reads from a slice of bytes.
//
// This is more efficient than other iterators because peek() can be read-only
// and we can compute line/col position only if an error happens.
pub struct SliceRead<'a> {
    /// The input. It is truncated to the bytes consumed so far when
    /// set_failed() is called.
    slice: &'a [u8],
    /// Index of the *next* byte that will be returned by next() or peek().
    index: usize,
    /// Value of `index` at the time begin_raw_buffering() was last called.
    #[cfg(feature = "raw_value")]
    raw_buffering_start_index: usize,
}
166
/// JSON input source that reads from a UTF-8 string.
//
// Able to elide UTF-8 checks by assuming that the input is valid UTF-8.
pub struct StrRead<'a> {
    /// Byte-level reader over the string's bytes; does the actual work.
    delegate: SliceRead<'a>,
    /// The original string, kept so that raw values can be sliced out as
    /// `&str` directly instead of being re-validated from bytes.
    #[cfg(feature = "raw_value")]
    data: &'a str,
}
175
// Prevent users from implementing the Read trait.
mod private {
    /// Supertrait of `Read` and `Fused`. It lives in a private module, so
    /// only this crate can implement it.
    pub trait Sealed {}
}
180
181 //////////////////////////////////////////////////////////////////////////////
182
#[cfg(feature = "std")]
impl<R> IoRead<R>
where
    R: io::Read,
{
    /// Create a JSON input source to read from a std::io input stream.
    ///
    /// The reader is consumed byte by byte; nothing has been peeked yet and
    /// raw buffering starts switched off.
    pub fn new(reader: R) -> Self {
        let iter = LineColIterator::new(reader.bytes());
        IoRead {
            iter,
            ch: None,
            #[cfg(feature = "raw_value")]
            raw_buffer: None,
        }
    }
}
198
// Opt IoRead into the sealed supertrait so that it can implement Read.
#[cfg(feature = "std")]
impl<R> private::Sealed for IoRead<R> where R: io::Read {}
201
#[cfg(feature = "std")]
impl<R> IoRead<R>
where
    R: io::Read,
{
    /// Reads the body of a JSON string (the opening quote has already been
    /// consumed) into `scratch`, expanding escape sequences on the way, and
    /// hands the collected bytes to `result` once the closing quote is seen.
    ///
    /// When `validate` is true an unescaped control character is a syntax
    /// error; otherwise it is copied through like any other byte.
    fn parse_str_bytes<'s, T, F>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
        validate: bool,
        result: F,
    ) -> Result<T>
    where
        T: 's,
        F: FnOnce(&'s Self, &'s [u8]) -> Result<T>,
    {
        loop {
            let byte = tri!(next_or_eof(self));
            match byte {
                // Closing quote: the string is complete.
                b'"' => return result(self, scratch),
                b'\\' => {
                    tri!(parse_escape(self, validate, scratch));
                }
                // The only remaining bytes flagged in ESCAPE are control
                // characters.
                _ if validate && ESCAPE[byte as usize] => {
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
                }
                _ => scratch.push(byte),
            }
        }
    }
}
240
#[cfg(feature = "std")]
impl<'de, R> Read<'de> for IoRead<R>
where
    R: io::Read,
{
    /// Returns the previously peeked byte if there is one, otherwise pulls a
    /// fresh byte from the stream. Consumed bytes are mirrored into the raw
    /// buffer while raw buffering is on.
    #[inline]
    fn next(&mut self) -> Result<Option<u8>> {
        let byte = match self.ch.take() {
            Some(peeked) => peeked,
            None => match self.iter.next() {
                Some(Ok(fresh)) => fresh,
                Some(Err(err)) => return Err(Error::io(err)),
                None => return Ok(None),
            },
        };
        #[cfg(feature = "raw_value")]
        {
            if let Some(ref mut buf) = self.raw_buffer {
                buf.push(byte);
            }
        }
        Ok(Some(byte))
    }

    /// Fills the one-byte lookahead slot if it is empty and returns its
    /// content without consuming it.
    #[inline]
    fn peek(&mut self) -> Result<Option<u8>> {
        if self.ch.is_none() {
            match self.iter.next() {
                Some(Ok(byte)) => self.ch = Some(byte),
                Some(Err(err)) => return Err(Error::io(err)),
                None => return Ok(None),
            }
        }
        Ok(self.ch)
    }

    /// Drops the peeked byte.
    #[cfg(not(feature = "raw_value"))]
    #[inline]
    fn discard(&mut self) {
        self.ch = None;
    }

    /// Drops the peeked byte, recording it in the raw buffer when raw
    /// buffering is on.
    #[cfg(feature = "raw_value")]
    fn discard(&mut self) {
        if let (Some(byte), Some(buf)) = (self.ch.take(), self.raw_buffer.as_mut()) {
            buf.push(byte);
        }
    }

    fn position(&self) -> Position {
        let line = self.iter.line();
        let column = self.iter.col();
        Position { line, column }
    }

    fn peek_position(&self) -> Position {
        // The LineColIterator updates its position during peek() so it has the
        // right one here.
        self.position()
    }

    fn byte_offset(&self) -> usize {
        let pulled = self.iter.byte_offset();
        // A peeked byte has been pulled from the iterator but not consumed yet.
        if self.ch.is_some() {
            pulled - 1
        } else {
            pulled
        }
    }

    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
        // Data from a stream always ends up in the scratch space.
        let text = tri!(self.parse_str_bytes(scratch, true, as_str));
        Ok(Reference::Copied(text))
    }

    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'de, 's, [u8]>> {
        let bytes = tri!(self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes)));
        Ok(Reference::Copied(bytes))
    }

    fn ignore_str(&mut self) -> Result<()> {
        loop {
            match tri!(next_or_eof(self)) {
                b'"' => return Ok(()),
                b'\\' => {
                    tri!(ignore_escape(self));
                }
                // The only remaining bytes flagged in ESCAPE are control
                // characters.
                byte if ESCAPE[byte as usize] => {
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
                }
                _ => {}
            }
        }
    }

    fn decode_hex_escape(&mut self) -> Result<u16> {
        let mut value = 0;
        for _ in 0..4 {
            let byte = tri!(next_or_eof(self));
            let digit = match decode_hex_val(byte) {
                Some(digit) => digit,
                None => return error(self, ErrorCode::InvalidEscape),
            };
            value = (value << 4) + digit;
        }
        Ok(value)
    }

    #[cfg(feature = "raw_value")]
    fn begin_raw_buffering(&mut self) {
        self.raw_buffer = Some(Vec::new());
    }

    /// Panics if raw buffering was not switched on beforehand.
    #[cfg(feature = "raw_value")]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let bytes = self.raw_buffer.take().unwrap();
        match String::from_utf8(bytes) {
            Ok(text) => visitor.visit_map(OwnedRawDeserializer {
                raw_value: Some(text),
            }),
            Err(_) => error(self, ErrorCode::InvalidUnicodeCodePoint),
        }
    }

    const should_early_return_if_failed: bool = true;

    #[inline]
    #[cold]
    fn set_failed(&mut self, failed: &mut bool) {
        *failed = true;
    }
}
398
399 //////////////////////////////////////////////////////////////////////////////
400
401 impl<'a> SliceRead<'a> {
402 /// Create a JSON input source to read from a slice of bytes.
new(slice: &'a [u8]) -> Self403 pub fn new(slice: &'a [u8]) -> Self {
404 SliceRead {
405 slice,
406 index: 0,
407 #[cfg(feature = "raw_value")]
408 raw_buffering_start_index: 0,
409 }
410 }
411
position_of_index(&self, i: usize) -> Position412 fn position_of_index(&self, i: usize) -> Position {
413 let mut position = Position { line: 1, column: 0 };
414 for ch in &self.slice[..i] {
415 match *ch {
416 b'\n' => {
417 position.line += 1;
418 position.column = 0;
419 }
420 _ => {
421 position.column += 1;
422 }
423 }
424 }
425 position
426 }
427
428 /// The big optimization here over IoRead is that if the string contains no
429 /// backslash escape sequences, the returned &str is a slice of the raw JSON
430 /// data so we avoid copying into the scratch space.
parse_str_bytes<'s, T, F>( &'s mut self, scratch: &'s mut Vec<u8>, validate: bool, result: F, ) -> Result<Reference<'a, 's, T>> where T: ?Sized + 's, F: for<'f> FnOnce(&'s Self, &'f [u8]) -> Result<&'f T>,431 fn parse_str_bytes<'s, T, F>(
432 &'s mut self,
433 scratch: &'s mut Vec<u8>,
434 validate: bool,
435 result: F,
436 ) -> Result<Reference<'a, 's, T>>
437 where
438 T: ?Sized + 's,
439 F: for<'f> FnOnce(&'s Self, &'f [u8]) -> Result<&'f T>,
440 {
441 // Index of the first byte not yet copied into the scratch space.
442 let mut start = self.index;
443
444 loop {
445 while self.index < self.slice.len() && !ESCAPE[self.slice[self.index] as usize] {
446 self.index += 1;
447 }
448 if self.index == self.slice.len() {
449 return error(self, ErrorCode::EofWhileParsingString);
450 }
451 match self.slice[self.index] {
452 b'"' => {
453 if scratch.is_empty() {
454 // Fast path: return a slice of the raw JSON without any
455 // copying.
456 let borrowed = &self.slice[start..self.index];
457 self.index += 1;
458 return result(self, borrowed).map(Reference::Borrowed);
459 } else {
460 scratch.extend_from_slice(&self.slice[start..self.index]);
461 self.index += 1;
462 return result(self, scratch).map(Reference::Copied);
463 }
464 }
465 b'\\' => {
466 scratch.extend_from_slice(&self.slice[start..self.index]);
467 self.index += 1;
468 tri!(parse_escape(self, validate, scratch));
469 start = self.index;
470 }
471 _ => {
472 self.index += 1;
473 if validate {
474 return error(self, ErrorCode::ControlCharacterWhileParsingString);
475 }
476 }
477 }
478 }
479 }
480 }
481
// Opt SliceRead into the sealed supertrait so that it can implement Read.
impl<'a> private::Sealed for SliceRead<'a> {}
483
484 impl<'a> Read<'a> for SliceRead<'a> {
485 #[inline]
next(&mut self) -> Result<Option<u8>>486 fn next(&mut self) -> Result<Option<u8>> {
487 // `Ok(self.slice.get(self.index).map(|ch| { self.index += 1; *ch }))`
488 // is about 10% slower.
489 Ok(if self.index < self.slice.len() {
490 let ch = self.slice[self.index];
491 self.index += 1;
492 Some(ch)
493 } else {
494 None
495 })
496 }
497
498 #[inline]
peek(&mut self) -> Result<Option<u8>>499 fn peek(&mut self) -> Result<Option<u8>> {
500 // `Ok(self.slice.get(self.index).map(|ch| *ch))` is about 10% slower
501 // for some reason.
502 Ok(if self.index < self.slice.len() {
503 Some(self.slice[self.index])
504 } else {
505 None
506 })
507 }
508
509 #[inline]
discard(&mut self)510 fn discard(&mut self) {
511 self.index += 1;
512 }
513
position(&self) -> Position514 fn position(&self) -> Position {
515 self.position_of_index(self.index)
516 }
517
peek_position(&self) -> Position518 fn peek_position(&self) -> Position {
519 // Cap it at slice.len() just in case the most recent call was next()
520 // and it returned the last byte.
521 self.position_of_index(cmp::min(self.slice.len(), self.index + 1))
522 }
523
byte_offset(&self) -> usize524 fn byte_offset(&self) -> usize {
525 self.index
526 }
527
parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>>528 fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
529 self.parse_str_bytes(scratch, true, as_str)
530 }
531
parse_str_raw<'s>( &'s mut self, scratch: &'s mut Vec<u8>, ) -> Result<Reference<'a, 's, [u8]>>532 fn parse_str_raw<'s>(
533 &'s mut self,
534 scratch: &'s mut Vec<u8>,
535 ) -> Result<Reference<'a, 's, [u8]>> {
536 self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes))
537 }
538
ignore_str(&mut self) -> Result<()>539 fn ignore_str(&mut self) -> Result<()> {
540 loop {
541 while self.index < self.slice.len() && !ESCAPE[self.slice[self.index] as usize] {
542 self.index += 1;
543 }
544 if self.index == self.slice.len() {
545 return error(self, ErrorCode::EofWhileParsingString);
546 }
547 match self.slice[self.index] {
548 b'"' => {
549 self.index += 1;
550 return Ok(());
551 }
552 b'\\' => {
553 self.index += 1;
554 tri!(ignore_escape(self));
555 }
556 _ => {
557 return error(self, ErrorCode::ControlCharacterWhileParsingString);
558 }
559 }
560 }
561 }
562
decode_hex_escape(&mut self) -> Result<u16>563 fn decode_hex_escape(&mut self) -> Result<u16> {
564 if self.index + 4 > self.slice.len() {
565 self.index = self.slice.len();
566 return error(self, ErrorCode::EofWhileParsingString);
567 }
568
569 let mut n = 0;
570 for _ in 0..4 {
571 let ch = decode_hex_val(self.slice[self.index]);
572 self.index += 1;
573 match ch {
574 None => return error(self, ErrorCode::InvalidEscape),
575 Some(val) => {
576 n = (n << 4) + val;
577 }
578 }
579 }
580 Ok(n)
581 }
582
583 #[cfg(feature = "raw_value")]
begin_raw_buffering(&mut self)584 fn begin_raw_buffering(&mut self) {
585 self.raw_buffering_start_index = self.index;
586 }
587
588 #[cfg(feature = "raw_value")]
end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value> where V: Visitor<'a>,589 fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
590 where
591 V: Visitor<'a>,
592 {
593 let raw = &self.slice[self.raw_buffering_start_index..self.index];
594 let raw = match str::from_utf8(raw) {
595 Ok(raw) => raw,
596 Err(_) => return error(self, ErrorCode::InvalidUnicodeCodePoint),
597 };
598 visitor.visit_map(BorrowedRawDeserializer {
599 raw_value: Some(raw),
600 })
601 }
602
603 const should_early_return_if_failed: bool = false;
604
605 #[inline]
606 #[cold]
set_failed(&mut self, _failed: &mut bool)607 fn set_failed(&mut self, _failed: &mut bool) {
608 self.slice = &self.slice[..self.index];
609 }
610 }
611
612 //////////////////////////////////////////////////////////////////////////////
613
614 impl<'a> StrRead<'a> {
615 /// Create a JSON input source to read from a UTF-8 string.
new(s: &'a str) -> Self616 pub fn new(s: &'a str) -> Self {
617 StrRead {
618 delegate: SliceRead::new(s.as_bytes()),
619 #[cfg(feature = "raw_value")]
620 data: s,
621 }
622 }
623 }
624
// Opt StrRead into the sealed supertrait so that it can implement Read.
impl<'a> private::Sealed for StrRead<'a> {}
626
627 impl<'a> Read<'a> for StrRead<'a> {
628 #[inline]
next(&mut self) -> Result<Option<u8>>629 fn next(&mut self) -> Result<Option<u8>> {
630 self.delegate.next()
631 }
632
633 #[inline]
peek(&mut self) -> Result<Option<u8>>634 fn peek(&mut self) -> Result<Option<u8>> {
635 self.delegate.peek()
636 }
637
638 #[inline]
discard(&mut self)639 fn discard(&mut self) {
640 self.delegate.discard();
641 }
642
position(&self) -> Position643 fn position(&self) -> Position {
644 self.delegate.position()
645 }
646
peek_position(&self) -> Position647 fn peek_position(&self) -> Position {
648 self.delegate.peek_position()
649 }
650
byte_offset(&self) -> usize651 fn byte_offset(&self) -> usize {
652 self.delegate.byte_offset()
653 }
654
parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>>655 fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
656 self.delegate.parse_str_bytes(scratch, true, |_, bytes| {
657 // The deserialization input came in as &str with a UTF-8 guarantee,
658 // and the \u-escapes are checked along the way, so don't need to
659 // check here.
660 Ok(unsafe { str::from_utf8_unchecked(bytes) })
661 })
662 }
663
parse_str_raw<'s>( &'s mut self, scratch: &'s mut Vec<u8>, ) -> Result<Reference<'a, 's, [u8]>>664 fn parse_str_raw<'s>(
665 &'s mut self,
666 scratch: &'s mut Vec<u8>,
667 ) -> Result<Reference<'a, 's, [u8]>> {
668 self.delegate.parse_str_raw(scratch)
669 }
670
ignore_str(&mut self) -> Result<()>671 fn ignore_str(&mut self) -> Result<()> {
672 self.delegate.ignore_str()
673 }
674
decode_hex_escape(&mut self) -> Result<u16>675 fn decode_hex_escape(&mut self) -> Result<u16> {
676 self.delegate.decode_hex_escape()
677 }
678
679 #[cfg(feature = "raw_value")]
begin_raw_buffering(&mut self)680 fn begin_raw_buffering(&mut self) {
681 self.delegate.begin_raw_buffering();
682 }
683
684 #[cfg(feature = "raw_value")]
end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value> where V: Visitor<'a>,685 fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
686 where
687 V: Visitor<'a>,
688 {
689 let raw = &self.data[self.delegate.raw_buffering_start_index..self.delegate.index];
690 visitor.visit_map(BorrowedRawDeserializer {
691 raw_value: Some(raw),
692 })
693 }
694
695 const should_early_return_if_failed: bool = false;
696
697 #[inline]
698 #[cold]
set_failed(&mut self, failed: &mut bool)699 fn set_failed(&mut self, failed: &mut bool) {
700 self.delegate.set_failed(failed);
701 }
702 }
703
704 //////////////////////////////////////////////////////////////////////////////
705
// A mutable reference to a reader is itself a reader, so it needs the sealed
// supertrait as well.
impl<'a, 'de, R> private::Sealed for &'a mut R where R: Read<'de> {}
707
708 impl<'a, 'de, R> Read<'de> for &'a mut R
709 where
710 R: Read<'de>,
711 {
next(&mut self) -> Result<Option<u8>>712 fn next(&mut self) -> Result<Option<u8>> {
713 R::next(self)
714 }
715
peek(&mut self) -> Result<Option<u8>>716 fn peek(&mut self) -> Result<Option<u8>> {
717 R::peek(self)
718 }
719
discard(&mut self)720 fn discard(&mut self) {
721 R::discard(self);
722 }
723
position(&self) -> Position724 fn position(&self) -> Position {
725 R::position(self)
726 }
727
peek_position(&self) -> Position728 fn peek_position(&self) -> Position {
729 R::peek_position(self)
730 }
731
byte_offset(&self) -> usize732 fn byte_offset(&self) -> usize {
733 R::byte_offset(self)
734 }
735
parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>>736 fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
737 R::parse_str(self, scratch)
738 }
739
parse_str_raw<'s>( &'s mut self, scratch: &'s mut Vec<u8>, ) -> Result<Reference<'de, 's, [u8]>>740 fn parse_str_raw<'s>(
741 &'s mut self,
742 scratch: &'s mut Vec<u8>,
743 ) -> Result<Reference<'de, 's, [u8]>> {
744 R::parse_str_raw(self, scratch)
745 }
746
ignore_str(&mut self) -> Result<()>747 fn ignore_str(&mut self) -> Result<()> {
748 R::ignore_str(self)
749 }
750
decode_hex_escape(&mut self) -> Result<u16>751 fn decode_hex_escape(&mut self) -> Result<u16> {
752 R::decode_hex_escape(self)
753 }
754
755 #[cfg(feature = "raw_value")]
begin_raw_buffering(&mut self)756 fn begin_raw_buffering(&mut self) {
757 R::begin_raw_buffering(self);
758 }
759
760 #[cfg(feature = "raw_value")]
end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value> where V: Visitor<'de>,761 fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
762 where
763 V: Visitor<'de>,
764 {
765 R::end_raw_buffering(self, visitor)
766 }
767
768 const should_early_return_if_failed: bool = R::should_early_return_if_failed;
769
set_failed(&mut self, failed: &mut bool)770 fn set_failed(&mut self, failed: &mut bool) {
771 R::set_failed(self, failed);
772 }
773 }
774
775 //////////////////////////////////////////////////////////////////////////////
776
/// Marker for whether StreamDeserializer can implement FusedIterator.
///
/// Sealed, like `Read`, so it cannot be implemented outside this crate.
pub trait Fused: private::Sealed {}
// Implemented for the slice- and string-backed readers.
impl<'a> Fused for SliceRead<'a> {}
impl<'a> Fused for StrRead<'a> {}
781
// Lookup table of bytes that must be escaped. A value of true at index i means
// that byte i requires an escape sequence in the input.
//
// The string parsers use it to find the next byte that needs special handling:
// a closing quote, a backslash, or an unescaped control character.
static ESCAPE: [bool; 256] = {
    const CT: bool = true; // control character \x00..=\x1F
    const QU: bool = true; // quote \x22
    const BS: bool = true; // backslash \x5C
    const __: bool = false; // allow unescaped
    [
        // Rows are the high nibble of the byte, columns the low nibble.
        //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
        CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, // 0
        CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, // 1
        __, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4
        __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
    ]
};
809
next_or_eof<'de, R>(read: &mut R) -> Result<u8> where R: ?Sized + Read<'de>,810 fn next_or_eof<'de, R>(read: &mut R) -> Result<u8>
811 where
812 R: ?Sized + Read<'de>,
813 {
814 match tri!(read.next()) {
815 Some(b) => Ok(b),
816 None => error(read, ErrorCode::EofWhileParsingString),
817 }
818 }
819
peek_or_eof<'de, R>(read: &mut R) -> Result<u8> where R: ?Sized + Read<'de>,820 fn peek_or_eof<'de, R>(read: &mut R) -> Result<u8>
821 where
822 R: ?Sized + Read<'de>,
823 {
824 match tri!(read.peek()) {
825 Some(b) => Ok(b),
826 None => error(read, ErrorCode::EofWhileParsingString),
827 }
828 }
829
error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T> where R: ?Sized + Read<'de>,830 fn error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T>
831 where
832 R: ?Sized + Read<'de>,
833 {
834 let position = read.position();
835 Err(Error::syntax(reason, position.line, position.column))
836 }
837
as_str<'de, 's, R: Read<'de>>(read: &R, slice: &'s [u8]) -> Result<&'s str>838 fn as_str<'de, 's, R: Read<'de>>(read: &R, slice: &'s [u8]) -> Result<&'s str> {
839 str::from_utf8(slice).or_else(|_| error(read, ErrorCode::InvalidUnicodeCodePoint))
840 }
841
/// Parses a JSON escape sequence and appends it into the scratch space. Assumes
/// the previous byte read was a backslash.
///
/// `validate` selects how lone UTF-16 surrogates in `\u` escapes are treated:
/// when true they are a syntax error, when false they are appended to
/// `scratch` as a three-byte sequence (see `encode_surrogate` below).
fn parse_escape<'de, R: Read<'de>>(
    read: &mut R,
    validate: bool,
    scratch: &mut Vec<u8>,
) -> Result<()> {
    let ch = tri!(next_or_eof(read));

    match ch {
        // Single-character escapes map directly onto one output byte.
        b'"' => scratch.push(b'"'),
        b'\\' => scratch.push(b'\\'),
        b'/' => scratch.push(b'/'),
        b'b' => scratch.push(b'\x08'),
        b'f' => scratch.push(b'\x0c'),
        b'n' => scratch.push(b'\n'),
        b'r' => scratch.push(b'\r'),
        b't' => scratch.push(b'\t'),
        b'u' => {
            // Writes `n` using the three-byte UTF-8 bit layout
            // (1110xxxx 10xxxxxx 10xxxxxx). Only used for lone surrogates when
            // not validating.
            fn encode_surrogate(scratch: &mut Vec<u8>, n: u16) {
                scratch.extend_from_slice(&[
                    (n >> 12 & 0b0000_1111) as u8 | 0b1110_0000,
                    (n >> 6 & 0b0011_1111) as u8 | 0b1000_0000,
                    (n & 0b0011_1111) as u8 | 0b1000_0000,
                ]);
            }

            let c = match tri!(read.decode_hex_escape()) {
                // A trailing surrogate with no leading surrogate before it.
                n @ 0xDC00..=0xDFFF => {
                    return if validate {
                        error(read, ErrorCode::LoneLeadingSurrogateInHexEscape)
                    } else {
                        encode_surrogate(scratch, n);
                        Ok(())
                    };
                }

                // Non-BMP characters are encoded as a sequence of two hex
                // escapes, representing UTF-16 surrogates. If deserializing a
                // utf-8 string the surrogates are required to be paired,
                // whereas deserializing a byte string accepts lone surrogates.
                n1 @ 0xD800..=0xDBFF => {
                    // The leading surrogate must be followed by `\u`. The
                    // bytes are peeked first so that, when not validating,
                    // whatever follows is left in the input for the caller.
                    if tri!(peek_or_eof(read)) == b'\\' {
                        read.discard();
                    } else {
                        return if validate {
                            read.discard();
                            error(read, ErrorCode::UnexpectedEndOfHexEscape)
                        } else {
                            encode_surrogate(scratch, n1);
                            Ok(())
                        };
                    }

                    if tri!(peek_or_eof(read)) == b'u' {
                        read.discard();
                    } else {
                        return if validate {
                            read.discard();
                            error(read, ErrorCode::UnexpectedEndOfHexEscape)
                        } else {
                            encode_surrogate(scratch, n1);
                            // The \ prior to this byte started an escape sequence,
                            // so we need to parse that now. This recursive call
                            // does not blow the stack on malicious input because
                            // the escape is not \u, so it will be handled by one
                            // of the easy nonrecursive cases.
                            parse_escape(read, validate, scratch)
                        };
                    }

                    let n2 = tri!(read.decode_hex_escape());

                    // The second escape has to be a trailing surrogate. This
                    // is an error regardless of `validate`.
                    if n2 < 0xDC00 || n2 > 0xDFFF {
                        return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
                    }

                    // Combine the pair into the code point it represents.
                    let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;

                    match char::from_u32(n) {
                        Some(c) => c,
                        None => {
                            return error(read, ErrorCode::InvalidUnicodeCodePoint);
                        }
                    }
                }

                // Any other value is a code point in its own right.
                n => match char::from_u32(n as u32) {
                    Some(c) => c,
                    None => {
                        return error(read, ErrorCode::InvalidUnicodeCodePoint);
                    }
                },
            };

            // Append the character as UTF-8.
            scratch.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
        }
        _ => {
            return error(read, ErrorCode::InvalidEscape);
        }
    }

    Ok(())
}
946
947 /// Parses a JSON escape sequence and discards the value. Assumes the previous
948 /// byte read was a backslash.
ignore_escape<'de, R>(read: &mut R) -> Result<()> where R: ?Sized + Read<'de>,949 fn ignore_escape<'de, R>(read: &mut R) -> Result<()>
950 where
951 R: ?Sized + Read<'de>,
952 {
953 let ch = tri!(next_or_eof(read));
954
955 match ch {
956 b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {}
957 b'u' => {
958 let n = match tri!(read.decode_hex_escape()) {
959 0xDC00..=0xDFFF => {
960 return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
961 }
962
963 // Non-BMP characters are encoded as a sequence of
964 // two hex escapes, representing UTF-16 surrogates.
965 n1 @ 0xD800..=0xDBFF => {
966 if tri!(next_or_eof(read)) != b'\\' {
967 return error(read, ErrorCode::UnexpectedEndOfHexEscape);
968 }
969 if tri!(next_or_eof(read)) != b'u' {
970 return error(read, ErrorCode::UnexpectedEndOfHexEscape);
971 }
972
973 let n2 = tri!(read.decode_hex_escape());
974
975 if n2 < 0xDC00 || n2 > 0xDFFF {
976 return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
977 }
978
979 (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000
980 }
981
982 n => n as u32,
983 };
984
985 if char::from_u32(n).is_none() {
986 return error(read, ErrorCode::InvalidUnicodeCodePoint);
987 }
988 }
989 _ => {
990 return error(read, ErrorCode::InvalidEscape);
991 }
992 }
993
994 Ok(())
995 }
996
// Lookup table mapping a byte to the value of the hexadecimal digit it spells
// (0-9, A-F, a-f), or to 255 for every byte that is not a hex digit.
static HEX: [u8; 256] = {
    const __: u8 = 255; // not a hex digit
    [
        // Rows are the high nibble of the byte, columns the low nibble.
        //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 1
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
        00, 01, 02, 03, 04, 05, 06, 07, 08, 09, __, __, __, __, __, __, // 3
        __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 4
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 5
        __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 6
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
    ]
};

/// Returns the numeric value (0..=15) of an ASCII hexadecimal digit, or
/// `None` if `val` is not one.
fn decode_hex_val(val: u8) -> Option<u16> {
    match HEX[val as usize] {
        // 255 is the table's marker for "not a hex digit".
        255 => None,
        digit => Some(u16::from(digit)),
    }
}
1028