1 use crate::error::{Error, ErrorCode, Result};
2 use alloc::vec::Vec;
3 use core::char;
4 use core::cmp;
5 use core::ops::Deref;
6 use core::str;
7
8 #[cfg(feature = "std")]
9 use crate::io;
10 #[cfg(feature = "std")]
11 use crate::iter::LineColIterator;
12
13 #[cfg(feature = "raw_value")]
14 use crate::raw::BorrowedRawDeserializer;
15 #[cfg(all(feature = "raw_value", feature = "std"))]
16 use crate::raw::OwnedRawDeserializer;
17 #[cfg(feature = "raw_value")]
18 use serde::de::Visitor;
19
/// Trait used by the deserializer for iterating over input. This is manually
/// "specialized" for iterating over &[u8]. Once feature(specialization) is
/// stable we can use actual specialization.
///
/// This trait is sealed and cannot be implemented for types outside of
/// `serde_json`.
pub trait Read<'de>: private::Sealed {
    /// Consumes and returns the next byte of input, or `Ok(None)` once the
    /// input is exhausted.
    #[doc(hidden)]
    fn next(&mut self) -> Result<Option<u8>>;
    /// Returns the next byte of input without consuming it, or `Ok(None)` once
    /// the input is exhausted.
    #[doc(hidden)]
    fn peek(&mut self) -> Result<Option<u8>>;

    /// Only valid after a call to peek(). Discards the peeked byte.
    #[doc(hidden)]
    fn discard(&mut self);

    /// Position of the most recent call to next().
    ///
    /// The most recent call was probably next() and not peek(), but this method
    /// should try to return a sensible result if the most recent call was
    /// actually peek() because we don't always know.
    ///
    /// Only called in case of an error, so performance is not important.
    #[doc(hidden)]
    fn position(&self) -> Position;

    /// Position of the most recent call to peek().
    ///
    /// The most recent call was probably peek() and not next(), but this method
    /// should try to return a sensible result if the most recent call was
    /// actually next() because we don't always know.
    ///
    /// Only called in case of an error, so performance is not important.
    #[doc(hidden)]
    fn peek_position(&self) -> Position;

    /// Offset from the beginning of the input to the next byte that would be
    /// returned by next() or peek().
    #[doc(hidden)]
    fn byte_offset(&self) -> usize;

    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
    /// string until the next quotation mark using the given scratch space if
    /// necessary. The scratch space is initially empty.
    ///
    /// The returned `Reference` either borrows from the input (`'de`) or from
    /// `self`/`scratch` (`'s`).
    #[doc(hidden)]
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>>;

    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
    /// string until the next quotation mark using the given scratch space if
    /// necessary. The scratch space is initially empty.
    ///
    /// This function returns the raw bytes in the string with escape sequences
    /// expanded but without performing unicode validation.
    #[doc(hidden)]
    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'de, 's, [u8]>>;

    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
    /// string until the next quotation mark but discards the data.
    #[doc(hidden)]
    fn ignore_str(&mut self) -> Result<()>;

    /// Assumes the previous bytes were a hex escape prefix (`\u`) in a string.
    /// Parses the four hexadecimal digits that follow and returns their value.
    #[doc(hidden)]
    fn decode_hex_escape(&mut self) -> Result<u16>;

    /// Switch raw buffering mode on.
    ///
    /// This is used when deserializing `RawValue`.
    #[cfg(feature = "raw_value")]
    #[doc(hidden)]
    fn begin_raw_buffering(&mut self);

    /// Switch raw buffering mode off and provides the raw buffered data to the
    /// given visitor.
    #[cfg(feature = "raw_value")]
    #[doc(hidden)]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>;

    /// Whether StreamDeserializer::next needs to check the failed flag. True
    /// for IoRead, false for StrRead and SliceRead which can track failure by
    /// truncating their input slice to avoid the extra check on every next
    /// call.
    #[doc(hidden)]
    const should_early_return_if_failed: bool;

    /// Mark a persistent failure of StreamDeserializer, either by setting the
    /// flag or by truncating the input data.
    #[doc(hidden)]
    fn set_failed(&mut self, failed: &mut bool);
}
116
/// Line/column location in the input, used when building syntax errors.
pub struct Position {
    /// Line number. `SliceRead` counts lines starting at 1.
    pub line: usize,
    /// Column within the line. `SliceRead` reports the number of bytes seen
    /// since the most recent `\n` (or since the start of the input).
    pub column: usize,
}
121
/// A reference to parsed string data that lives for one of two lifetimes.
///
/// `Borrowed` points at data with the longer lifetime `'b` (the readers in
/// this file use it for slices of the original input), while `Copied` points
/// at data with the shorter lifetime `'c` (the readers use it for the scratch
/// buffer). Either way the value dereferences to `T`.
pub enum Reference<'b, 'c, T>
where
    T: ?Sized + 'static,
{
    /// Data referenced for the lifetime `'b`.
    Borrowed(&'b T),
    /// Data referenced for the lifetime `'c`.
    Copied(&'c T),
}

impl<'b, 'c, T> Deref for Reference<'b, 'c, T>
where
    T: ?Sized + 'static,
{
    type Target = T;

    /// Returns the wrapped reference regardless of which variant holds it.
    fn deref(&self) -> &T {
        match self {
            Reference::Borrowed(long_lived) => *long_lived,
            Reference::Copied(short_lived) => *short_lived,
        }
    }
}
143
/// JSON input source that reads from a std::io input stream.
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
pub struct IoRead<R>
where
    R: io::Read,
{
    /// Byte iterator over the reader; also queried for the current line,
    /// column and byte offset.
    iter: LineColIterator<io::Bytes<R>>,
    /// Temporary storage of peeked byte.
    ch: Option<u8>,
    /// While `Some`, every byte consumed through next() or discard() is also
    /// appended here. Installed by begin_raw_buffering() and taken by
    /// end_raw_buffering().
    #[cfg(feature = "raw_value")]
    raw_buffer: Option<Vec<u8>>,
}
157
/// JSON input source that reads from a slice of bytes.
//
// This is more efficient than other iterators because peek() can be read-only
// and we can compute line/col position only if an error happens.
pub struct SliceRead<'a> {
    /// The input being read. set_failed() truncates it to the bytes consumed
    /// so far.
    slice: &'a [u8],
    /// Index of the *next* byte that will be returned by next() or peek().
    index: usize,
    /// Value of `index` at the time begin_raw_buffering() was called; the raw
    /// value handed out by end_raw_buffering() starts here.
    #[cfg(feature = "raw_value")]
    raw_buffering_start_index: usize,
}
169
/// JSON input source that reads from a UTF-8 string.
//
// Able to elide UTF-8 checks by assuming that the input is valid UTF-8.
pub struct StrRead<'a> {
    /// Byte-level reader over the string's bytes; all reading is forwarded to
    /// it.
    delegate: SliceRead<'a>,
    /// The original string, kept so end_raw_buffering() can slice out a `&str`
    /// without re-validating UTF-8.
    #[cfg(feature = "raw_value")]
    data: &'a str,
}
178
// Prevent users from implementing the Read trait.
//
// `Read` has `Sealed` as a supertrait, and `Sealed` lives in this private
// module, so only code that can name `private::Sealed` can implement `Read`.
mod private {
    pub trait Sealed {}
}
183
184 //////////////////////////////////////////////////////////////////////////////
185
#[cfg(feature = "std")]
impl<R> IoRead<R>
where
    R: io::Read,
{
    /// Create a JSON input source to read from a std::io input stream.
    ///
    /// The reader is consumed one byte at a time through `reader.bytes()`;
    /// no byte is peeked and raw buffering is off initially.
    pub fn new(reader: R) -> Self {
        let iter = LineColIterator::new(reader.bytes());
        IoRead {
            iter,
            ch: None,
            #[cfg(feature = "raw_value")]
            raw_buffer: None,
        }
    }
}
201
// Marker impl that allows `IoRead` to implement the sealed `Read` trait.
#[cfg(feature = "std")]
impl<R> private::Sealed for IoRead<R> where R: io::Read {}
204
#[cfg(feature = "std")]
impl<R> IoRead<R>
where
    R: io::Read,
{
    /// Reads string contents up to the closing quotation mark, copying the
    /// unescaped bytes into `scratch`, then hands `scratch` to `result`.
    ///
    /// Escape sequences are expanded through `parse_escape`. When `validate`
    /// is true an unescaped control character is reported as
    /// `ControlCharacterWhileParsingString`; otherwise it is copied through
    /// unchanged. Running out of input is reported by `next_or_eof`.
    fn parse_str_bytes<'s, T, F>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
        validate: bool,
        result: F,
    ) -> Result<T>
    where
        T: 's,
        F: FnOnce(&'s Self, &'s [u8]) -> Result<T>,
    {
        loop {
            let byte = tri!(next_or_eof(self));
            match byte {
                // Ordinary byte: copy it verbatim.
                _ if !ESCAPE[byte as usize] => scratch.push(byte),
                // Closing quote: the string is complete.
                b'"' => return result(self, scratch),
                // Start of an escape sequence.
                b'\\' => tri!(parse_escape(self, validate, scratch)),
                // Whatever remains in ESCAPE is a raw control character.
                _ if validate => {
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
                }
                _ => scratch.push(byte),
            }
        }
    }
}
243
#[cfg(feature = "std")]
impl<'de, R> Read<'de> for IoRead<R>
where
    R: io::Read,
{
    /// Returns the peeked byte if there is one, otherwise the next byte from
    /// the reader. With raw buffering on, the byte is also recorded.
    #[inline]
    fn next(&mut self) -> Result<Option<u8>> {
        let byte = match self.ch.take() {
            Some(peeked) => peeked,
            None => match self.iter.next() {
                Some(Ok(fresh)) => fresh,
                Some(Err(err)) => return Err(Error::io(err)),
                None => return Ok(None),
            },
        };

        #[cfg(feature = "raw_value")]
        {
            if let Some(ref mut buf) = self.raw_buffer {
                buf.push(byte);
            }
        }

        Ok(Some(byte))
    }

    /// Returns the next byte without consuming it, reading one byte from the
    /// underlying iterator and stashing it in `self.ch` if nothing is stashed
    /// yet.
    #[inline]
    fn peek(&mut self) -> Result<Option<u8>> {
        if self.ch.is_none() {
            self.ch = match self.iter.next() {
                Some(Ok(fresh)) => Some(fresh),
                Some(Err(err)) => return Err(Error::io(err)),
                None => None,
            };
        }
        Ok(self.ch)
    }

    /// Drops the peeked byte.
    #[cfg(not(feature = "raw_value"))]
    #[inline]
    fn discard(&mut self) {
        self.ch = None;
    }

    /// Drops the peeked byte, recording it first if raw buffering is on.
    #[cfg(feature = "raw_value")]
    fn discard(&mut self) {
        let peeked = match self.ch.take() {
            Some(peeked) => peeked,
            None => return,
        };
        if let Some(ref mut buf) = self.raw_buffer {
            buf.push(peeked);
        }
    }

    /// Line and column as currently tracked by the underlying iterator.
    fn position(&self) -> Position {
        let line = self.iter.line();
        let column = self.iter.col();
        Position { line, column }
    }

    fn peek_position(&self) -> Position {
        // The LineColIterator updates its position during peek() so it has the
        // right one here.
        self.position()
    }

    /// Offset of the next byte next()/peek() would return. A stashed peeked
    /// byte has already been pulled from the iterator, so it is subtracted.
    fn byte_offset(&self) -> usize {
        let consumed = self.iter.byte_offset();
        if self.ch.is_some() {
            consumed - 1
        } else {
            consumed
        }
    }

    /// Parses a string with validation; the result always lives in `scratch`.
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
        match self.parse_str_bytes(scratch, true, as_str) {
            Ok(text) => Ok(Reference::Copied(text)),
            Err(err) => Err(err),
        }
    }

    /// Parses a string without validation; the result always lives in
    /// `scratch`.
    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'de, 's, [u8]>> {
        match self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes)) {
            Ok(bytes) => Ok(Reference::Copied(bytes)),
            Err(err) => Err(err),
        }
    }

    /// Skips over string contents up to and including the closing quotation
    /// mark, rejecting unescaped control characters.
    fn ignore_str(&mut self) -> Result<()> {
        loop {
            let byte = tri!(next_or_eof(self));
            match byte {
                _ if !ESCAPE[byte as usize] => {}
                b'"' => return Ok(()),
                b'\\' => tri!(ignore_escape(self)),
                _ => return error(self, ErrorCode::ControlCharacterWhileParsingString),
            }
        }
    }

    /// Reads four hexadecimal digits and returns the value they encode.
    fn decode_hex_escape(&mut self) -> Result<u16> {
        let mut value: u16 = 0;
        for _ in 0..4 {
            let byte = tri!(next_or_eof(self));
            let digit = match decode_hex_val(byte) {
                Some(digit) => digit,
                None => return error(self, ErrorCode::InvalidEscape),
            };
            value = (value << 4) + digit;
        }
        Ok(value)
    }

    /// Starts recording consumed bytes into a fresh buffer.
    #[cfg(feature = "raw_value")]
    fn begin_raw_buffering(&mut self) {
        self.raw_buffer = Some(Vec::new());
    }

    /// Stops recording and passes the recorded bytes, as an owned string, to
    /// the visitor.
    ///
    /// Panics if no buffer is installed, i.e. begin_raw_buffering() was not
    /// called first.
    #[cfg(feature = "raw_value")]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let recorded = self.raw_buffer.take().unwrap();
        match String::from_utf8(recorded) {
            Ok(raw) => visitor.visit_map(OwnedRawDeserializer {
                raw_value: Some(raw),
            }),
            Err(_) => error(self, ErrorCode::InvalidUnicodeCodePoint),
        }
    }

    const should_early_return_if_failed: bool = true;

    /// Records the failure in the caller-provided flag.
    #[inline]
    #[cold]
    fn set_failed(&mut self, failed: &mut bool) {
        *failed = true;
    }
}
401
402 //////////////////////////////////////////////////////////////////////////////
403
404 impl<'a> SliceRead<'a> {
405 /// Create a JSON input source to read from a slice of bytes.
new(slice: &'a [u8]) -> Self406 pub fn new(slice: &'a [u8]) -> Self {
407 SliceRead {
408 slice,
409 index: 0,
410 #[cfg(feature = "raw_value")]
411 raw_buffering_start_index: 0,
412 }
413 }
414
position_of_index(&self, i: usize) -> Position415 fn position_of_index(&self, i: usize) -> Position {
416 let mut position = Position { line: 1, column: 0 };
417 for ch in &self.slice[..i] {
418 match *ch {
419 b'\n' => {
420 position.line += 1;
421 position.column = 0;
422 }
423 _ => {
424 position.column += 1;
425 }
426 }
427 }
428 position
429 }
430
431 /// The big optimization here over IoRead is that if the string contains no
432 /// backslash escape sequences, the returned &str is a slice of the raw JSON
433 /// data so we avoid copying into the scratch space.
parse_str_bytes<'s, T, F>( &'s mut self, scratch: &'s mut Vec<u8>, validate: bool, result: F, ) -> Result<Reference<'a, 's, T>> where T: ?Sized + 's, F: for<'f> FnOnce(&'s Self, &'f [u8]) -> Result<&'f T>,434 fn parse_str_bytes<'s, T, F>(
435 &'s mut self,
436 scratch: &'s mut Vec<u8>,
437 validate: bool,
438 result: F,
439 ) -> Result<Reference<'a, 's, T>>
440 where
441 T: ?Sized + 's,
442 F: for<'f> FnOnce(&'s Self, &'f [u8]) -> Result<&'f T>,
443 {
444 // Index of the first byte not yet copied into the scratch space.
445 let mut start = self.index;
446
447 loop {
448 while self.index < self.slice.len() && !ESCAPE[self.slice[self.index] as usize] {
449 self.index += 1;
450 }
451 if self.index == self.slice.len() {
452 return error(self, ErrorCode::EofWhileParsingString);
453 }
454 match self.slice[self.index] {
455 b'"' => {
456 if scratch.is_empty() {
457 // Fast path: return a slice of the raw JSON without any
458 // copying.
459 let borrowed = &self.slice[start..self.index];
460 self.index += 1;
461 return result(self, borrowed).map(Reference::Borrowed);
462 } else {
463 scratch.extend_from_slice(&self.slice[start..self.index]);
464 self.index += 1;
465 return result(self, scratch).map(Reference::Copied);
466 }
467 }
468 b'\\' => {
469 scratch.extend_from_slice(&self.slice[start..self.index]);
470 self.index += 1;
471 tri!(parse_escape(self, validate, scratch));
472 start = self.index;
473 }
474 _ => {
475 self.index += 1;
476 if validate {
477 return error(self, ErrorCode::ControlCharacterWhileParsingString);
478 }
479 }
480 }
481 }
482 }
483 }
484
// Marker impl that allows `SliceRead` to implement the sealed `Read` trait.
impl<'a> private::Sealed for SliceRead<'a> {}
486
487 impl<'a> Read<'a> for SliceRead<'a> {
488 #[inline]
next(&mut self) -> Result<Option<u8>>489 fn next(&mut self) -> Result<Option<u8>> {
490 // `Ok(self.slice.get(self.index).map(|ch| { self.index += 1; *ch }))`
491 // is about 10% slower.
492 Ok(if self.index < self.slice.len() {
493 let ch = self.slice[self.index];
494 self.index += 1;
495 Some(ch)
496 } else {
497 None
498 })
499 }
500
501 #[inline]
peek(&mut self) -> Result<Option<u8>>502 fn peek(&mut self) -> Result<Option<u8>> {
503 // `Ok(self.slice.get(self.index).map(|ch| *ch))` is about 10% slower
504 // for some reason.
505 Ok(if self.index < self.slice.len() {
506 Some(self.slice[self.index])
507 } else {
508 None
509 })
510 }
511
512 #[inline]
discard(&mut self)513 fn discard(&mut self) {
514 self.index += 1;
515 }
516
position(&self) -> Position517 fn position(&self) -> Position {
518 self.position_of_index(self.index)
519 }
520
peek_position(&self) -> Position521 fn peek_position(&self) -> Position {
522 // Cap it at slice.len() just in case the most recent call was next()
523 // and it returned the last byte.
524 self.position_of_index(cmp::min(self.slice.len(), self.index + 1))
525 }
526
byte_offset(&self) -> usize527 fn byte_offset(&self) -> usize {
528 self.index
529 }
530
parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>>531 fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
532 self.parse_str_bytes(scratch, true, as_str)
533 }
534
parse_str_raw<'s>( &'s mut self, scratch: &'s mut Vec<u8>, ) -> Result<Reference<'a, 's, [u8]>>535 fn parse_str_raw<'s>(
536 &'s mut self,
537 scratch: &'s mut Vec<u8>,
538 ) -> Result<Reference<'a, 's, [u8]>> {
539 self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes))
540 }
541
ignore_str(&mut self) -> Result<()>542 fn ignore_str(&mut self) -> Result<()> {
543 loop {
544 while self.index < self.slice.len() && !ESCAPE[self.slice[self.index] as usize] {
545 self.index += 1;
546 }
547 if self.index == self.slice.len() {
548 return error(self, ErrorCode::EofWhileParsingString);
549 }
550 match self.slice[self.index] {
551 b'"' => {
552 self.index += 1;
553 return Ok(());
554 }
555 b'\\' => {
556 self.index += 1;
557 tri!(ignore_escape(self));
558 }
559 _ => {
560 return error(self, ErrorCode::ControlCharacterWhileParsingString);
561 }
562 }
563 }
564 }
565
decode_hex_escape(&mut self) -> Result<u16>566 fn decode_hex_escape(&mut self) -> Result<u16> {
567 if self.index + 4 > self.slice.len() {
568 self.index = self.slice.len();
569 return error(self, ErrorCode::EofWhileParsingString);
570 }
571
572 let mut n = 0;
573 for _ in 0..4 {
574 let ch = decode_hex_val(self.slice[self.index]);
575 self.index += 1;
576 match ch {
577 None => return error(self, ErrorCode::InvalidEscape),
578 Some(val) => {
579 n = (n << 4) + val;
580 }
581 }
582 }
583 Ok(n)
584 }
585
586 #[cfg(feature = "raw_value")]
begin_raw_buffering(&mut self)587 fn begin_raw_buffering(&mut self) {
588 self.raw_buffering_start_index = self.index;
589 }
590
591 #[cfg(feature = "raw_value")]
end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value> where V: Visitor<'a>,592 fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
593 where
594 V: Visitor<'a>,
595 {
596 let raw = &self.slice[self.raw_buffering_start_index..self.index];
597 let raw = match str::from_utf8(raw) {
598 Ok(raw) => raw,
599 Err(_) => return error(self, ErrorCode::InvalidUnicodeCodePoint),
600 };
601 visitor.visit_map(BorrowedRawDeserializer {
602 raw_value: Some(raw),
603 })
604 }
605
606 const should_early_return_if_failed: bool = false;
607
608 #[inline]
609 #[cold]
set_failed(&mut self, _failed: &mut bool)610 fn set_failed(&mut self, _failed: &mut bool) {
611 self.slice = &self.slice[..self.index];
612 }
613 }
614
615 //////////////////////////////////////////////////////////////////////////////
616
617 impl<'a> StrRead<'a> {
618 /// Create a JSON input source to read from a UTF-8 string.
new(s: &'a str) -> Self619 pub fn new(s: &'a str) -> Self {
620 StrRead {
621 delegate: SliceRead::new(s.as_bytes()),
622 #[cfg(feature = "raw_value")]
623 data: s,
624 }
625 }
626 }
627
// Marker impl that allows `StrRead` to implement the sealed `Read` trait.
impl<'a> private::Sealed for StrRead<'a> {}
629
// Every method forwards to the inner `SliceRead`, except the two that can
// exploit the fact that the input arrived as a `&str`: parse_str() and
// end_raw_buffering() skip UTF-8 validation.
impl<'a> Read<'a> for StrRead<'a> {
    #[inline]
    fn next(&mut self) -> Result<Option<u8>> {
        self.delegate.next()
    }

    #[inline]
    fn peek(&mut self) -> Result<Option<u8>> {
        self.delegate.peek()
    }

    #[inline]
    fn discard(&mut self) {
        self.delegate.discard();
    }

    fn position(&self) -> Position {
        self.delegate.position()
    }

    fn peek_position(&self) -> Position {
        self.delegate.peek_position()
    }

    fn byte_offset(&self) -> usize {
        self.delegate.byte_offset()
    }

    /// Parses a string with validation enabled, but converts the resulting
    /// bytes to `&str` without a UTF-8 check.
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
        self.delegate.parse_str_bytes(scratch, true, |_, bytes| {
            // SAFETY:
            // The deserialization input came in as &str with a UTF-8 guarantee,
            // and the \u-escapes are checked along the way, so don't need to
            // check here.
            Ok(unsafe { str::from_utf8_unchecked(bytes) })
        })
    }

    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'a, 's, [u8]>> {
        self.delegate.parse_str_raw(scratch)
    }

    fn ignore_str(&mut self) -> Result<()> {
        self.delegate.ignore_str()
    }

    fn decode_hex_escape(&mut self) -> Result<u16> {
        self.delegate.decode_hex_escape()
    }

    #[cfg(feature = "raw_value")]
    fn begin_raw_buffering(&mut self) {
        self.delegate.begin_raw_buffering();
    }

    /// Passes the input between the remembered start and the current index to
    /// the visitor. Slicing the original `&str` avoids the UTF-8 check that
    /// `SliceRead` performs; the slice operation panics if either index is not
    /// on a character boundary.
    #[cfg(feature = "raw_value")]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'a>,
    {
        let raw = &self.data[self.delegate.raw_buffering_start_index..self.delegate.index];
        visitor.visit_map(BorrowedRawDeserializer {
            raw_value: Some(raw),
        })
    }

    const should_early_return_if_failed: bool = false;

    #[inline]
    #[cold]
    fn set_failed(&mut self, failed: &mut bool) {
        self.delegate.set_failed(failed);
    }
}
706
707 //////////////////////////////////////////////////////////////////////////////
708
// Marker impl that allows a mutable reference to any reader to implement the
// sealed `Read` trait.
impl<'a, 'de, R> private::Sealed for &'a mut R where R: Read<'de> {}
710
// Lets a `&mut R` be used wherever a reader is expected. Every item forwards
// to the corresponding item of `R`.
impl<'a, 'de, R> Read<'de> for &'a mut R
where
    R: Read<'de>,
{
    fn next(&mut self) -> Result<Option<u8>> {
        R::next(self)
    }

    fn peek(&mut self) -> Result<Option<u8>> {
        R::peek(self)
    }

    fn discard(&mut self) {
        R::discard(self);
    }

    fn position(&self) -> Position {
        R::position(self)
    }

    fn peek_position(&self) -> Position {
        R::peek_position(self)
    }

    fn byte_offset(&self) -> usize {
        R::byte_offset(self)
    }

    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
        R::parse_str(self, scratch)
    }

    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'de, 's, [u8]>> {
        R::parse_str_raw(self, scratch)
    }

    fn ignore_str(&mut self) -> Result<()> {
        R::ignore_str(self)
    }

    fn decode_hex_escape(&mut self) -> Result<u16> {
        R::decode_hex_escape(self)
    }

    #[cfg(feature = "raw_value")]
    fn begin_raw_buffering(&mut self) {
        R::begin_raw_buffering(self);
    }

    #[cfg(feature = "raw_value")]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        R::end_raw_buffering(self, visitor)
    }

    // Inherit the failure-tracking strategy of the underlying reader.
    const should_early_return_if_failed: bool = R::should_early_return_if_failed;

    fn set_failed(&mut self, failed: &mut bool) {
        R::set_failed(self, failed);
    }
}
777
778 //////////////////////////////////////////////////////////////////////////////
779
/// Marker for whether StreamDeserializer can implement FusedIterator.
///
/// Implemented here for the slice-backed readers `SliceRead` and `StrRead`
/// only.
pub trait Fused: private::Sealed {}
impl<'a> Fused for SliceRead<'a> {}
impl<'a> Fused for StrRead<'a> {}
784
// Lookup table of bytes that must be escaped. A value of true at index i means
// that byte i requires an escape sequence in the input.
//
// The string scanners in this file index it with each input byte: a `false`
// entry means the byte is ordinary string content, a `true` entry means the
// byte is a quote, a backslash or a control character and needs a closer look.
static ESCAPE: [bool; 256] = {
    const CT: bool = true; // control character \x00..=\x1F
    const QU: bool = true; // quote \x22
    const BS: bool = true; // backslash \x5C
    const __: bool = false; // allow unescaped
    [
        //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
        CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, // 0
        CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, // 1
        __, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4
        __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
    ]
};
812
next_or_eof<'de, R>(read: &mut R) -> Result<u8> where R: ?Sized + Read<'de>,813 fn next_or_eof<'de, R>(read: &mut R) -> Result<u8>
814 where
815 R: ?Sized + Read<'de>,
816 {
817 match tri!(read.next()) {
818 Some(b) => Ok(b),
819 None => error(read, ErrorCode::EofWhileParsingString),
820 }
821 }
822
peek_or_eof<'de, R>(read: &mut R) -> Result<u8> where R: ?Sized + Read<'de>,823 fn peek_or_eof<'de, R>(read: &mut R) -> Result<u8>
824 where
825 R: ?Sized + Read<'de>,
826 {
827 match tri!(read.peek()) {
828 Some(b) => Ok(b),
829 None => error(read, ErrorCode::EofWhileParsingString),
830 }
831 }
832
error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T> where R: ?Sized + Read<'de>,833 fn error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T>
834 where
835 R: ?Sized + Read<'de>,
836 {
837 let position = read.position();
838 Err(Error::syntax(reason, position.line, position.column))
839 }
840
as_str<'de, 's, R: Read<'de>>(read: &R, slice: &'s [u8]) -> Result<&'s str>841 fn as_str<'de, 's, R: Read<'de>>(read: &R, slice: &'s [u8]) -> Result<&'s str> {
842 str::from_utf8(slice).or_else(|_| error(read, ErrorCode::InvalidUnicodeCodePoint))
843 }
844
845 /// Parses a JSON escape sequence and appends it into the scratch space. Assumes
846 /// the previous byte read was a backslash.
parse_escape<'de, R: Read<'de>>( read: &mut R, validate: bool, scratch: &mut Vec<u8>, ) -> Result<()>847 fn parse_escape<'de, R: Read<'de>>(
848 read: &mut R,
849 validate: bool,
850 scratch: &mut Vec<u8>,
851 ) -> Result<()> {
852 let ch = tri!(next_or_eof(read));
853
854 match ch {
855 b'"' => scratch.push(b'"'),
856 b'\\' => scratch.push(b'\\'),
857 b'/' => scratch.push(b'/'),
858 b'b' => scratch.push(b'\x08'),
859 b'f' => scratch.push(b'\x0c'),
860 b'n' => scratch.push(b'\n'),
861 b'r' => scratch.push(b'\r'),
862 b't' => scratch.push(b'\t'),
863 b'u' => {
864 fn encode_surrogate(scratch: &mut Vec<u8>, n: u16) {
865 scratch.extend_from_slice(&[
866 (n >> 12 & 0b0000_1111) as u8 | 0b1110_0000,
867 (n >> 6 & 0b0011_1111) as u8 | 0b1000_0000,
868 (n & 0b0011_1111) as u8 | 0b1000_0000,
869 ]);
870 }
871
872 let c = match tri!(read.decode_hex_escape()) {
873 n @ 0xDC00..=0xDFFF => {
874 return if validate {
875 error(read, ErrorCode::LoneLeadingSurrogateInHexEscape)
876 } else {
877 encode_surrogate(scratch, n);
878 Ok(())
879 };
880 }
881
882 // Non-BMP characters are encoded as a sequence of two hex
883 // escapes, representing UTF-16 surrogates. If deserializing a
884 // utf-8 string the surrogates are required to be paired,
885 // whereas deserializing a byte string accepts lone surrogates.
886 n1 @ 0xD800..=0xDBFF => {
887 if tri!(peek_or_eof(read)) == b'\\' {
888 read.discard();
889 } else {
890 return if validate {
891 read.discard();
892 error(read, ErrorCode::UnexpectedEndOfHexEscape)
893 } else {
894 encode_surrogate(scratch, n1);
895 Ok(())
896 };
897 }
898
899 if tri!(peek_or_eof(read)) == b'u' {
900 read.discard();
901 } else {
902 return if validate {
903 read.discard();
904 error(read, ErrorCode::UnexpectedEndOfHexEscape)
905 } else {
906 encode_surrogate(scratch, n1);
907 // The \ prior to this byte started an escape sequence,
908 // so we need to parse that now. This recursive call
909 // does not blow the stack on malicious input because
910 // the escape is not \u, so it will be handled by one
911 // of the easy nonrecursive cases.
912 parse_escape(read, validate, scratch)
913 };
914 }
915
916 let n2 = tri!(read.decode_hex_escape());
917
918 if n2 < 0xDC00 || n2 > 0xDFFF {
919 return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
920 }
921
922 let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;
923
924 match char::from_u32(n) {
925 Some(c) => c,
926 None => {
927 return error(read, ErrorCode::InvalidUnicodeCodePoint);
928 }
929 }
930 }
931
932 // Every u16 outside of the surrogate ranges above is guaranteed
933 // to be a legal char.
934 n => char::from_u32(n as u32).unwrap(),
935 };
936
937 scratch.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
938 }
939 _ => {
940 return error(read, ErrorCode::InvalidEscape);
941 }
942 }
943
944 Ok(())
945 }
946
947 /// Parses a JSON escape sequence and discards the value. Assumes the previous
948 /// byte read was a backslash.
ignore_escape<'de, R>(read: &mut R) -> Result<()> where R: ?Sized + Read<'de>,949 fn ignore_escape<'de, R>(read: &mut R) -> Result<()>
950 where
951 R: ?Sized + Read<'de>,
952 {
953 let ch = tri!(next_or_eof(read));
954
955 match ch {
956 b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {}
957 b'u' => match tri!(read.decode_hex_escape()) {
958 0xDC00..=0xDFFF => {
959 return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
960 }
961
962 // Non-BMP characters are encoded as a sequence of
963 // two hex escapes, representing UTF-16 surrogates.
964 n1 @ 0xD800..=0xDBFF => {
965 if tri!(next_or_eof(read)) != b'\\' {
966 return error(read, ErrorCode::UnexpectedEndOfHexEscape);
967 }
968 if tri!(next_or_eof(read)) != b'u' {
969 return error(read, ErrorCode::UnexpectedEndOfHexEscape);
970 }
971
972 let n2 = tri!(read.decode_hex_escape());
973 if n2 < 0xDC00 || n2 > 0xDFFF {
974 return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
975 }
976
977 let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;
978 if char::from_u32(n).is_none() {
979 return error(read, ErrorCode::InvalidUnicodeCodePoint);
980 }
981 }
982
983 _ => {}
984 },
985 _ => {
986 return error(read, ErrorCode::InvalidEscape);
987 }
988 }
989
990 Ok(())
991 }
992
// Lookup table mapping an ASCII byte to the value of the hexadecimal digit it
// represents ('0'..='9', 'A'..='F', 'a'..='f'). Every other byte maps to the
// sentinel 255.
static HEX: [u8; 256] = {
    const __: u8 = 255; // not a hex digit
    [
        //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 1
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
        00, 01, 02, 03, 04, 05, 06, 07, 08, 09, __, __, __, __, __, __, // 3
        __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 4
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 5
        __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 6
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
    ]
};

/// Returns the numeric value of an ASCII hexadecimal digit, or `None` if
/// `val` is not one.
fn decode_hex_val(val: u8) -> Option<u16> {
    match HEX[val as usize] {
        // Sentinel used by the table for "not a hex digit".
        255 => None,
        digit => Some(u16::from(digit)),
    }
}
1024