1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 #ifndef RAPIDJSON_READER_H_
16 #define RAPIDJSON_READER_H_
17 
18 /*! \file reader.h */
19 
20 #include "allocators.h"
21 #include "stream.h"
22 #include "encodedstream.h"
23 #include "internal/clzll.h"
24 #include "internal/meta.h"
25 #include "internal/stack.h"
26 #include "internal/strtod.h"
27 #include <limits>
28 
29 #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
30 #include <intrin.h>
31 #pragma intrinsic(_BitScanForward)
32 #endif
33 #ifdef RAPIDJSON_SSE42
34 #include <nmmintrin.h>
35 #elif defined(RAPIDJSON_SSE2)
36 #include <emmintrin.h>
37 #elif defined(RAPIDJSON_NEON)
38 #include <arm_neon.h>
39 #endif
40 
41 #ifdef __clang__
42 RAPIDJSON_DIAG_PUSH
43 RAPIDJSON_DIAG_OFF(old-style-cast)
44 RAPIDJSON_DIAG_OFF(padded)
45 RAPIDJSON_DIAG_OFF(switch-enum)
46 #elif defined(_MSC_VER)
47 RAPIDJSON_DIAG_PUSH
48 RAPIDJSON_DIAG_OFF(4127)  // conditional expression is constant
49 RAPIDJSON_DIAG_OFF(4702)  // unreachable code
50 #endif
51 
52 #ifdef __GNUC__
53 RAPIDJSON_DIAG_PUSH
54 RAPIDJSON_DIAG_OFF(effc++)
55 #endif
56 
57 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
// Expands to nothing; passed as the "value" of an early return in functions returning void.
#define RAPIDJSON_NOTHING /* deliberately empty */
// Leaves the enclosing function with `value` once HasParseError() reports an error.
// Only defined here when no definition has been supplied beforehand.
#ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN
#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \
    RAPIDJSON_MULTILINEMACRO_END
#endif
// Same check, for functions that return void.
#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
    RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING)
67 //!@endcond
68 
69 /*! \def RAPIDJSON_PARSE_ERROR_NORETURN
70     \ingroup RAPIDJSON_ERRORS
71     \brief Macro to indicate a parse error.
72     \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
73     \param offset  position of the error in JSON input (\c size_t)
74 
    This macro can be used as a customization point for the internal
    error handling mechanism of RapidJSON.
77 
78     A common usage model is to throw an exception instead of requiring the
79     caller to explicitly check the \ref rapidjson::GenericReader::Parse's
80     return value:
81 
82     \code
83     #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \
84        throw ParseException(parseErrorCode, #parseErrorCode, offset)
85 
86     #include <stdexcept>               // std::runtime_error
87     #include "rapidjson/error/error.h" // rapidjson::ParseResult
88 
89     struct ParseException : std::runtime_error, rapidjson::ParseResult {
90       ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset)
91         : std::runtime_error(msg), ParseResult(code, offset) {}
92     };
93 
94     #include "rapidjson/reader.h"
95     \endcode
96 
97     \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse
98  */
#ifndef RAPIDJSON_PARSE_ERROR_NORETURN
// Default implementation: record the error code and offset through SetParseError().
// It does not leave the enclosing function; RAPIDJSON_PARSE_ERROR adds the early return.
#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \
    SetParseError(parseErrorCode, offset); \
    RAPIDJSON_MULTILINEMACRO_END
#endif
106 
107 /*! \def RAPIDJSON_PARSE_ERROR
108     \ingroup RAPIDJSON_ERRORS
109     \brief (Internal) macro to indicate and handle a parse error.
110     \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
111     \param offset  position of the error in JSON input (\c size_t)
112 
113     Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing.
114 
115     \see RAPIDJSON_PARSE_ERROR_NORETURN
116     \hideinitializer
117  */
#ifndef RAPIDJSON_PARSE_ERROR
// Report the error, then return from the enclosing void function if an error is now recorded.
#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \
    RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \
    RAPIDJSON_MULTILINEMACRO_END
#endif
125 
126 #include "error/error.h" // ParseErrorCode, ParseResult
127 
128 RAPIDJSON_NAMESPACE_BEGIN
129 
130 ///////////////////////////////////////////////////////////////////////////////
131 // ParseFlag
132 
133 /*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS
134     \ingroup RAPIDJSON_CONFIG
135     \brief User-defined kParseDefaultFlags definition.
136 
137     User can define this as any \c ParseFlag combinations.
138 */
#if !defined(RAPIDJSON_PARSE_DEFAULT_FLAGS)
#define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags
#endif

//! Combination of parseFlags
/*! Every individual flag occupies its own bit, so flags can be combined with bitwise OR
    and tested with bitwise AND.
    \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream
 */
enum ParseFlag {
    kParseNoFlags               = 0,        //!< No flags are set.
    kParseInsituFlag            = 1 << 0,   //!< In-situ (destructive) parsing.
    kParseValidateEncodingFlag  = 1 << 1,   //!< Validate encoding of JSON strings.
    kParseIterativeFlag         = 1 << 2,   //!< Iterative (constant complexity in terms of function call stack size) parsing.
    kParseStopWhenDoneFlag      = 1 << 3,   //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error.
    kParseFullPrecisionFlag     = 1 << 4,   //!< Parse number in full precision (but slower).
    kParseCommentsFlag          = 1 << 5,   //!< Allow one-line (//) and multi-line (/**/) comments.
    kParseNumbersAsStringsFlag  = 1 << 6,   //!< Parse all numbers (ints/doubles) as strings.
    kParseTrailingCommasFlag    = 1 << 7,   //!< Allow trailing commas at the end of objects and arrays.
    kParseNanAndInfFlag         = 1 << 8,   //!< Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles.
    kParseEscapedApostropheFlag = 1 << 9,   //!< Allow escaped apostrophe in strings.
    kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS  //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS
};
160 
161 ///////////////////////////////////////////////////////////////////////////////
162 // Handler
163 
164 /*! \class rapidjson::Handler
165     \brief Concept for receiving events from GenericReader upon parsing.
166     The functions return true if no error occurs. If they return false,
167     the event publisher should terminate the process.
168 \code
169 concept Handler {
170     typename Ch;
171 
172     bool Null();
173     bool Bool(bool b);
174     bool Int(int i);
175     bool Uint(unsigned i);
176     bool Int64(int64_t i);
177     bool Uint64(uint64_t i);
178     bool Double(double d);
179     /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
180     bool RawNumber(const Ch* str, SizeType length, bool copy);
181     bool String(const Ch* str, SizeType length, bool copy);
182     bool StartObject();
183     bool Key(const Ch* str, SizeType length, bool copy);
184     bool EndObject(SizeType memberCount);
185     bool StartArray();
186     bool EndArray(SizeType elementCount);
187 };
188 \endcode
189 */
190 ///////////////////////////////////////////////////////////////////////////////
191 // BaseReaderHandler
192 
193 //! Default implementation of Handler.
194 /*! This can be used as base class of any reader handler.
195     \note implements Handler concept
196 */
197 template<typename Encoding = UTF8<>, typename Derived = void>
198 struct BaseReaderHandler {
199     typedef typename Encoding::Ch Ch;
200 
201     typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override;
202 
DefaultBaseReaderHandler203     bool Default() { return true; }
NullBaseReaderHandler204     bool Null() { return static_cast<Override&>(*this).Default(); }
BoolBaseReaderHandler205     bool Bool(bool) { return static_cast<Override&>(*this).Default(); }
IntBaseReaderHandler206     bool Int(int) { return static_cast<Override&>(*this).Default(); }
UintBaseReaderHandler207     bool Uint(unsigned) { return static_cast<Override&>(*this).Default(); }
Int64BaseReaderHandler208     bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); }
Uint64BaseReaderHandler209     bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); }
DoubleBaseReaderHandler210     bool Double(double) { return static_cast<Override&>(*this).Default(); }
211     /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
RawNumberBaseReaderHandler212     bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
StringBaseReaderHandler213     bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); }
StartObjectBaseReaderHandler214     bool StartObject() { return static_cast<Override&>(*this).Default(); }
KeyBaseReaderHandler215     bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
EndObjectBaseReaderHandler216     bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); }
StartArrayBaseReaderHandler217     bool StartArray() { return static_cast<Override&>(*this).Default(); }
EndArrayBaseReaderHandler218     bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); }
219 };
220 
221 ///////////////////////////////////////////////////////////////////////////////
222 // StreamLocalCopy
223 
224 namespace internal {
225 
//! Exposes a stream as member \c s, either as a local copy or as a reference.
/*! The variant is selected by StreamTraits<Stream>::copyOptimization; only the
    specializations for the values 1 and 0 are defined.
*/
template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
class StreamLocalCopy;
228 
229 //! Do copy optimization.
230 template<typename Stream>
231 class StreamLocalCopy<Stream, 1> {
232 public:
StreamLocalCopy(Stream & original)233     StreamLocalCopy(Stream& original) : s(original), original_(original) {}
~StreamLocalCopy()234     ~StreamLocalCopy() { original_ = s; }
235 
236     Stream s;
237 
238 private:
239     StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
240 
241     Stream& original_;
242 };
243 
244 //! Keep reference.
245 template<typename Stream>
246 class StreamLocalCopy<Stream, 0> {
247 public:
StreamLocalCopy(Stream & original)248     StreamLocalCopy(Stream& original) : s(original) {}
249 
250     Stream& s;
251 
252 private:
253     StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
254 };
255 
256 } // namespace internal
257 
258 ///////////////////////////////////////////////////////////////////////////////
259 // SkipWhitespace
260 
261 //! Skip the JSON white spaces in a stream.
262 /*! \param is A input stream for skipping white spaces.
263     \note This function has SSE2/SSE4.2 specialization.
264 */
265 template<typename InputStream>
SkipWhitespace(InputStream & is)266 void SkipWhitespace(InputStream& is) {
267     internal::StreamLocalCopy<InputStream> copy(is);
268     InputStream& s(copy.s);
269 
270     typename InputStream::Ch c;
271     while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t')
272         s.Take();
273 }
274 
//! Skip JSON white spaces in the character range [p, end).
/*! \param p   Start of the range.
    \param end One past the last character of the range.
    \return Pointer to the first non-whitespace character, or \c end if there is none.
*/
inline const char* SkipWhitespace(const char* p, const char* end) {
    for (; p != end; ++p) {
        switch (*p) {
        case ' ':
        case '\n':
        case '\r':
        case '\t':
            continue;   // still whitespace: advance to the next character
        default:
            return p;   // first non-whitespace character
        }
    }
    return p;
}
280 
281 #ifdef RAPIDJSON_SSE42
//! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-bit characters at once.
/*! \param p Start of the text. There is no end pointer: scanning only stops at the
             first character that is not a space, '\\n', '\\r' or '\\t'.
    \return Pointer to the first non-whitespace character.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Fast return for single non-whitespace
    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
        ++p;
    else
        return p;

    // 16-byte align to the next boundary
    // (scan byte by byte until p is a multiple of 16 so the aligned load below can be used)
    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    while (p != nextAligned)
        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
            ++p;
        else
            return p;

    // The rest of string using SIMD
    // w holds the whitespace set; the remaining bytes of the 16-byte array are zero.
    static const char whitespace[16] = " \n\r\t";
    const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));

    for (;; p += 16) {
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
        // r is the index of the first byte of s that is not in the whitespace set, or 16 if there is none.
        const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
        if (r != 16)    // some of characters is non-whitespace
            return p + r;
    }
}
309 
SkipWhitespace_SIMD(const char * p,const char * end)310 inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
311     // Fast return for single non-whitespace
312     if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
313         ++p;
314     else
315         return p;
316 
317     // The middle of string using SIMD
318     static const char whitespace[16] = " \n\r\t";
319     const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
320 
321     for (; p <= end - 16; p += 16) {
322         const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
323         const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
324         if (r != 16)    // some of characters is non-whitespace
325             return p + r;
326     }
327 
328     return SkipWhitespace(p, end);
329 }
330 
331 #elif defined(RAPIDJSON_SSE2)
332 
//! Skip whitespace with SSE2 instructions, testing 16 8-bit characters at once.
/*! \param p Start of the text. There is no end pointer: scanning only stops at the
             first character that is not a space, '\\n', '\\r' or '\\t'.
    \return Pointer to the first non-whitespace character.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Fast return for single non-whitespace
    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
        ++p;
    else
        return p;

    // 16-byte align to the next boundary
    // (scan byte by byte until p is a multiple of 16 so the aligned load below can be used)
    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    while (p != nextAligned)
        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
            ++p;
        else
            return p;

    // The rest of string
    // One 16-byte vector per whitespace character, each filled with that character.
    #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
    static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
    #undef C16

    const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
    const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
    const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
    const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));

    for (;; p += 16) {
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
        // x accumulates, per byte, whether it equals any of the four whitespace characters.
        __m128i x = _mm_cmpeq_epi8(s, w0);
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
        // Inverted 16-bit mask: a set bit marks a byte that is NOT whitespace.
        unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
        if (r != 0) {   // some of characters may be non-whitespace
#ifdef _MSC_VER         // Find the index of first non-whitespace
            unsigned long offset;
            _BitScanForward(&offset, r);
            return p + offset;
#else
            return p + __builtin_ffs(r) - 1;
#endif
        }
    }
}
377 
SkipWhitespace_SIMD(const char * p,const char * end)378 inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
379     // Fast return for single non-whitespace
380     if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
381         ++p;
382     else
383         return p;
384 
385     // The rest of string
386     #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
387     static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
388     #undef C16
389 
390     const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
391     const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
392     const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
393     const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
394 
395     for (; p <= end - 16; p += 16) {
396         const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
397         __m128i x = _mm_cmpeq_epi8(s, w0);
398         x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
399         x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
400         x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
401         unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
402         if (r != 0) {   // some of characters may be non-whitespace
403 #ifdef _MSC_VER         // Find the index of first non-whitespace
404             unsigned long offset;
405             _BitScanForward(&offset, r);
406             return p + offset;
407 #else
408             return p + __builtin_ffs(r) - 1;
409 #endif
410         }
411     }
412 
413     return SkipWhitespace(p, end);
414 }
415 
416 #elif defined(RAPIDJSON_NEON)
417 
//! Skip whitespace with ARM Neon instructions, testing 16 8-bit characters at once.
/*! \param p Start of the text. There is no end pointer: scanning only stops at the
             first character that is not a space, '\\n', '\\r' or '\\t'.
    \return Pointer to the first non-whitespace character.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Fast return for single non-whitespace
    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
        ++p;
    else
        return p;

    // 16-byte align to the next boundary
    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    while (p != nextAligned)
        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
            ++p;
        else
            return p;

    // One vector per whitespace character, each lane filled with that character.
    const uint8x16_t w0 = vmovq_n_u8(' ');
    const uint8x16_t w1 = vmovq_n_u8('\n');
    const uint8x16_t w2 = vmovq_n_u8('\r');
    const uint8x16_t w3 = vmovq_n_u8('\t');

    for (;; p += 16) {
        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
        // x accumulates, per byte, whether it equals any of the four whitespace characters.
        uint8x16_t x = vceqq_u8(s, w0);
        x = vorrq_u8(x, vceqq_u8(s, w1));
        x = vorrq_u8(x, vceqq_u8(s, w2));
        x = vorrq_u8(x, vceqq_u8(s, w3));

        x = vmvnq_u8(x);                       // Negate
        x = vrev64q_u8(x);                     // Rev in 64
        uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
        uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract

        // After negation a non-zero byte marks a non-whitespace character. The result of
        // internal::clzll (presumably a count of leading zero bits) divided by 8 is used
        // as the byte offset of the first such character within each 8-byte half.
        if (low == 0) {
            if (high != 0) {
                uint32_t lz = internal::clzll(high);
                return p + 8 + (lz >> 3);
            }
        } else {
            uint32_t lz = internal::clzll(low);
            return p + (lz >> 3);
        }
    }
}
462 
SkipWhitespace_SIMD(const char * p,const char * end)463 inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
464     // Fast return for single non-whitespace
465     if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
466         ++p;
467     else
468         return p;
469 
470     const uint8x16_t w0 = vmovq_n_u8(' ');
471     const uint8x16_t w1 = vmovq_n_u8('\n');
472     const uint8x16_t w2 = vmovq_n_u8('\r');
473     const uint8x16_t w3 = vmovq_n_u8('\t');
474 
475     for (; p <= end - 16; p += 16) {
476         const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
477         uint8x16_t x = vceqq_u8(s, w0);
478         x = vorrq_u8(x, vceqq_u8(s, w1));
479         x = vorrq_u8(x, vceqq_u8(s, w2));
480         x = vorrq_u8(x, vceqq_u8(s, w3));
481 
482         x = vmvnq_u8(x);                       // Negate
483         x = vrev64q_u8(x);                     // Rev in 64
484         uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
485         uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
486 
487         if (low == 0) {
488             if (high != 0) {
489                 uint32_t lz = internal::clzll(high);
490                 return p + 8 + (lz >> 3);
491             }
492         } else {
493             uint32_t lz = internal::clzll(low);
494             return p + (lz >> 3);
495         }
496     }
497 
498     return SkipWhitespace(p, end);
499 }
500 
501 #endif // RAPIDJSON_NEON
502 
503 #ifdef RAPIDJSON_SIMD
504 //! Template function specialization for InsituStringStream
SkipWhitespace(InsituStringStream & is)505 template<> inline void SkipWhitespace(InsituStringStream& is) {
506     is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_));
507 }
508 
509 //! Template function specialization for StringStream
SkipWhitespace(StringStream & is)510 template<> inline void SkipWhitespace(StringStream& is) {
511     is.src_ = SkipWhitespace_SIMD(is.src_);
512 }
513 
SkipWhitespace(EncodedInputStream<UTF8<>,MemoryStream> & is)514 template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) {
515     is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_);
516 }
517 #endif // RAPIDJSON_SIMD
518 
519 ///////////////////////////////////////////////////////////////////////////////
520 // GenericReader
521 
522 //! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator.
/*! GenericReader parses JSON text from a stream, and sends events synchronously to an
    object implementing Handler concept.
525 
526     It needs to allocate a stack for storing a single decoded string during
527     non-destructive parsing.
528 
529     For in-situ parsing, the decoded string is directly written to the source
530     text string, no temporary buffer is required.
531 
    A GenericReader object can be reused for parsing multiple JSON texts.
533 
534     \tparam SourceEncoding Encoding of the input stream.
535     \tparam TargetEncoding Encoding of the parse output.
536     \tparam StackAllocator Allocator type for stack.
537 */
538 template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator>
539 class GenericReader {
540 public:
541     typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type
542 
    //! Constructor.
    /*! The reader starts with a default-constructed parse result and in the iterative start state.
        \param stackAllocator Optional allocator for allocating stack memory. (Only used for non-destructive parsing)
        \param stackCapacity stack capacity in bytes for storing a single decoded string.  (Only used for non-destructive parsing)
    */
    GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) :
        stack_(stackAllocator, stackCapacity), parseResult_(), state_(IterativeParsingStartState) {}
549 
550     //! Parse JSON text.
551     /*! \tparam parseFlags Combination of \ref ParseFlag.
552         \tparam InputStream Type of input stream, implementing Stream concept.
553         \tparam Handler Type of handler, implementing Handler concept.
554         \param is Input stream to be parsed.
555         \param handler The handler to receive events.
556         \return Whether the parsing is successful.
557     */
558     template <unsigned parseFlags, typename InputStream, typename Handler>
Parse(InputStream & is,Handler & handler)559     ParseResult Parse(InputStream& is, Handler& handler) {
560         if (parseFlags & kParseIterativeFlag)
561             return IterativeParse<parseFlags>(is, handler);
562 
563         parseResult_.Clear();
564 
565         ClearStackOnExit scope(*this);
566 
567         SkipWhitespaceAndComments<parseFlags>(is);
568         RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
569 
570         if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) {
571             RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell());
572             RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
573         }
574         else {
575             ParseValue<parseFlags>(is, handler);
576             RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
577 
578             if (!(parseFlags & kParseStopWhenDoneFlag)) {
579                 SkipWhitespaceAndComments<parseFlags>(is);
580                 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
581 
582                 if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) {
583                     RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell());
584                     RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
585                 }
586             }
587         }
588 
589         return parseResult_;
590     }
591 
    //! Parse JSON text (with \ref kParseDefaultFlags)
    /*! Forwards to Parse<kParseDefaultFlags>().
        \tparam InputStream Type of input stream, implementing Stream concept
        \tparam Handler Type of handler, implementing Handler concept.
        \param is Input stream to be parsed.
        \param handler The handler to receive events.
        \return The result of the parse, as returned by Parse<kParseDefaultFlags>().
    */
    template <typename InputStream, typename Handler>
    ParseResult Parse(InputStream& is, Handler& handler) {
        return Parse<kParseDefaultFlags>(is, handler);
    }
603 
    //! Initialize JSON text token-by-token parsing
    /*! Clears the stored parse result and resets the iterative parsing state to
        its start state.
     */
    void IterativeParseInit() {
        parseResult_.Clear();
        state_ = IterativeParsingStartState;
    }
611 
612     //! Parse one token from JSON text
613     /*! \tparam InputStream Type of input stream, implementing Stream concept
614         \tparam Handler Type of handler, implementing Handler concept.
615         \param is Input stream to be parsed.
616         \param handler The handler to receive events.
617         \return Whether the parsing is successful.
618      */
619     template <unsigned parseFlags, typename InputStream, typename Handler>
IterativeParseNext(InputStream & is,Handler & handler)620     bool IterativeParseNext(InputStream& is, Handler& handler) {
621         while (RAPIDJSON_LIKELY(is.Peek() != '\0')) {
622             SkipWhitespaceAndComments<parseFlags>(is);
623 
624             Token t = Tokenize(is.Peek());
625             IterativeParsingState n = Predict(state_, t);
626             IterativeParsingState d = Transit<parseFlags>(state_, t, n, is, handler);
627 
628             // If we've finished or hit an error...
629             if (RAPIDJSON_UNLIKELY(IsIterativeParsingCompleteState(d))) {
630                 // Report errors.
631                 if (d == IterativeParsingErrorState) {
632                     HandleError(state_, is);
633                     return false;
634                 }
635 
636                 // Transition to the finish state.
637                 RAPIDJSON_ASSERT(d == IterativeParsingFinishState);
638                 state_ = d;
639 
640                 // If StopWhenDone is not set...
641                 if (!(parseFlags & kParseStopWhenDoneFlag)) {
642                     // ... and extra non-whitespace data is found...
643                     SkipWhitespaceAndComments<parseFlags>(is);
644                     if (is.Peek() != '\0') {
645                         // ... this is considered an error.
646                         HandleError(state_, is);
647                         return false;
648                     }
649                 }
650 
651                 // Success! We are done!
652                 return true;
653             }
654 
655             // Transition to the new state.
656             state_ = d;
657 
658             // If we parsed anything other than a delimiter, we invoked the handler, so we can return true now.
659             if (!IsIterativeParsingDelimiterState(n))
660                 return true;
661         }
662 
663         // We reached the end of file.
664         stack_.Clear();
665 
666         if (state_ != IterativeParsingFinishState) {
667             HandleError(state_, is);
668             return false;
669         }
670 
671         return true;
672     }
673 
    //! Check if token-by-token parsing JSON text is complete
    /*! Applies IsIterativeParsingCompleteState() to the current iterative parsing state.
        \return Whether the JSON has been fully decoded.
     */
    RAPIDJSON_FORCEINLINE bool IterativeParseComplete() const {
        return IsIterativeParsingCompleteState(state_);
    }
680 
    //! Whether a parse error has occurred in the last parsing.
    /*! \return \c true if the stored parse result reports an error. */
    bool HasParseError() const { return parseResult_.IsError(); }
683 
    //! Get the \ref ParseErrorCode of last parsing.
    /*! \return The code held by the stored parse result. */
    ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); }
686 
    //! Get the position of last parsing error in input, 0 otherwise.
    /*! \return The offset held by the stored parse result. */
    size_t GetErrorOffset() const { return parseResult_.Offset(); }
689 
690 protected:
    //! Record a parse error (code and input offset) in the stored parse result.
    void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); }
692 
693 private:
    // Prohibit copy constructor & assignment operator.
    // Both are declared private, so code outside the class cannot copy a reader.
    GenericReader(const GenericReader&);
    GenericReader& operator=(const GenericReader&);
697 
    // Empty the internal stack (called by ClearStackOnExit's destructor).
    void ClearStack() { stack_.Clear(); }
699 
    // Scope guard used by Parse(): its destructor calls ClearStack() on the reader,
    // so the stack is cleared on any exit from the parsing function, e.g. due to exception.
    struct ClearStackOnExit {
        explicit ClearStackOnExit(GenericReader& r) : r_(r) {}
        ~ClearStackOnExit() { r_.ClearStack(); }
    private:
        GenericReader& r_;  // reader whose stack is cleared on destruction
        // Not copyable: declared private.
        ClearStackOnExit(const ClearStackOnExit&);
        ClearStackOnExit& operator=(const ClearStackOnExit&);
    };
709 
710     template<unsigned parseFlags, typename InputStream>
SkipWhitespaceAndComments(InputStream & is)711     void SkipWhitespaceAndComments(InputStream& is) {
712         SkipWhitespace(is);
713 
714         if (parseFlags & kParseCommentsFlag) {
715             while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) {
716                 if (Consume(is, '*')) {
717                     while (true) {
718                         if (RAPIDJSON_UNLIKELY(is.Peek() == '\0'))
719                             RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
720                         else if (Consume(is, '*')) {
721                             if (Consume(is, '/'))
722                                 break;
723                         }
724                         else
725                             is.Take();
726                     }
727                 }
728                 else if (RAPIDJSON_LIKELY(Consume(is, '/')))
729                     while (is.Peek() != '\0' && is.Take() != '\n') {}
730                 else
731                     RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
732 
733                 SkipWhitespace(is);
734             }
735         }
736     }
737 
738     // Parse object: { string : value, ... }
739     template<unsigned parseFlags, typename InputStream, typename Handler>
ParseObject(InputStream & is,Handler & handler)740     void ParseObject(InputStream& is, Handler& handler) {
741         RAPIDJSON_ASSERT(is.Peek() == '{');
742         is.Take();  // Skip '{'
743 
744         if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
745             RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
746 
747         SkipWhitespaceAndComments<parseFlags>(is);
748         RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
749 
750         if (Consume(is, '}')) {
751             if (RAPIDJSON_UNLIKELY(!handler.EndObject(0)))  // empty object
752                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
753             return;
754         }
755 
756         for (SizeType memberCount = 0;;) {
757             if (RAPIDJSON_UNLIKELY(is.Peek() != '"'))
758                 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
759 
760             ParseString<parseFlags>(is, handler, true);
761             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
762 
763             SkipWhitespaceAndComments<parseFlags>(is);
764             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
765 
766             if (RAPIDJSON_UNLIKELY(!Consume(is, ':')))
767                 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
768 
769             SkipWhitespaceAndComments<parseFlags>(is);
770             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
771 
772             ParseValue<parseFlags>(is, handler);
773             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
774 
775             SkipWhitespaceAndComments<parseFlags>(is);
776             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
777 
778             ++memberCount;
779 
780             switch (is.Peek()) {
781                 case ',':
782                     is.Take();
783                     SkipWhitespaceAndComments<parseFlags>(is);
784                     RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
785                     break;
786                 case '}':
787                     is.Take();
788                     if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
789                         RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
790                     return;
791                 default:
792                     RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; // This useless break is only for making warning and coverage happy
793             }
794 
795             if (parseFlags & kParseTrailingCommasFlag) {
796                 if (is.Peek() == '}') {
797                     if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
798                         RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
799                     is.Take();
800                     return;
801                 }
802             }
803         }
804     }
805 
806     // Parse array: [ value, ... ]
807     template<unsigned parseFlags, typename InputStream, typename Handler>
ParseArray(InputStream & is,Handler & handler)808     void ParseArray(InputStream& is, Handler& handler) {
809         RAPIDJSON_ASSERT(is.Peek() == '[');
810         is.Take();  // Skip '['
811 
812         if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
813             RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
814 
815         SkipWhitespaceAndComments<parseFlags>(is);
816         RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
817 
818         if (Consume(is, ']')) {
819             if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array
820                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
821             return;
822         }
823 
824         for (SizeType elementCount = 0;;) {
825             ParseValue<parseFlags>(is, handler);
826             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
827 
828             ++elementCount;
829             SkipWhitespaceAndComments<parseFlags>(is);
830             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
831 
832             if (Consume(is, ',')) {
833                 SkipWhitespaceAndComments<parseFlags>(is);
834                 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
835             }
836             else if (Consume(is, ']')) {
837                 if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
838                     RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
839                 return;
840             }
841             else
842                 RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
843 
844             if (parseFlags & kParseTrailingCommasFlag) {
845                 if (is.Peek() == ']') {
846                     if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
847                         RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
848                     is.Take();
849                     return;
850                 }
851             }
852         }
853     }
854 
855     template<unsigned parseFlags, typename InputStream, typename Handler>
ParseNull(InputStream & is,Handler & handler)856     void ParseNull(InputStream& is, Handler& handler) {
857         RAPIDJSON_ASSERT(is.Peek() == 'n');
858         is.Take();
859 
860         if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) {
861             if (RAPIDJSON_UNLIKELY(!handler.Null()))
862                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
863         }
864         else
865             RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
866     }
867 
868     template<unsigned parseFlags, typename InputStream, typename Handler>
ParseTrue(InputStream & is,Handler & handler)869     void ParseTrue(InputStream& is, Handler& handler) {
870         RAPIDJSON_ASSERT(is.Peek() == 't');
871         is.Take();
872 
873         if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) {
874             if (RAPIDJSON_UNLIKELY(!handler.Bool(true)))
875                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
876         }
877         else
878             RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
879     }
880 
881     template<unsigned parseFlags, typename InputStream, typename Handler>
ParseFalse(InputStream & is,Handler & handler)882     void ParseFalse(InputStream& is, Handler& handler) {
883         RAPIDJSON_ASSERT(is.Peek() == 'f');
884         is.Take();
885 
886         if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) {
887             if (RAPIDJSON_UNLIKELY(!handler.Bool(false)))
888                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
889         }
890         else
891             RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
892     }
893 
894     template<typename InputStream>
Consume(InputStream & is,typename InputStream::Ch expect)895     RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) {
896         if (RAPIDJSON_LIKELY(is.Peek() == expect)) {
897             is.Take();
898             return true;
899         }
900         else
901             return false;
902     }
903 
904     // Helper function to parse four hexadecimal digits in \uXXXX in ParseString().
905     template<typename InputStream>
ParseHex4(InputStream & is,size_t escapeOffset)906     unsigned ParseHex4(InputStream& is, size_t escapeOffset) {
907         unsigned codepoint = 0;
908         for (int i = 0; i < 4; i++) {
909             Ch c = is.Peek();
910             codepoint <<= 4;
911             codepoint += static_cast<unsigned>(c);
912             if (c >= '0' && c <= '9')
913                 codepoint -= '0';
914             else if (c >= 'A' && c <= 'F')
915                 codepoint -= 'A' - 10;
916             else if (c >= 'a' && c <= 'f')
917                 codepoint -= 'a' - 10;
918             else {
919                 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, escapeOffset);
920                 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0);
921             }
922             is.Take();
923         }
924         return codepoint;
925     }
926 
927     template <typename CharType>
928     class StackStream {
929     public:
930         typedef CharType Ch;
931 
StackStream(internal::Stack<StackAllocator> & stack)932         StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {}
Put(Ch c)933         RAPIDJSON_FORCEINLINE void Put(Ch c) {
934             *stack_.template Push<Ch>() = c;
935             ++length_;
936         }
937 
Push(SizeType count)938         RAPIDJSON_FORCEINLINE void* Push(SizeType count) {
939             length_ += count;
940             return stack_.template Push<Ch>(count);
941         }
942 
Length()943         size_t Length() const { return length_; }
944 
Pop()945         Ch* Pop() {
946             return stack_.template Pop<Ch>(length_);
947         }
948 
949     private:
950         StackStream(const StackStream&);
951         StackStream& operator=(const StackStream&);
952 
953         internal::Stack<StackAllocator>& stack_;
954         SizeType length_;
955     };
956 
957     // Parse string and generate String event. Different code paths for kParseInsituFlag.
958     template<unsigned parseFlags, typename InputStream, typename Handler>
959     void ParseString(InputStream& is, Handler& handler, bool isKey = false) {
960         internal::StreamLocalCopy<InputStream> copy(is);
961         InputStream& s(copy.s);
962 
963         RAPIDJSON_ASSERT(s.Peek() == '\"');
964         s.Take();  // Skip '\"'
965 
966         bool success = false;
967         if (parseFlags & kParseInsituFlag) {
968             typename InputStream::Ch *head = s.PutBegin();
969             ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s);
970             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
971             size_t length = s.PutEnd(head) - 1;
972             RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
973             const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
974             success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false));
975         }
976         else {
977             StackStream<typename TargetEncoding::Ch> stackStream(stack_);
978             ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream);
979             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
980             SizeType length = static_cast<SizeType>(stackStream.Length()) - 1;
981             const typename TargetEncoding::Ch* const str = stackStream.Pop();
982             success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true));
983         }
984         if (RAPIDJSON_UNLIKELY(!success))
985             RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
986     }
987 
988     // Parse string to an output is
989     // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation.
990     template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream>
ParseStringToStream(InputStream & is,OutputStream & os)991     RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) {
992 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
993 #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
994         static const char escape[256] = {
995             Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '/',
996             Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
997             0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
998             0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
999             Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16
1000         };
1001 #undef Z16
1002 //!@endcond
1003 
1004         for (;;) {
1005             // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation.
1006             if (!(parseFlags & kParseValidateEncodingFlag))
1007                 ScanCopyUnescapedString(is, os);
1008 
1009             Ch c = is.Peek();
1010             if (RAPIDJSON_UNLIKELY(c == '\\')) {    // Escape
1011                 size_t escapeOffset = is.Tell();    // For invalid escaping, report the initial '\\' as error offset
1012                 is.Take();
1013                 Ch e = is.Peek();
1014                 if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) {
1015                     is.Take();
1016                     os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)]));
1017                 }
1018                 else if ((parseFlags & kParseEscapedApostropheFlag) && RAPIDJSON_LIKELY(e == '\'')) { // Allow escaped apostrophe
1019                     is.Take();
1020                     os.Put('\'');
1021                 }
1022                 else if (RAPIDJSON_LIKELY(e == 'u')) {    // Unicode
1023                     is.Take();
1024                     unsigned codepoint = ParseHex4(is, escapeOffset);
1025                     RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
1026                     if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
1027                         // high surrogate, check if followed by valid low surrogate
1028                         if (RAPIDJSON_LIKELY(codepoint <= 0xDBFF)) {
1029                             // Handle UTF-16 surrogate pair
1030                             if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
1031                                 RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
1032                             unsigned codepoint2 = ParseHex4(is, escapeOffset);
1033                             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
1034                             if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
1035                                 RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
1036                             codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
1037                         }
1038                         // single low surrogate
1039                         else
1040                         {
1041                             RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
1042                         }
1043                     }
1044                     TEncoding::Encode(os, codepoint);
1045                 }
1046                 else
1047                     RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset);
1048             }
1049             else if (RAPIDJSON_UNLIKELY(c == '"')) {    // Closing double quote
1050                 is.Take();
1051                 os.Put('\0');   // null-terminate the string
1052                 return;
1053             }
1054             else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
1055                 if (c == '\0')
1056                     RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell());
1057                 else
1058                     RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell());
1059             }
1060             else {
1061                 size_t offset = is.Tell();
1062                 if (RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag ?
1063                     !Transcoder<SEncoding, TEncoding>::Validate(is, os) :
1064                     !Transcoder<SEncoding, TEncoding>::Transcode(is, os))))
1065                     RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, offset);
1066             }
1067         }
1068     }
1069 
1070     template<typename InputStream, typename OutputStream>
ScanCopyUnescapedString(InputStream &,OutputStream &)1071     static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) {
1072             // Do nothing for generic version
1073     }
1074 
1075 #if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
1076     // StringStream -> StackStream<char>
ScanCopyUnescapedString(StringStream & is,StackStream<char> & os)1077     static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
1078         const char* p = is.src_;
1079 
1080         // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1081         const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1082         while (p != nextAligned)
1083             if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1084                 is.src_ = p;
1085                 return;
1086             }
1087             else
1088                 os.Put(*p++);
1089 
1090         // The rest of string using SIMD
1091         static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
1092         static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
1093         static const char space[16]  = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
1094         const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
1095         const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
1096         const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
1097 
1098         for (;; p += 16) {
1099             const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
1100             const __m128i t1 = _mm_cmpeq_epi8(s, dq);
1101             const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1102             const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
1103             const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
1104             unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
1105             if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
1106                 SizeType length;
1107     #ifdef _MSC_VER         // Find the index of first escaped
1108                 unsigned long offset;
1109                 _BitScanForward(&offset, r);
1110                 length = offset;
1111     #else
1112                 length = static_cast<SizeType>(__builtin_ffs(r) - 1);
1113     #endif
1114                 if (length != 0) {
1115                     char* q = reinterpret_cast<char*>(os.Push(length));
1116                     for (size_t i = 0; i < length; i++)
1117                         q[i] = p[i];
1118 
1119                     p += length;
1120                 }
1121                 break;
1122             }
1123             _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s);
1124         }
1125 
1126         is.src_ = p;
1127     }
1128 
1129     // InsituStringStream -> InsituStringStream
ScanCopyUnescapedString(InsituStringStream & is,InsituStringStream & os)1130     static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
1131         RAPIDJSON_ASSERT(&is == &os);
1132         (void)os;
1133 
1134         if (is.src_ == is.dst_) {
1135             SkipUnescapedString(is);
1136             return;
1137         }
1138 
1139         char* p = is.src_;
1140         char *q = is.dst_;
1141 
1142         // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1143         const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1144         while (p != nextAligned)
1145             if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1146                 is.src_ = p;
1147                 is.dst_ = q;
1148                 return;
1149             }
1150             else
1151                 *q++ = *p++;
1152 
1153         // The rest of string using SIMD
1154         static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
1155         static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
1156         static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
1157         const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
1158         const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
1159         const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
1160 
1161         for (;; p += 16, q += 16) {
1162             const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
1163             const __m128i t1 = _mm_cmpeq_epi8(s, dq);
1164             const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1165             const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
1166             const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
1167             unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
1168             if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
1169                 size_t length;
1170 #ifdef _MSC_VER         // Find the index of first escaped
1171                 unsigned long offset;
1172                 _BitScanForward(&offset, r);
1173                 length = offset;
1174 #else
1175                 length = static_cast<size_t>(__builtin_ffs(r) - 1);
1176 #endif
1177                 for (const char* pend = p + length; p != pend; )
1178                     *q++ = *p++;
1179                 break;
1180             }
1181             _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s);
1182         }
1183 
1184         is.src_ = p;
1185         is.dst_ = q;
1186     }
1187 
1188     // When read/write pointers are the same for insitu stream, just skip unescaped characters
SkipUnescapedString(InsituStringStream & is)1189     static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
1190         RAPIDJSON_ASSERT(is.src_ == is.dst_);
1191         char* p = is.src_;
1192 
1193         // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1194         const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1195         for (; p != nextAligned; p++)
1196             if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1197                 is.src_ = is.dst_ = p;
1198                 return;
1199             }
1200 
1201         // The rest of string using SIMD
1202         static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
1203         static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
1204         static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
1205         const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
1206         const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
1207         const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
1208 
1209         for (;; p += 16) {
1210             const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
1211             const __m128i t1 = _mm_cmpeq_epi8(s, dq);
1212             const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1213             const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
1214             const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
1215             unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
1216             if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
1217                 size_t length;
1218 #ifdef _MSC_VER         // Find the index of first escaped
1219                 unsigned long offset;
1220                 _BitScanForward(&offset, r);
1221                 length = offset;
1222 #else
1223                 length = static_cast<size_t>(__builtin_ffs(r) - 1);
1224 #endif
1225                 p += length;
1226                 break;
1227             }
1228         }
1229 
1230         is.src_ = is.dst_ = p;
1231     }
1232 #elif defined(RAPIDJSON_NEON)
1233     // StringStream -> StackStream<char>
ScanCopyUnescapedString(StringStream & is,StackStream<char> & os)1234     static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
1235         const char* p = is.src_;
1236 
1237         // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1238         const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1239         while (p != nextAligned)
1240             if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1241                 is.src_ = p;
1242                 return;
1243             }
1244             else
1245                 os.Put(*p++);
1246 
1247         // The rest of string using SIMD
1248         const uint8x16_t s0 = vmovq_n_u8('"');
1249         const uint8x16_t s1 = vmovq_n_u8('\\');
1250         const uint8x16_t s2 = vmovq_n_u8('\b');
1251         const uint8x16_t s3 = vmovq_n_u8(32);
1252 
1253         for (;; p += 16) {
1254             const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
1255             uint8x16_t x = vceqq_u8(s, s0);
1256             x = vorrq_u8(x, vceqq_u8(s, s1));
1257             x = vorrq_u8(x, vceqq_u8(s, s2));
1258             x = vorrq_u8(x, vcltq_u8(s, s3));
1259 
1260             x = vrev64q_u8(x);                     // Rev in 64
1261             uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
1262             uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
1263 
1264             SizeType length = 0;
1265             bool escaped = false;
1266             if (low == 0) {
1267                 if (high != 0) {
1268                     uint32_t lz = internal::clzll(high);
1269                     length = 8 + (lz >> 3);
1270                     escaped = true;
1271                 }
1272             } else {
1273                 uint32_t lz = internal::clzll(low);
1274                 length = lz >> 3;
1275                 escaped = true;
1276             }
1277             if (RAPIDJSON_UNLIKELY(escaped)) {   // some of characters is escaped
1278                 if (length != 0) {
1279                     char* q = reinterpret_cast<char*>(os.Push(length));
1280                     for (size_t i = 0; i < length; i++)
1281                         q[i] = p[i];
1282 
1283                     p += length;
1284                 }
1285                 break;
1286             }
1287             vst1q_u8(reinterpret_cast<uint8_t *>(os.Push(16)), s);
1288         }
1289 
1290         is.src_ = p;
1291     }
1292 
1293     // InsituStringStream -> InsituStringStream
ScanCopyUnescapedString(InsituStringStream & is,InsituStringStream & os)1294     static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
1295         RAPIDJSON_ASSERT(&is == &os);
1296         (void)os;
1297 
1298         if (is.src_ == is.dst_) {
1299             SkipUnescapedString(is);
1300             return;
1301         }
1302 
1303         char* p = is.src_;
1304         char *q = is.dst_;
1305 
1306         // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1307         const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1308         while (p != nextAligned)
1309             if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1310                 is.src_ = p;
1311                 is.dst_ = q;
1312                 return;
1313             }
1314             else
1315                 *q++ = *p++;
1316 
1317         // The rest of string using SIMD
1318         const uint8x16_t s0 = vmovq_n_u8('"');
1319         const uint8x16_t s1 = vmovq_n_u8('\\');
1320         const uint8x16_t s2 = vmovq_n_u8('\b');
1321         const uint8x16_t s3 = vmovq_n_u8(32);
1322 
1323         for (;; p += 16, q += 16) {
1324             const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
1325             uint8x16_t x = vceqq_u8(s, s0);
1326             x = vorrq_u8(x, vceqq_u8(s, s1));
1327             x = vorrq_u8(x, vceqq_u8(s, s2));
1328             x = vorrq_u8(x, vcltq_u8(s, s3));
1329 
1330             x = vrev64q_u8(x);                     // Rev in 64
1331             uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
1332             uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
1333 
1334             SizeType length = 0;
1335             bool escaped = false;
1336             if (low == 0) {
1337                 if (high != 0) {
1338                     uint32_t lz = internal::clzll(high);
1339                     length = 8 + (lz >> 3);
1340                     escaped = true;
1341                 }
1342             } else {
1343                 uint32_t lz = internal::clzll(low);
1344                 length = lz >> 3;
1345                 escaped = true;
1346             }
1347             if (RAPIDJSON_UNLIKELY(escaped)) {   // some of characters is escaped
1348                 for (const char* pend = p + length; p != pend; ) {
1349                     *q++ = *p++;
1350                 }
1351                 break;
1352             }
1353             vst1q_u8(reinterpret_cast<uint8_t *>(q), s);
1354         }
1355 
1356         is.src_ = p;
1357         is.dst_ = q;
1358     }
1359 
1360     // When read/write pointers are the same for insitu stream, just skip unescaped characters
SkipUnescapedString(InsituStringStream & is)1361     static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
1362         RAPIDJSON_ASSERT(is.src_ == is.dst_);
1363         char* p = is.src_;
1364 
1365         // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1366         const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1367         for (; p != nextAligned; p++)
1368             if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1369                 is.src_ = is.dst_ = p;
1370                 return;
1371             }
1372 
1373         // The rest of string using SIMD
1374         const uint8x16_t s0 = vmovq_n_u8('"');
1375         const uint8x16_t s1 = vmovq_n_u8('\\');
1376         const uint8x16_t s2 = vmovq_n_u8('\b');
1377         const uint8x16_t s3 = vmovq_n_u8(32);
1378 
1379         for (;; p += 16) {
1380             const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
1381             uint8x16_t x = vceqq_u8(s, s0);
1382             x = vorrq_u8(x, vceqq_u8(s, s1));
1383             x = vorrq_u8(x, vceqq_u8(s, s2));
1384             x = vorrq_u8(x, vcltq_u8(s, s3));
1385 
1386             x = vrev64q_u8(x);                     // Rev in 64
1387             uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
1388             uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
1389 
1390             if (low == 0) {
1391                 if (high != 0) {
1392                     uint32_t lz = internal::clzll(high);
1393                     p += 8 + (lz >> 3);
1394                     break;
1395                 }
1396             } else {
1397                 uint32_t lz = internal::clzll(low);
1398                 p += lz >> 3;
1399                 break;
1400             }
1401         }
1402 
1403         is.src_ = is.dst_ = p;
1404     }
1405 #endif // RAPIDJSON_NEON
1406 
    // Stream adapter through which ParseNumber() reads the characters of a number.
    // The two bool parameters select one of the specializations that follow:
    //   <false, false>  forwards to the input stream and records nothing;
    //   <true,  false>  additionally copies characters to a StackStream<char> on TakePush()/Push();
    //   <true,  true>   also routes Take() through TakePush().
    template<typename InputStream, bool backup, bool pushOnTake>
    class NumberStream;
1409 
1410     template<typename InputStream>
1411     class NumberStream<InputStream, false, false> {
1412     public:
1413         typedef typename InputStream::Ch Ch;
1414 
NumberStream(GenericReader & reader,InputStream & s)1415         NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader;  }
1416 
Peek()1417         RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
TakePush()1418         RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
Take()1419         RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
Push(char)1420         RAPIDJSON_FORCEINLINE void Push(char) {}
1421 
Tell()1422         size_t Tell() { return is.Tell(); }
Length()1423         size_t Length() { return 0; }
Pop()1424         const char* Pop() { return 0; }
1425 
1426     protected:
1427         NumberStream& operator=(const NumberStream&);
1428 
1429         InputStream& is;
1430     };
1431 
1432     template<typename InputStream>
1433     class NumberStream<InputStream, true, false> : public NumberStream<InputStream, false, false> {
1434         typedef NumberStream<InputStream, false, false> Base;
1435     public:
NumberStream(GenericReader & reader,InputStream & is)1436         NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is), stackStream(reader.stack_) {}
1437 
TakePush()1438         RAPIDJSON_FORCEINLINE Ch TakePush() {
1439             stackStream.Put(static_cast<char>(Base::is.Peek()));
1440             return Base::is.Take();
1441         }
1442 
Push(char c)1443         RAPIDJSON_FORCEINLINE void Push(char c) {
1444             stackStream.Put(c);
1445         }
1446 
Length()1447         size_t Length() { return stackStream.Length(); }
1448 
Pop()1449         const char* Pop() {
1450             stackStream.Put('\0');
1451             return stackStream.Pop();
1452         }
1453 
1454     private:
1455         StackStream<char> stackStream;
1456     };
1457 
1458     template<typename InputStream>
1459     class NumberStream<InputStream, true, true> : public NumberStream<InputStream, true, false> {
1460         typedef NumberStream<InputStream, true, false> Base;
1461     public:
NumberStream(GenericReader & reader,InputStream & is)1462         NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is) {}
1463 
Take()1464         RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); }
1465     };
1466 
1467     template<unsigned parseFlags, typename InputStream, typename Handler>
ParseNumber(InputStream & is,Handler & handler)1468     void ParseNumber(InputStream& is, Handler& handler) {
1469         internal::StreamLocalCopy<InputStream> copy(is);
1470         NumberStream<InputStream,
1471             ((parseFlags & kParseNumbersAsStringsFlag) != 0) ?
1472                 ((parseFlags & kParseInsituFlag) == 0) :
1473                 ((parseFlags & kParseFullPrecisionFlag) != 0),
1474             (parseFlags & kParseNumbersAsStringsFlag) != 0 &&
1475                 (parseFlags & kParseInsituFlag) == 0> s(*this, copy.s);
1476 
1477         size_t startOffset = s.Tell();
1478         double d = 0.0;
1479         bool useNanOrInf = false;
1480 
1481         // Parse minus
1482         bool minus = Consume(s, '-');
1483 
1484         // Parse int: zero / ( digit1-9 *DIGIT )
1485         unsigned i = 0;
1486         uint64_t i64 = 0;
1487         bool use64bit = false;
1488         int significandDigit = 0;
1489         if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) {
1490             i = 0;
1491             s.TakePush();
1492         }
1493         else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) {
1494             i = static_cast<unsigned>(s.TakePush() - '0');
1495 
1496             if (minus)
1497                 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1498                     if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648
1499                         if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) {
1500                             i64 = i;
1501                             use64bit = true;
1502                             break;
1503                         }
1504                     }
1505                     i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
1506                     significandDigit++;
1507                 }
1508             else
1509                 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1510                     if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295
1511                         if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) {
1512                             i64 = i;
1513                             use64bit = true;
1514                             break;
1515                         }
1516                     }
1517                     i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
1518                     significandDigit++;
1519                 }
1520         }
1521         // Parse NaN or Infinity here
1522         else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) {
1523             if (Consume(s, 'N')) {
1524                 if (Consume(s, 'a') && Consume(s, 'N')) {
1525                     d = std::numeric_limits<double>::quiet_NaN();
1526                     useNanOrInf = true;
1527                 }
1528             }
1529             else if (RAPIDJSON_LIKELY(Consume(s, 'I'))) {
1530                 if (Consume(s, 'n') && Consume(s, 'f')) {
1531                     d = (minus ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity());
1532                     useNanOrInf = true;
1533 
1534                     if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n')
1535                                                                 && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y')))) {
1536                         RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
1537                     }
1538                 }
1539             }
1540 
1541             if (RAPIDJSON_UNLIKELY(!useNanOrInf)) {
1542                 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
1543             }
1544         }
1545         else
1546             RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
1547 
1548         // Parse 64bit int
1549         bool useDouble = false;
1550         if (use64bit) {
1551             if (minus)
1552                 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1553                      if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808
1554                         if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) {
1555                             d = static_cast<double>(i64);
1556                             useDouble = true;
1557                             break;
1558                         }
1559                     i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1560                     significandDigit++;
1561                 }
1562             else
1563                 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1564                     if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615
1565                         if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) {
1566                             d = static_cast<double>(i64);
1567                             useDouble = true;
1568                             break;
1569                         }
1570                     i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1571                     significandDigit++;
1572                 }
1573         }
1574 
1575         // Force double for big integer
1576         if (useDouble) {
1577             while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1578                 d = d * 10 + (s.TakePush() - '0');
1579             }
1580         }
1581 
1582         // Parse frac = decimal-point 1*DIGIT
1583         int expFrac = 0;
1584         size_t decimalPosition;
1585         if (Consume(s, '.')) {
1586             decimalPosition = s.Length();
1587 
1588             if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9')))
1589                 RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell());
1590 
1591             if (!useDouble) {
1592 #if RAPIDJSON_64BIT
1593                 // Use i64 to store significand in 64-bit architecture
1594                 if (!use64bit)
1595                     i64 = i;
1596 
1597                 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1598                     if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
1599                         break;
1600                     else {
1601                         i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1602                         --expFrac;
1603                         if (i64 != 0)
1604                             significandDigit++;
1605                     }
1606                 }
1607 
1608                 d = static_cast<double>(i64);
1609 #else
1610                 // Use double to store significand in 32-bit architecture
1611                 d = static_cast<double>(use64bit ? i64 : i);
1612 #endif
1613                 useDouble = true;
1614             }
1615 
1616             while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1617                 if (significandDigit < 17) {
1618                     d = d * 10.0 + (s.TakePush() - '0');
1619                     --expFrac;
1620                     if (RAPIDJSON_LIKELY(d > 0.0))
1621                         significandDigit++;
1622                 }
1623                 else
1624                     s.TakePush();
1625             }
1626         }
1627         else
1628             decimalPosition = s.Length(); // decimal position at the end of integer.
1629 
1630         // Parse exp = e [ minus / plus ] 1*DIGIT
1631         int exp = 0;
1632         if (Consume(s, 'e') || Consume(s, 'E')) {
1633             if (!useDouble) {
1634                 d = static_cast<double>(use64bit ? i64 : i);
1635                 useDouble = true;
1636             }
1637 
1638             bool expMinus = false;
1639             if (Consume(s, '+'))
1640                 ;
1641             else if (Consume(s, '-'))
1642                 expMinus = true;
1643 
1644             if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1645                 exp = static_cast<int>(s.Take() - '0');
1646                 if (expMinus) {
1647                     // (exp + expFrac) must not underflow int => we're detecting when -exp gets
1648                     // dangerously close to INT_MIN (a pessimistic next digit 9 would push it into
1649                     // underflow territory):
1650                     //
1651                     //        -(exp * 10 + 9) + expFrac >= INT_MIN
1652                     //   <=>  exp <= (expFrac - INT_MIN - 9) / 10
1653                     RAPIDJSON_ASSERT(expFrac <= 0);
1654                     int maxExp = (expFrac + 2147483639) / 10;
1655 
1656                     while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1657                         exp = exp * 10 + static_cast<int>(s.Take() - '0');
1658                         if (RAPIDJSON_UNLIKELY(exp > maxExp)) {
1659                             while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9'))  // Consume the rest of exponent
1660                                 s.Take();
1661                         }
1662                     }
1663                 }
1664                 else {  // positive exp
1665                     int maxExp = 308 - expFrac;
1666                     while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1667                         exp = exp * 10 + static_cast<int>(s.Take() - '0');
1668                         if (RAPIDJSON_UNLIKELY(exp > maxExp))
1669                             RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
1670                     }
1671                 }
1672             }
1673             else
1674                 RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell());
1675 
1676             if (expMinus)
1677                 exp = -exp;
1678         }
1679 
1680         // Finish parsing, call event according to the type of number.
1681         bool cont = true;
1682 
1683         if (parseFlags & kParseNumbersAsStringsFlag) {
1684             if (parseFlags & kParseInsituFlag) {
1685                 s.Pop();  // Pop stack no matter if it will be used or not.
1686                 typename InputStream::Ch* head = is.PutBegin();
1687                 const size_t length = s.Tell() - startOffset;
1688                 RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
1689                 // unable to insert the \0 character here, it will erase the comma after this number
1690                 const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
1691                 cont = handler.RawNumber(str, SizeType(length), false);
1692             }
1693             else {
1694                 SizeType numCharsToCopy = static_cast<SizeType>(s.Length());
1695                 StringStream srcStream(s.Pop());
1696                 StackStream<typename TargetEncoding::Ch> dstStream(stack_);
1697                 while (numCharsToCopy--) {
1698                     Transcoder<UTF8<>, TargetEncoding>::Transcode(srcStream, dstStream);
1699                 }
1700                 dstStream.Put('\0');
1701                 const typename TargetEncoding::Ch* str = dstStream.Pop();
1702                 const SizeType length = static_cast<SizeType>(dstStream.Length()) - 1;
1703                 cont = handler.RawNumber(str, SizeType(length), true);
1704             }
1705         }
1706         else {
1707            size_t length = s.Length();
1708            const char* decimal = s.Pop();  // Pop stack no matter if it will be used or not.
1709 
1710            if (useDouble) {
1711                int p = exp + expFrac;
1712                if (parseFlags & kParseFullPrecisionFlag)
1713                    d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
1714                else
1715                    d = internal::StrtodNormalPrecision(d, p);
1716 
1717                // Use > max, instead of == inf, to fix bogus warning -Wfloat-equal
1718                if (d > (std::numeric_limits<double>::max)()) {
1719                    // Overflow
1720                    // TODO: internal::StrtodX should report overflow (or underflow)
1721                    RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
1722                }
1723 
1724                cont = handler.Double(minus ? -d : d);
1725            }
1726            else if (useNanOrInf) {
1727                cont = handler.Double(d);
1728            }
1729            else {
1730                if (use64bit) {
1731                    if (minus)
1732                        cont = handler.Int64(static_cast<int64_t>(~i64 + 1));
1733                    else
1734                        cont = handler.Uint64(i64);
1735                }
1736                else {
1737                    if (minus)
1738                        cont = handler.Int(static_cast<int32_t>(~i + 1));
1739                    else
1740                        cont = handler.Uint(i);
1741                }
1742            }
1743         }
1744         if (RAPIDJSON_UNLIKELY(!cont))
1745             RAPIDJSON_PARSE_ERROR(kParseErrorTermination, startOffset);
1746     }
1747 
1748     // Parse any JSON value
1749     template<unsigned parseFlags, typename InputStream, typename Handler>
ParseValue(InputStream & is,Handler & handler)1750     void ParseValue(InputStream& is, Handler& handler) {
1751         switch (is.Peek()) {
1752             case 'n': ParseNull  <parseFlags>(is, handler); break;
1753             case 't': ParseTrue  <parseFlags>(is, handler); break;
1754             case 'f': ParseFalse <parseFlags>(is, handler); break;
1755             case '"': ParseString<parseFlags>(is, handler); break;
1756             case '{': ParseObject<parseFlags>(is, handler); break;
1757             case '[': ParseArray <parseFlags>(is, handler); break;
1758             default :
1759                       ParseNumber<parseFlags>(is, handler);
1760                       break;
1761 
1762         }
1763     }
1764 
1765     // Iterative Parsing
1766 
1767     // States
    // States of the iterative parser's state machine.
    // The enumerator order is significant: Predict() uses these values to index the rows of its
    // transition table G, whose rows are written in this same order.
    enum IterativeParsingState {
        IterativeParsingFinishState = 0, // sink states at top
        IterativeParsingErrorState,      // sink states at top
        IterativeParsingStartState,

        // Object states
        IterativeParsingObjectInitialState,
        IterativeParsingMemberKeyState,
        IterativeParsingMemberValueState,
        IterativeParsingObjectFinishState,

        // Array states
        IterativeParsingArrayInitialState,
        IterativeParsingElementState,
        IterativeParsingArrayFinishState,

        // Single value state
        IterativeParsingValueState,

        // Delimiter states (at bottom)
        IterativeParsingElementDelimiterState,
        IterativeParsingMemberDelimiterState,
        IterativeParsingKeyValueDelimiterState,

        // Number of states; used as the first dimension of G in Predict().
        cIterativeParsingStateCount
    };
1794 
1795     // Tokens
    // Token kinds derived by Tokenize() from a single lookahead character.
    // The values are stored in Tokenize()'s lookup table and index the columns of the
    // transition table G in Predict(), so the order must match the column order there.
    enum Token {
        LeftBracketToken = 0,
        RightBracketToken,

        LeftCurlyBracketToken,
        RightCurlyBracketToken,

        CommaToken,
        ColonToken,

        StringToken,
        FalseToken,
        TrueToken,
        NullToken,
        NumberToken,    // also what Tokenize() yields for every character with no other mapping

        // Number of token kinds; used as the second dimension of G in Predict().
        kTokenCount
    };
1814 
Tokenize(Ch c)1815     RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) const {
1816 
1817 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
1818 #define N NumberToken
1819 #define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N
1820         // Maps from ASCII to Token
1821         static const unsigned char tokenMap[256] = {
1822             N16, // 00~0F
1823             N16, // 10~1F
1824             N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F
1825             N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F
1826             N16, // 40~4F
1827             N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F
1828             N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F
1829             N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F
1830             N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF
1831         };
1832 #undef N
1833 #undef N16
1834 //!@endcond
1835 
1836         if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256)
1837             return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]);
1838         else
1839             return NumberToken;
1840     }
1841 
    //! Look up the candidate next state for a (current state, lookahead token) pair.
    /*! This is a pure table lookup: no input is consumed and no handler is called here.
        \param state Current state; selects the row of the transition table.
        \param token Lookahead token; selects the column of the transition table.
        \return The table entry for the pair. IterativeParsingErrorState marks a combination
                for which the table defines no valid transition.
    */
    RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) const {
        // current state x one lookahead token -> new state
        // Rows follow the order of IterativeParsingState, columns the order of Token.
        static const char G[cIterativeParsingStateCount][kTokenCount] = {
            // Finish(sink state)
            {
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState
            },
            // Error(sink state)
            {
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState
            },
            // Start
            {
                IterativeParsingArrayInitialState,  // Left bracket
                IterativeParsingErrorState,         // Right bracket
                IterativeParsingObjectInitialState, // Left curly bracket
                IterativeParsingErrorState,         // Right curly bracket
                IterativeParsingErrorState,         // Comma
                IterativeParsingErrorState,         // Colon
                IterativeParsingValueState,         // String
                IterativeParsingValueState,         // False
                IterativeParsingValueState,         // True
                IterativeParsingValueState,         // Null
                IterativeParsingValueState          // Number
            },
            // ObjectInitial
            {
                IterativeParsingErrorState,         // Left bracket
                IterativeParsingErrorState,         // Right bracket
                IterativeParsingErrorState,         // Left curly bracket
                IterativeParsingObjectFinishState,  // Right curly bracket
                IterativeParsingErrorState,         // Comma
                IterativeParsingErrorState,         // Colon
                IterativeParsingMemberKeyState,     // String
                IterativeParsingErrorState,         // False
                IterativeParsingErrorState,         // True
                IterativeParsingErrorState,         // Null
                IterativeParsingErrorState          // Number
            },
            // MemberKey
            {
                IterativeParsingErrorState,             // Left bracket
                IterativeParsingErrorState,             // Right bracket
                IterativeParsingErrorState,             // Left curly bracket
                IterativeParsingErrorState,             // Right curly bracket
                IterativeParsingErrorState,             // Comma
                IterativeParsingKeyValueDelimiterState, // Colon
                IterativeParsingErrorState,             // String
                IterativeParsingErrorState,             // False
                IterativeParsingErrorState,             // True
                IterativeParsingErrorState,             // Null
                IterativeParsingErrorState              // Number
            },
            // MemberValue
            {
                IterativeParsingErrorState,             // Left bracket
                IterativeParsingErrorState,             // Right bracket
                IterativeParsingErrorState,             // Left curly bracket
                IterativeParsingObjectFinishState,      // Right curly bracket
                IterativeParsingMemberDelimiterState,   // Comma
                IterativeParsingErrorState,             // Colon
                IterativeParsingErrorState,             // String
                IterativeParsingErrorState,             // False
                IterativeParsingErrorState,             // True
                IterativeParsingErrorState,             // Null
                IterativeParsingErrorState              // Number
            },
            // ObjectFinish(sink state)
            {
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState
            },
            // ArrayInitial
            {
                IterativeParsingArrayInitialState,      // Left bracket(push Element state)
                IterativeParsingArrayFinishState,       // Right bracket
                IterativeParsingObjectInitialState,     // Left curly bracket(push Element state)
                IterativeParsingErrorState,             // Right curly bracket
                IterativeParsingErrorState,             // Comma
                IterativeParsingErrorState,             // Colon
                IterativeParsingElementState,           // String
                IterativeParsingElementState,           // False
                IterativeParsingElementState,           // True
                IterativeParsingElementState,           // Null
                IterativeParsingElementState            // Number
            },
            // Element
            {
                IterativeParsingErrorState,             // Left bracket
                IterativeParsingArrayFinishState,       // Right bracket
                IterativeParsingErrorState,             // Left curly bracket
                IterativeParsingErrorState,             // Right curly bracket
                IterativeParsingElementDelimiterState,  // Comma
                IterativeParsingErrorState,             // Colon
                IterativeParsingErrorState,             // String
                IterativeParsingErrorState,             // False
                IterativeParsingErrorState,             // True
                IterativeParsingErrorState,             // Null
                IterativeParsingErrorState              // Number
            },
            // ArrayFinish(sink state)
            {
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState
            },
            // Single Value (sink state)
            {
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState
            },
            // ElementDelimiter
            {
                IterativeParsingArrayInitialState,      // Left bracket(push Element state)
                IterativeParsingArrayFinishState,       // Right bracket
                IterativeParsingObjectInitialState,     // Left curly bracket(push Element state)
                IterativeParsingErrorState,             // Right curly bracket
                IterativeParsingErrorState,             // Comma
                IterativeParsingErrorState,             // Colon
                IterativeParsingElementState,           // String
                IterativeParsingElementState,           // False
                IterativeParsingElementState,           // True
                IterativeParsingElementState,           // Null
                IterativeParsingElementState            // Number
            },
            // MemberDelimiter
            {
                IterativeParsingErrorState,         // Left bracket
                IterativeParsingErrorState,         // Right bracket
                IterativeParsingErrorState,         // Left curly bracket
                IterativeParsingObjectFinishState,  // Right curly bracket
                IterativeParsingErrorState,         // Comma
                IterativeParsingErrorState,         // Colon
                IterativeParsingMemberKeyState,     // String
                IterativeParsingErrorState,         // False
                IterativeParsingErrorState,         // True
                IterativeParsingErrorState,         // Null
                IterativeParsingErrorState          // Number
            },
            // KeyValueDelimiter
            {
                IterativeParsingArrayInitialState,      // Left bracket(push MemberValue state)
                IterativeParsingErrorState,             // Right bracket
                IterativeParsingObjectInitialState,     // Left curly bracket(push MemberValue state)
                IterativeParsingErrorState,             // Right curly bracket
                IterativeParsingErrorState,             // Comma
                IterativeParsingErrorState,             // Colon
                IterativeParsingMemberValueState,       // String
                IterativeParsingMemberValueState,       // False
                IterativeParsingMemberValueState,       // True
                IterativeParsingMemberValueState,       // Null
                IterativeParsingMemberValueState        // Number
            },
        }; // End of G

        // Entries are stored as char; convert back to the enum type on the way out.
        return static_cast<IterativeParsingState>(G[state][token]);
    }
2005 
    // Advance the token stream and the state, based on the candidate destination state that was returned by Predict().
2007     // May return a new state on state pop.
2008     template <unsigned parseFlags, typename InputStream, typename Handler>
Transit(IterativeParsingState src,Token token,IterativeParsingState dst,InputStream & is,Handler & handler)2009     RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) {
2010         (void)token;
2011 
2012         switch (dst) {
2013         case IterativeParsingErrorState:
2014             return dst;
2015 
2016         case IterativeParsingObjectInitialState:
2017         case IterativeParsingArrayInitialState:
2018         {
2019             // Push the state(Element or MemeberValue) if we are nested in another array or value of member.
2020             // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
2021             IterativeParsingState n = src;
2022             if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
2023                 n = IterativeParsingElementState;
2024             else if (src == IterativeParsingKeyValueDelimiterState)
2025                 n = IterativeParsingMemberValueState;
2026             // Push current state.
2027             *stack_.template Push<SizeType>(1) = n;
2028             // Initialize and push the member/element count.
2029             *stack_.template Push<SizeType>(1) = 0;
2030             // Call handler
2031             bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray();
2032             // On handler short circuits the parsing.
2033             if (!hr) {
2034                 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
2035                 return IterativeParsingErrorState;
2036             }
2037             else {
2038                 is.Take();
2039                 return dst;
2040             }
2041         }
2042 
2043         case IterativeParsingMemberKeyState:
2044             ParseString<parseFlags>(is, handler, true);
2045             if (HasParseError())
2046                 return IterativeParsingErrorState;
2047             else
2048                 return dst;
2049 
2050         case IterativeParsingKeyValueDelimiterState:
2051             RAPIDJSON_ASSERT(token == ColonToken);
2052             is.Take();
2053             return dst;
2054 
2055         case IterativeParsingMemberValueState:
2056             // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
2057             ParseValue<parseFlags>(is, handler);
2058             if (HasParseError()) {
2059                 return IterativeParsingErrorState;
2060             }
2061             return dst;
2062 
2063         case IterativeParsingElementState:
2064             // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
2065             ParseValue<parseFlags>(is, handler);
2066             if (HasParseError()) {
2067                 return IterativeParsingErrorState;
2068             }
2069             return dst;
2070 
2071         case IterativeParsingMemberDelimiterState:
2072         case IterativeParsingElementDelimiterState:
2073             is.Take();
2074             // Update member/element count.
2075             *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1;
2076             return dst;
2077 
2078         case IterativeParsingObjectFinishState:
2079         {
2080             // Transit from delimiter is only allowed when trailing commas are enabled
2081             if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) {
2082                 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell());
2083                 return IterativeParsingErrorState;
2084             }
2085             // Get member count.
2086             SizeType c = *stack_.template Pop<SizeType>(1);
2087             // If the object is not empty, count the last member.
2088             if (src == IterativeParsingMemberValueState)
2089                 ++c;
2090             // Restore the state.
2091             IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
2092             // Transit to Finish state if this is the topmost scope.
2093             if (n == IterativeParsingStartState)
2094                 n = IterativeParsingFinishState;
2095             // Call handler
2096             bool hr = handler.EndObject(c);
2097             // On handler short circuits the parsing.
2098             if (!hr) {
2099                 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
2100                 return IterativeParsingErrorState;
2101             }
2102             else {
2103                 is.Take();
2104                 return n;
2105             }
2106         }
2107 
2108         case IterativeParsingArrayFinishState:
2109         {
2110             // Transit from delimiter is only allowed when trailing commas are enabled
2111             if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) {
2112                 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorValueInvalid, is.Tell());
2113                 return IterativeParsingErrorState;
2114             }
2115             // Get element count.
2116             SizeType c = *stack_.template Pop<SizeType>(1);
2117             // If the array is not empty, count the last element.
2118             if (src == IterativeParsingElementState)
2119                 ++c;
2120             // Restore the state.
2121             IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
2122             // Transit to Finish state if this is the topmost scope.
2123             if (n == IterativeParsingStartState)
2124                 n = IterativeParsingFinishState;
2125             // Call handler
2126             bool hr = handler.EndArray(c);
2127             // On handler short circuits the parsing.
2128             if (!hr) {
2129                 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
2130                 return IterativeParsingErrorState;
2131             }
2132             else {
2133                 is.Take();
2134                 return n;
2135             }
2136         }
2137 
2138         default:
2139             // This branch is for IterativeParsingValueState actually.
2140             // Use `default:` rather than
2141             // `case IterativeParsingValueState:` is for code coverage.
2142 
2143             // The IterativeParsingStartState is not enumerated in this switch-case.
2144             // It is impossible for that case. And it can be caught by following assertion.
2145 
2146             // The IterativeParsingFinishState is not enumerated in this switch-case either.
2147             // It is a "derivative" state which cannot triggered from Predict() directly.
2148             // Therefore it cannot happen here. And it can be caught by following assertion.
2149             RAPIDJSON_ASSERT(dst == IterativeParsingValueState);
2150 
2151             // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
2152             ParseValue<parseFlags>(is, handler);
2153             if (HasParseError()) {
2154                 return IterativeParsingErrorState;
2155             }
2156             return IterativeParsingFinishState;
2157         }
2158     }
2159 
2160     template <typename InputStream>
HandleError(IterativeParsingState src,InputStream & is)2161     void HandleError(IterativeParsingState src, InputStream& is) {
2162         if (HasParseError()) {
2163             // Error flag has been set.
2164             return;
2165         }
2166 
2167         switch (src) {
2168         case IterativeParsingStartState:            RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return;
2169         case IterativeParsingFinishState:           RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return;
2170         case IterativeParsingObjectInitialState:
2171         case IterativeParsingMemberDelimiterState:  RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return;
2172         case IterativeParsingMemberKeyState:        RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return;
2173         case IterativeParsingMemberValueState:      RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return;
2174         case IterativeParsingKeyValueDelimiterState:
2175         case IterativeParsingArrayInitialState:
2176         case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return;
2177         default: RAPIDJSON_ASSERT(src == IterativeParsingElementState); RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return;
2178         }
2179     }
2180 
IsIterativeParsingDelimiterState(IterativeParsingState s)2181     RAPIDJSON_FORCEINLINE bool IsIterativeParsingDelimiterState(IterativeParsingState s) const {
2182         return s >= IterativeParsingElementDelimiterState;
2183     }
2184 
IsIterativeParsingCompleteState(IterativeParsingState s)2185     RAPIDJSON_FORCEINLINE bool IsIterativeParsingCompleteState(IterativeParsingState s) const {
2186         return s <= IterativeParsingErrorState;
2187     }
2188 
2189     template <unsigned parseFlags, typename InputStream, typename Handler>
IterativeParse(InputStream & is,Handler & handler)2190     ParseResult IterativeParse(InputStream& is, Handler& handler) {
2191         parseResult_.Clear();
2192         ClearStackOnExit scope(*this);
2193         IterativeParsingState state = IterativeParsingStartState;
2194 
2195         SkipWhitespaceAndComments<parseFlags>(is);
2196         RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
2197         while (is.Peek() != '\0') {
2198             Token t = Tokenize(is.Peek());
2199             IterativeParsingState n = Predict(state, t);
2200             IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
2201 
2202             if (d == IterativeParsingErrorState) {
2203                 HandleError(state, is);
2204                 break;
2205             }
2206 
2207             state = d;
2208 
2209             // Do not further consume streams if a root JSON has been parsed.
2210             if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
2211                 break;
2212 
2213             SkipWhitespaceAndComments<parseFlags>(is);
2214             RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
2215         }
2216 
2217         // Handle the end of file.
2218         if (state != IterativeParsingFinishState)
2219             HandleError(state, is);
2220 
2221         return parseResult_;
2222     }
2223 
    static const size_t kDefaultStackCapacity = 256;    //!< Default stack capacity in bytes for storing a single decoded string.
    //! Besides decoded strings, the iterative parser keeps its scope frames here: Transit() pushes
    //! the state to resume and the member/element count as SizeType entries.
    internal::Stack<StackAllocator> stack_;  //!< A stack for storing decoded string temporarily during non-destructive parsing.
    //! Result of the parse; IterativeParse() clears it on entry and returns it.
    ParseResult parseResult_;
    //! Not referenced by IterativeParse(), which tracks its state in a local variable.
    //! NOTE(review): presumably the state kept between calls of an incremental parsing API - confirm.
    IterativeParsingState state_;
2228 }; // class GenericReader
2229 
//! Reader with UTF8 encoding and default allocator.
/*! GenericReader instantiated with UTF8<> for both of its encoding parameters;
    any remaining template parameters take their defaults.
*/
typedef GenericReader<UTF8<>, UTF8<> > Reader;
2232 
2233 RAPIDJSON_NAMESPACE_END
2234 
2235 #if defined(__clang__) || defined(_MSC_VER)
2236 RAPIDJSON_DIAG_POP
2237 #endif
2238 
2239 
2240 #ifdef __GNUC__
2241 RAPIDJSON_DIAG_POP
2242 #endif
2243 
2244 #endif // RAPIDJSON_READER_H_
2245