1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 #ifndef RAPIDJSON_READER_H_
16 #define RAPIDJSON_READER_H_
17 
18 /*! \file reader.h */
19 
20 #include "allocators.h"
21 #include "stream.h"
22 #include "encodedstream.h"
23 #include "internal/meta.h"
24 #include "internal/stack.h"
25 #include "internal/strtod.h"
26 
27 #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
28 #include <intrin.h>
29 #pragma intrinsic(_BitScanForward)
30 #endif
31 #ifdef RAPIDJSON_SSE42
32 #include <nmmintrin.h>
33 #elif defined(RAPIDJSON_SSE2)
34 #include <emmintrin.h>
35 #endif
36 
37 #ifdef _MSC_VER
38 RAPIDJSON_DIAG_PUSH
39 RAPIDJSON_DIAG_OFF(4127)  // conditional expression is constant
40 RAPIDJSON_DIAG_OFF(4702)  // unreachable code
41 #endif
42 
43 #ifdef __clang__
44 RAPIDJSON_DIAG_PUSH
45 RAPIDJSON_DIAG_OFF(padded)
46 RAPIDJSON_DIAG_OFF(switch-enum)
47 #endif
48 
49 #ifdef __GNUC__
50 RAPIDJSON_DIAG_PUSH
51 RAPIDJSON_DIAG_OFF(effc++)
52 #endif
53 
54 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
// Expands to nothing. Used as the "value" argument of an early return inside
// functions that return void.
#define RAPIDJSON_NOTHING /* deliberately empty */
// Returns `value` from the enclosing function when HasParseError() reports an
// error. The #ifndef guard lets a definition made before this point take
// precedence over this default.
#ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN
#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \
    RAPIDJSON_MULTILINEMACRO_END
#endif
// Same check for functions returning void: the returned "value" is empty.
#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
    RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING)
64 //!@endcond
65 
66 /*! \def RAPIDJSON_PARSE_ERROR_NORETURN
67     \ingroup RAPIDJSON_ERRORS
68     \brief Macro to indicate a parse error.
69     \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
70     \param offset  position of the error in JSON input (\c size_t)
71 
    This macro can be used as a customization point for the internal
73     error handling mechanism of RapidJSON.
74 
75     A common usage model is to throw an exception instead of requiring the
76     caller to explicitly check the \ref rapidjson::GenericReader::Parse's
77     return value:
78 
79     \code
80     #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \
81        throw ParseException(parseErrorCode, #parseErrorCode, offset)
82 
83     #include <stdexcept>               // std::runtime_error
84     #include "rapidjson/error/error.h" // rapidjson::ParseResult
85 
86     struct ParseException : std::runtime_error, rapidjson::ParseResult {
87       ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset)
88         : std::runtime_error(msg), ParseResult(code, offset) {}
89     };
90 
91     #include "rapidjson/reader.h"
92     \endcode
93 
94     \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse
95  */
// Default definition, used only when no definition was supplied earlier:
// asserts that no error has been recorded yet, then records the code and the
// offset through SetParseError(). It does not leave the enclosing function.
#ifndef RAPIDJSON_PARSE_ERROR_NORETURN
#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \
    SetParseError(parseErrorCode, offset); \
    RAPIDJSON_MULTILINEMACRO_END
#endif
103 
104 /*! \def RAPIDJSON_PARSE_ERROR
105     \ingroup RAPIDJSON_ERRORS
106     \brief (Internal) macro to indicate and handle a parse error.
107     \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
108     \param offset  position of the error in JSON input (\c size_t)
109 
110     Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing.
111 
112     \see RAPIDJSON_PARSE_ERROR_NORETURN
113     \hideinitializer
114  */
// Default definition, used only when no definition was supplied earlier:
// reports the error through RAPIDJSON_PARSE_ERROR_NORETURN, then leaves the
// enclosing function through RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID (which
// returns no value, so it fits functions returning void).
#ifndef RAPIDJSON_PARSE_ERROR
#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \
    RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \
    RAPIDJSON_MULTILINEMACRO_END
#endif
122 
123 #include "error/error.h" // ParseErrorCode, ParseResult
124 
125 RAPIDJSON_NAMESPACE_BEGIN
126 
127 ///////////////////////////////////////////////////////////////////////////////
128 // ParseFlag
129 
130 /*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS
131     \ingroup RAPIDJSON_CONFIG
132     \brief User-defined kParseDefaultFlags definition.
133 
134     User can define this as any \c ParseFlag combinations.
135 */
// Becomes the value of kParseDefaultFlags. Falls back to kParseNoFlags when
// the macro was not defined before this point.
#ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS
#define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags
#endif
139 
140 //! Combination of parseFlags
141 /*! \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream
142  */
143 enum ParseFlag {
144     kParseNoFlags = 0,              //!< No flags are set.
145     kParseInsituFlag = 1,           //!< In-situ(destructive) parsing.
146     kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings.
147     kParseIterativeFlag = 4,        //!< Iterative(constant complexity in terms of function call stack size) parsing.
148     kParseStopWhenDoneFlag = 8,     //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error.
149     kParseFullPrecisionFlag = 16,   //!< Parse number in full precision (but slower).
150     kParseCommentsFlag = 32,        //!< Allow one-line (//) and multi-line (/**/) comments.
151     kParseNumbersAsStringsFlag = 64,    //!< Parse all numbers (ints/doubles) as strings.
152     kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS  //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS
153 };
154 
155 ///////////////////////////////////////////////////////////////////////////////
156 // Handler
157 
158 /*! \class rapidjson::Handler
159     \brief Concept for receiving events from GenericReader upon parsing.
160     The functions return true if no error occurs. If they return false,
161     the event publisher should terminate the process.
162 \code
163 concept Handler {
164     typename Ch;
165 
166     bool Null();
167     bool Bool(bool b);
168     bool Int(int i);
169     bool Uint(unsigned i);
170     bool Int64(int64_t i);
171     bool Uint64(uint64_t i);
172     bool Double(double d);
173     /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
174     bool RawNumber(const Ch* str, SizeType length, bool copy);
175     bool String(const Ch* str, SizeType length, bool copy);
176     bool StartObject();
177     bool Key(const Ch* str, SizeType length, bool copy);
178     bool EndObject(SizeType memberCount);
179     bool StartArray();
180     bool EndArray(SizeType elementCount);
181 };
182 \endcode
183 */
184 ///////////////////////////////////////////////////////////////////////////////
185 // BaseReaderHandler
186 
187 //! Default implementation of Handler.
188 /*! This can be used as base class of any reader handler.
189     \note implements Handler concept
190 */
191 template<typename Encoding = UTF8<>, typename Derived = void>
192 struct BaseReaderHandler {
193     typedef typename Encoding::Ch Ch;
194 
195     typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override;
196 
DefaultBaseReaderHandler197     bool Default() { return true; }
NullBaseReaderHandler198     bool Null() { return static_cast<Override&>(*this).Default(); }
BoolBaseReaderHandler199     bool Bool(bool) { return static_cast<Override&>(*this).Default(); }
IntBaseReaderHandler200     bool Int(int) { return static_cast<Override&>(*this).Default(); }
UintBaseReaderHandler201     bool Uint(unsigned) { return static_cast<Override&>(*this).Default(); }
Int64BaseReaderHandler202     bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); }
Uint64BaseReaderHandler203     bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); }
DoubleBaseReaderHandler204     bool Double(double) { return static_cast<Override&>(*this).Default(); }
205     /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
RawNumberBaseReaderHandler206     bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
StringBaseReaderHandler207     bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); }
StartObjectBaseReaderHandler208     bool StartObject() { return static_cast<Override&>(*this).Default(); }
KeyBaseReaderHandler209     bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
EndObjectBaseReaderHandler210     bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); }
StartArrayBaseReaderHandler211     bool StartArray() { return static_cast<Override&>(*this).Default(); }
EndArrayBaseReaderHandler212     bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); }
213 };
214 
215 ///////////////////////////////////////////////////////////////////////////////
216 // StreamLocalCopy
217 
218 namespace internal {
219 
220 template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
221 class StreamLocalCopy;
222 
223 //! Do copy optimization.
224 template<typename Stream>
225 class StreamLocalCopy<Stream, 1> {
226 public:
StreamLocalCopy(Stream & original)227     StreamLocalCopy(Stream& original) : s(original), original_(original) {}
~StreamLocalCopy()228     ~StreamLocalCopy() { original_ = s; }
229 
230     Stream s;
231 
232 private:
233     StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
234 
235     Stream& original_;
236 };
237 
238 //! Keep reference.
239 template<typename Stream>
240 class StreamLocalCopy<Stream, 0> {
241 public:
StreamLocalCopy(Stream & original)242     StreamLocalCopy(Stream& original) : s(original) {}
243 
244     Stream& s;
245 
246 private:
247     StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
248 };
249 
250 } // namespace internal
251 
252 ///////////////////////////////////////////////////////////////////////////////
253 // SkipWhitespace
254 
255 //! Skip the JSON white spaces in a stream.
256 /*! \param is A input stream for skipping white spaces.
257     \note This function has SSE2/SSE4.2 specialization.
258 */
259 template<typename InputStream>
SkipWhitespace(InputStream & is)260 void SkipWhitespace(InputStream& is) {
261     internal::StreamLocalCopy<InputStream> copy(is);
262     InputStream& s(copy.s);
263 
264     while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t')
265         s.Take();
266 }
267 
//! Skip the JSON white spaces in the character range [p, end).
/*! \param p Start of the range.
    \param end One past the last character of the range.
    \return Pointer to the first character that is not a space, line feed,
        carriage return or tab, or \c end when the range holds no such character.
*/
inline const char* SkipWhitespace(const char* p, const char* end) {
    for (; p != end; ++p) {
        const char c = *p;
        if (c != ' ' && c != '\n' && c != '\r' && c != '\t')
            break;
    }
    return p;
}
273 
274 #ifdef RAPIDJSON_SSE42
//! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-bit characters at once.
/*! \param p Start of the text to scan.
    \return Pointer to the first character at or after \c p that is not a
        space, line feed, carriage return or tab.
    \note There is no length bound: the scan only ends when such a character
        is found, so the caller must guarantee that one exists.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Fast return for single non-whitespace
    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
        ++p;
    else
        return p;

    // 16-byte align to the next boundary
    // (scalar steps until p is aligned, so that the loop below can use the aligned load _mm_load_si128)
    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    while (p != nextAligned)
        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
            ++p;
        else
            return p;

    // The rest of string using SIMD
    // The set of characters to match: the four whitespace characters; the rest of the array is zero-filled.
    static const char whitespace[16] = " \n\r\t";
    const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));

    for (;; p += 16) {
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
        // r is a bit mask with one bit per character of the chunk; because of the
        // negative polarity a set bit marks a character that matched none of the whitespace characters.
        const int r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY));
        if (r != 0) {   // some of characters is non-whitespace
#ifdef _MSC_VER         // Find the index of first non-whitespace
            unsigned long offset;
            _BitScanForward(&offset, r);
            return p + offset;
#else
            // __builtin_ffs returns a 1-based bit position, hence the -1
            return p + __builtin_ffs(r) - 1;
#endif
        }
    }
}
309 
SkipWhitespace_SIMD(const char * p,const char * end)310 inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
311     // Fast return for single non-whitespace
312     if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
313         ++p;
314     else
315         return p;
316 
317     // The middle of string using SIMD
318     static const char whitespace[16] = " \n\r\t";
319     const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
320 
321     for (; p <= end - 16; p += 16) {
322         const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
323         const int r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY));
324         if (r != 0) {   // some of characters is non-whitespace
325 #ifdef _MSC_VER         // Find the index of first non-whitespace
326             unsigned long offset;
327             _BitScanForward(&offset, r);
328             return p + offset;
329 #else
330             return p + __builtin_ffs(r) - 1;
331 #endif
332         }
333     }
334 
335     return SkipWhitespace(p, end);
336 }
337 
338 #elif defined(RAPIDJSON_SSE2)
339 
//! Skip whitespace with SSE2 instructions, testing 16 8-bit characters at once.
/*! \param p Start of the text to scan.
    \return Pointer to the first character at or after \c p that is not a
        space, line feed, carriage return or tab.
    \note There is no length bound: the scan only ends when such a character
        is found, so the caller must guarantee that one exists.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Fast return for single non-whitespace
    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
        ++p;
    else
        return p;

    // 16-byte align to the next boundary
    // (scalar steps until p is aligned, so that the loop below can use the aligned load _mm_load_si128)
    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    while (p != nextAligned)
        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
            ++p;
        else
            return p;

    // The rest of string
    // One 16-byte row per whitespace character, each row filled with that character.
    #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
    static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
    #undef C16

    const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
    const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
    const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
    const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));

    for (;; p += 16) {
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
        // x accumulates, per byte, whether it equals any of the four whitespace characters.
        __m128i x = _mm_cmpeq_epi8(s, w0);
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
        // Inverted mask, one bit per byte: a set bit marks a byte that is not whitespace.
        unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
        if (r != 0) {   // some of characters may be non-whitespace
#ifdef _MSC_VER         // Find the index of first non-whitespace
            unsigned long offset;
            _BitScanForward(&offset, r);
            return p + offset;
#else
            // __builtin_ffs returns a 1-based bit position, hence the -1
            return p + __builtin_ffs(r) - 1;
#endif
        }
    }
}
384 
SkipWhitespace_SIMD(const char * p,const char * end)385 inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
386     // Fast return for single non-whitespace
387     if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
388         ++p;
389     else
390         return p;
391 
392     // The rest of string
393     #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
394     static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
395     #undef C16
396 
397     const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
398     const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
399     const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
400     const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
401 
402     for (; p <= end - 16; p += 16) {
403         const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
404         __m128i x = _mm_cmpeq_epi8(s, w0);
405         x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
406         x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
407         x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
408         unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
409         if (r != 0) {   // some of characters may be non-whitespace
410 #ifdef _MSC_VER         // Find the index of first non-whitespace
411             unsigned long offset;
412             _BitScanForward(&offset, r);
413             return p + offset;
414 #else
415             return p + __builtin_ffs(r) - 1;
416 #endif
417         }
418     }
419 
420     return SkipWhitespace(p, end);
421 }
422 
423 #endif // RAPIDJSON_SSE2
424 
425 #ifdef RAPIDJSON_SIMD
426 //! Template function specialization for InsituStringStream
SkipWhitespace(InsituStringStream & is)427 template<> inline void SkipWhitespace(InsituStringStream& is) {
428     is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_));
429 }
430 
431 //! Template function specialization for StringStream
SkipWhitespace(StringStream & is)432 template<> inline void SkipWhitespace(StringStream& is) {
433     is.src_ = SkipWhitespace_SIMD(is.src_);
434 }
435 
SkipWhitespace(EncodedInputStream<UTF8<>,MemoryStream> & is)436 template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) {
437     is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_);
438 }
439 #endif // RAPIDJSON_SIMD
440 
441 ///////////////////////////////////////////////////////////////////////////////
442 // GenericReader
443 
444 //! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator.
/*! GenericReader parses JSON text from a stream, and sends events synchronously to an
    object implementing the Handler concept.
447 
448     It needs to allocate a stack for storing a single decoded string during
449     non-destructive parsing.
450 
451     For in-situ parsing, the decoded string is directly written to the source
452     text string, no temporary buffer is required.
453 
    A GenericReader object can be reused for parsing multiple JSON texts.
455 
456     \tparam SourceEncoding Encoding of the input stream.
457     \tparam TargetEncoding Encoding of the parse output.
458     \tparam StackAllocator Allocator type for stack.
459 */
460 template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator>
461 class GenericReader {
462 public:
463     typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type
464 
465     //! Constructor.
466     /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing)
467         \param stackCapacity stack capacity in bytes for storing a single decoded string.  (Only use for non-destructive parsing)
468     */
stack_(stackAllocator,stackCapacity)469     GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(stackAllocator, stackCapacity), parseResult_() {}
470 
    //! Parse JSON text.
    /*! Skips leading whitespace (and comments, if enabled), parses exactly one
        value and, unless \ref kParseStopWhenDoneFlag is set, requires that only
        whitespace/comments follow it. With \ref kParseIterativeFlag the whole
        job is delegated to IterativeParse().

        \tparam parseFlags Combination of \ref ParseFlag.
        \tparam InputStream Type of input stream, implementing Stream concept.
        \tparam Handler Type of handler, implementing Handler concept.
        \param is Input stream to be parsed.
        \param handler The handler to receive events.
        \return The result of this parse; the same information is available
            afterwards through HasParseError(), GetParseErrorCode() and GetErrorOffset().
    */
    template <unsigned parseFlags, typename InputStream, typename Handler>
    ParseResult Parse(InputStream& is, Handler& handler) {
        // parseFlags is a compile-time constant, so this selects one implementation per instantiation.
        if (parseFlags & kParseIterativeFlag)
            return IterativeParse<parseFlags>(is, handler);

        // Drop the outcome of any previous parse.
        parseResult_.Clear();

        // Clears the stack when this function is left, on every exit path.
        ClearStackOnExit scope(*this);

        SkipWhitespaceAndComments<parseFlags>(is);
        RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);

        // A '\0' right after the leading whitespace means there is no value at all.
        if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) {
            RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell());
            RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
        }
        else {
            ParseValue<parseFlags>(is, handler);
            RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);

            // Unless asked to stop after the root, anything but '\0' after the
            // trailing whitespace/comments is an error.
            if (!(parseFlags & kParseStopWhenDoneFlag)) {
                SkipWhitespaceAndComments<parseFlags>(is);
                RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);

                if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) {
                    RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell());
                    RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
                }
            }
        }

        return parseResult_;
    }
512 
513     //! Parse JSON text (with \ref kParseDefaultFlags)
514     /*! \tparam InputStream Type of input stream, implementing Stream concept
515         \tparam Handler Type of handler, implementing Handler concept.
516         \param is Input stream to be parsed.
517         \param handler The handler to receive events.
518         \return Whether the parsing is successful.
519     */
520     template <typename InputStream, typename Handler>
Parse(InputStream & is,Handler & handler)521     ParseResult Parse(InputStream& is, Handler& handler) {
522         return Parse<kParseDefaultFlags>(is, handler);
523     }
524 
525     //! Whether a parse error has occured in the last parsing.
HasParseError()526     bool HasParseError() const { return parseResult_.IsError(); }
527 
528     //! Get the \ref ParseErrorCode of last parsing.
GetParseErrorCode()529     ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); }
530 
531     //! Get the position of last parsing error in input, 0 otherwise.
GetErrorOffset()532     size_t GetErrorOffset() const { return parseResult_.Offset(); }
533 
534 protected:
SetParseError(ParseErrorCode code,size_t offset)535     void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); }
536 
537 private:
538     // Prohibit copy constructor & assignment operator.
539     GenericReader(const GenericReader&);
540     GenericReader& operator=(const GenericReader&);
541 
ClearStack()542     void ClearStack() { stack_.Clear(); }
543 
544     // clear stack on any exit from ParseStream, e.g. due to exception
545     struct ClearStackOnExit {
ClearStackOnExitClearStackOnExit546         explicit ClearStackOnExit(GenericReader& r) : r_(r) {}
~ClearStackOnExitClearStackOnExit547         ~ClearStackOnExit() { r_.ClearStack(); }
548     private:
549         GenericReader& r_;
550         ClearStackOnExit(const ClearStackOnExit&);
551         ClearStackOnExit& operator=(const ClearStackOnExit&);
552     };
553 
    // Skip whitespace and, when kParseCommentsFlag is set, any number of
    // one-line (//) and multi-line (/* */) comments together with the
    // whitespace after each of them.
    // Reports kParseErrorUnspecificSyntaxError for a multi-line comment that is
    // still open at '\0' and for a '/' that starts neither kind of comment.
    template<unsigned parseFlags, typename InputStream>
    void SkipWhitespaceAndComments(InputStream& is) {
        SkipWhitespace(is);

        if (parseFlags & kParseCommentsFlag) {
            // Each iteration handles one comment, introduced by '/'.
            while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) {
                if (Consume(is, '*')) {
                    // Multi-line comment: take characters until the closing "*/".
                    while (true) {
                        if (RAPIDJSON_UNLIKELY(is.Peek() == '\0'))
                            RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
                        else if (Consume(is, '*')) {
                            // A '*' only ends the comment when '/' follows immediately.
                            if (Consume(is, '/'))
                                break;
                        }
                        else
                            is.Take();
                    }
                }
                // One-line comment: take characters up to and including '\n';
                // a '\0' stops the loop without being taken.
                else if (RAPIDJSON_LIKELY(Consume(is, '/')))
                    while (is.Peek() != '\0' && is.Take() != '\n');
                else
                    RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());

                // Whitespace after the comment; another comment may follow.
                SkipWhitespace(is);
            }
        }
    }
581 
    // Parse object: { string : value, ... }
    // Expects the stream to be positioned on '{'. Sends StartObject(), then for
    // each member the events produced by ParseString() and ParseValue(), and
    // finally EndObject(memberCount). A handler call returning false is reported
    // as kParseErrorTermination. After every nested step that can set an error,
    // RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID leaves the function if one is set.
    template<unsigned parseFlags, typename InputStream, typename Handler>
    void ParseObject(InputStream& is, Handler& handler) {
        RAPIDJSON_ASSERT(is.Peek() == '{');
        is.Take();  // Skip '{'

        if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
            RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());

        SkipWhitespaceAndComments<parseFlags>(is);
        RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;

        if (Consume(is, '}')) {
            if (RAPIDJSON_UNLIKELY(!handler.EndObject(0)))  // empty object
                RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
            return;
        }

        // One iteration per member; the loop is left only through a return.
        for (SizeType memberCount = 0;;) {
            // A member has to start with the opening quote of its name.
            if (RAPIDJSON_UNLIKELY(is.Peek() != '"'))
                RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());

            // NOTE(review): the third argument presumably marks the string as a
            // member name; ParseString() is defined outside this view - confirm.
            ParseString<parseFlags>(is, handler, true);
            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;

            SkipWhitespaceAndComments<parseFlags>(is);
            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;

            if (RAPIDJSON_UNLIKELY(!Consume(is, ':')))
                RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());

            SkipWhitespaceAndComments<parseFlags>(is);
            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;

            ParseValue<parseFlags>(is, handler);
            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;

            SkipWhitespaceAndComments<parseFlags>(is);
            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;

            ++memberCount;

            // After a member: ',' continues with the next member, '}' ends the
            // object, anything else is an error.
            switch (is.Peek()) {
                case ',':
                    is.Take();
                    SkipWhitespaceAndComments<parseFlags>(is);
                    RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
                    break;
                case '}':
                    is.Take();
                    if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
                        RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
                    return;
                default:
                    RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell());
                    break;
            }
        }
    }
641 
    // Parse array: [ value, ... ]
    // Expects the stream to be positioned on '['. Sends StartArray(), then the
    // events of each element through ParseValue(), and finally
    // EndArray(elementCount). A handler call returning false is reported as
    // kParseErrorTermination. After every nested step that can set an error,
    // RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID leaves the function if one is set.
    template<unsigned parseFlags, typename InputStream, typename Handler>
    void ParseArray(InputStream& is, Handler& handler) {
        RAPIDJSON_ASSERT(is.Peek() == '[');
        is.Take();  // Skip '['

        if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
            RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());

        SkipWhitespaceAndComments<parseFlags>(is);
        RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;

        if (Consume(is, ']')) {
            if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array
                RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
            return;
        }

        // One iteration per element; the loop is left only through a return.
        for (SizeType elementCount = 0;;) {
            ParseValue<parseFlags>(is, handler);
            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;

            ++elementCount;
            SkipWhitespaceAndComments<parseFlags>(is);
            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;

            // After an element: ',' continues with the next element, ']' ends
            // the array, anything else is an error.
            if (Consume(is, ',')) {
                SkipWhitespaceAndComments<parseFlags>(is);
                RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
            }
            else if (Consume(is, ']')) {
                if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
                    RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
                return;
            }
            else
                RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
        }
    }
681 
682     template<unsigned parseFlags, typename InputStream, typename Handler>
ParseNull(InputStream & is,Handler & handler)683     void ParseNull(InputStream& is, Handler& handler) {
684         RAPIDJSON_ASSERT(is.Peek() == 'n');
685         is.Take();
686 
687         if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) {
688             if (RAPIDJSON_UNLIKELY(!handler.Null()))
689                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
690         }
691         else
692             RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
693     }
694 
695     template<unsigned parseFlags, typename InputStream, typename Handler>
ParseTrue(InputStream & is,Handler & handler)696     void ParseTrue(InputStream& is, Handler& handler) {
697         RAPIDJSON_ASSERT(is.Peek() == 't');
698         is.Take();
699 
700         if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) {
701             if (RAPIDJSON_UNLIKELY(!handler.Bool(true)))
702                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
703         }
704         else
705             RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
706     }
707 
708     template<unsigned parseFlags, typename InputStream, typename Handler>
ParseFalse(InputStream & is,Handler & handler)709     void ParseFalse(InputStream& is, Handler& handler) {
710         RAPIDJSON_ASSERT(is.Peek() == 'f');
711         is.Take();
712 
713         if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) {
714             if (RAPIDJSON_UNLIKELY(!handler.Bool(false)))
715                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
716         }
717         else
718             RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
719     }
720 
721     template<typename InputStream>
Consume(InputStream & is,typename InputStream::Ch expect)722     RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) {
723         if (RAPIDJSON_LIKELY(is.Peek() == expect)) {
724             is.Take();
725             return true;
726         }
727         else
728             return false;
729     }
730 
731     // Helper function to parse four hexidecimal digits in \uXXXX in ParseString().
732     template<typename InputStream>
ParseHex4(InputStream & is,size_t escapeOffset)733     unsigned ParseHex4(InputStream& is, size_t escapeOffset) {
734         unsigned codepoint = 0;
735         for (int i = 0; i < 4; i++) {
736             Ch c = is.Peek();
737             codepoint <<= 4;
738             codepoint += static_cast<unsigned>(c);
739             if (c >= '0' && c <= '9')
740                 codepoint -= '0';
741             else if (c >= 'A' && c <= 'F')
742                 codepoint -= 'A' - 10;
743             else if (c >= 'a' && c <= 'f')
744                 codepoint -= 'a' - 10;
745             else {
746                 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, escapeOffset);
747                 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0);
748             }
749             is.Take();
750         }
751         return codepoint;
752     }
753 
754     template <typename CharType>
755     class StackStream {
756     public:
757         typedef CharType Ch;
758 
StackStream(internal::Stack<StackAllocator> & stack)759         StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {}
Put(Ch c)760         RAPIDJSON_FORCEINLINE void Put(Ch c) {
761             *stack_.template Push<Ch>() = c;
762             ++length_;
763         }
764 
Push(SizeType count)765         RAPIDJSON_FORCEINLINE void* Push(SizeType count) {
766             length_ += count;
767             return stack_.template Push<Ch>(count);
768         }
769 
Length()770         size_t Length() const { return length_; }
771 
Pop()772         Ch* Pop() {
773             return stack_.template Pop<Ch>(length_);
774         }
775 
776     private:
777         StackStream(const StackStream&);
778         StackStream& operator=(const StackStream&);
779 
780         internal::Stack<StackAllocator>& stack_;
781         SizeType length_;
782     };
783 
784     // Parse string and generate String event. Different code paths for kParseInsituFlag.
785     template<unsigned parseFlags, typename InputStream, typename Handler>
786     void ParseString(InputStream& is, Handler& handler, bool isKey = false) {
787         internal::StreamLocalCopy<InputStream> copy(is);
788         InputStream& s(copy.s);
789 
790         RAPIDJSON_ASSERT(s.Peek() == '\"');
791         s.Take();  // Skip '\"'
792 
793         bool success = false;
794         if (parseFlags & kParseInsituFlag) {
795             typename InputStream::Ch *head = s.PutBegin();
796             ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s);
797             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
798             size_t length = s.PutEnd(head) - 1;
799             RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
800             const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
801             success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false));
802         }
803         else {
804             StackStream<typename TargetEncoding::Ch> stackStream(stack_);
805             ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream);
806             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
807             SizeType length = static_cast<SizeType>(stackStream.Length()) - 1;
808             const typename TargetEncoding::Ch* const str = stackStream.Pop();
809             success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true));
810         }
811         if (RAPIDJSON_UNLIKELY(!success))
812             RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
813     }
814 
815     // Parse string to an output is
816     // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation.
817     template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream>
ParseStringToStream(InputStream & is,OutputStream & os)818     RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) {
819 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
820 #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
821         static const char escape[256] = {
822             Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/',
823             Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
824             0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
825             0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
826             Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16
827         };
828 #undef Z16
829 //!@endcond
830 
831         for (;;) {
832             // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation.
833             if (!(parseFlags & kParseValidateEncodingFlag))
834                 ScanCopyUnescapedString(is, os);
835 
836             Ch c = is.Peek();
837             if (RAPIDJSON_UNLIKELY(c == '\\')) {    // Escape
838                 size_t escapeOffset = is.Tell();    // For invalid escaping, report the inital '\\' as error offset
839                 is.Take();
840                 Ch e = is.Peek();
841                 if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) {
842                     is.Take();
843                     os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)]));
844                 }
845                 else if (RAPIDJSON_LIKELY(e == 'u')) {    // Unicode
846                     is.Take();
847                     unsigned codepoint = ParseHex4(is, escapeOffset);
848                     RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
849                     if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) {
850                         // Handle UTF-16 surrogate pair
851                         if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
852                             RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
853                         unsigned codepoint2 = ParseHex4(is, escapeOffset);
854                         RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
855                         if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
856                             RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
857                         codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
858                     }
859                     TEncoding::Encode(os, codepoint);
860                 }
861                 else
862                     RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset);
863             }
864             else if (RAPIDJSON_UNLIKELY(c == '"')) {    // Closing double quote
865                 is.Take();
866                 os.Put('\0');   // null-terminate the string
867                 return;
868             }
869             else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
870                 if (c == '\0')
871                     RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell());
872                 else
873                     RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell());
874             }
875             else {
876                 size_t offset = is.Tell();
877                 if (RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag ?
878                     !Transcoder<SEncoding, TEncoding>::Validate(is, os) :
879                     !Transcoder<SEncoding, TEncoding>::Transcode(is, os))))
880                     RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, offset);
881             }
882         }
883     }
884 
885     template<typename InputStream, typename OutputStream>
ScanCopyUnescapedString(InputStream &,OutputStream &)886     static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) {
887             // Do nothing for generic version
888     }
889 
890 #if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
891     // StringStream -> StackStream<char>
ScanCopyUnescapedString(StringStream & is,StackStream<char> & os)892     static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
893         const char* p = is.src_;
894 
895         // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
896         const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
897         while (p != nextAligned)
898             if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
899                 is.src_ = p;
900                 return;
901             }
902             else
903                 os.Put(*p++);
904 
905         // The rest of string using SIMD
906         static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
907         static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
908         static const char space[16]  = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
909         const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
910         const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
911         const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
912 
913         for (;; p += 16) {
914             const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
915             const __m128i t1 = _mm_cmpeq_epi8(s, dq);
916             const __m128i t2 = _mm_cmpeq_epi8(s, bs);
917             const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
918             const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
919             unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
920             if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
921                 SizeType length;
922     #ifdef _MSC_VER         // Find the index of first escaped
923                 unsigned long offset;
924                 _BitScanForward(&offset, r);
925                 length = offset;
926     #else
927                 length = static_cast<SizeType>(__builtin_ffs(r) - 1);
928     #endif
929                 char* q = reinterpret_cast<char*>(os.Push(length));
930                 for (size_t i = 0; i < length; i++)
931                     q[i] = p[i];
932 
933                 p += length;
934                 break;
935             }
936             _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s);
937         }
938 
939         is.src_ = p;
940     }
941 
942     // InsituStringStream -> InsituStringStream
ScanCopyUnescapedString(InsituStringStream & is,InsituStringStream & os)943     static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
944         RAPIDJSON_ASSERT(&is == &os);
945         (void)os;
946 
947         if (is.src_ == is.dst_) {
948             SkipUnescapedString(is);
949             return;
950         }
951 
952         char* p = is.src_;
953         char *q = is.dst_;
954 
955         // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
956         const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
957         while (p != nextAligned)
958             if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
959                 is.src_ = p;
960                 is.dst_ = q;
961                 return;
962             }
963             else
964                 *q++ = *p++;
965 
966         // The rest of string using SIMD
967         static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
968         static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
969         static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
970         const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
971         const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
972         const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
973 
974         for (;; p += 16, q += 16) {
975             const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
976             const __m128i t1 = _mm_cmpeq_epi8(s, dq);
977             const __m128i t2 = _mm_cmpeq_epi8(s, bs);
978             const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
979             const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
980             unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
981             if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
982                 size_t length;
983 #ifdef _MSC_VER         // Find the index of first escaped
984                 unsigned long offset;
985                 _BitScanForward(&offset, r);
986                 length = offset;
987 #else
988                 length = static_cast<size_t>(__builtin_ffs(r) - 1);
989 #endif
990                 for (const char* pend = p + length; p != pend; )
991                     *q++ = *p++;
992                 break;
993             }
994             _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s);
995         }
996 
997         is.src_ = p;
998         is.dst_ = q;
999     }
1000 
1001     // When read/write pointers are the same for insitu stream, just skip unescaped characters
SkipUnescapedString(InsituStringStream & is)1002     static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
1003         RAPIDJSON_ASSERT(is.src_ == is.dst_);
1004         char* p = is.src_;
1005 
1006         // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1007         const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1008         for (; p != nextAligned; p++)
1009             if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1010                 is.src_ = is.dst_ = p;
1011                 return;
1012             }
1013 
1014         // The rest of string using SIMD
1015         static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
1016         static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
1017         static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
1018         const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
1019         const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
1020         const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
1021 
1022         for (;; p += 16) {
1023             const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
1024             const __m128i t1 = _mm_cmpeq_epi8(s, dq);
1025             const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1026             const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
1027             const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
1028             unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
1029             if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
1030                 size_t length;
1031 #ifdef _MSC_VER         // Find the index of first escaped
1032                 unsigned long offset;
1033                 _BitScanForward(&offset, r);
1034                 length = offset;
1035 #else
1036                 length = static_cast<size_t>(__builtin_ffs(r) - 1);
1037 #endif
1038                 p += length;
1039                 break;
1040             }
1041         }
1042 
1043         is.src_ = is.dst_ = p;
1044     }
1045 #endif
1046 
1047     template<typename InputStream, bool backup>
1048     class NumberStream;
1049 
1050     template<typename InputStream>
1051     class NumberStream<InputStream, false> {
1052     public:
1053         typedef typename InputStream::Ch Ch;
1054 
NumberStream(GenericReader & reader,InputStream & s)1055         NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader;  }
~NumberStream()1056         ~NumberStream() {}
1057 
Peek()1058         RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
TakePush()1059         RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
Take()1060         RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
Push(char)1061 		  RAPIDJSON_FORCEINLINE void Push(char) {}
1062 
Tell()1063         size_t Tell() { return is.Tell(); }
Length()1064         size_t Length() { return 0; }
Pop()1065         const char* Pop() { return 0; }
1066 
1067     protected:
1068         NumberStream& operator=(const NumberStream&);
1069 
1070         InputStream& is;
1071     };
1072 
1073     template<typename InputStream>
1074     class NumberStream<InputStream, true> : public NumberStream<InputStream, false> {
1075         typedef NumberStream<InputStream, false> Base;
1076     public:
NumberStream(GenericReader & reader,InputStream & is)1077         NumberStream(GenericReader& reader, InputStream& is) : NumberStream<InputStream, false>(reader, is), stackStream(reader.stack_) {}
~NumberStream()1078         ~NumberStream() {}
1079 
TakePush()1080         RAPIDJSON_FORCEINLINE Ch TakePush() {
1081             stackStream.Put(static_cast<char>(Base::is.Peek()));
1082             return Base::is.Take();
1083         }
1084 
Push(char c)1085 		  RAPIDJSON_FORCEINLINE void Push(char c) {
1086 			  stackStream.Put(c);
1087 		  }
1088 
Length()1089         size_t Length() { return stackStream.Length(); }
1090 
Pop()1091         const char* Pop() {
1092             stackStream.Put('\0');
1093             return stackStream.Pop();
1094         }
1095 
1096     private:
1097         StackStream<char> stackStream;
1098     };
1099 
    // Parse a JSON number at the current stream position and report it to the handler.
    //
    // Stages, in order:
    //   1. optional leading '-'
    //   2. integer part, accumulated in a 32-bit `i`, escalated to the 64-bit `i64` and then to
    //      the double `d` when the next digit would no longer fit
    //   3. optional fraction: '.' followed by at least one digit
    //   4. optional exponent: 'e' or 'E', optional sign, at least one digit
    //   5. handler event: RawNumber (kParseNumbersAsStringsFlag), Double, Int64/Uint64 or Int/Uint
    //
    // Errors reported: kParseErrorValueInvalid, kParseErrorNumberTooBig,
    // kParseErrorNumberMissFraction, kParseErrorNumberMissExponent, and
    // kParseErrorTermination when the handler returns false.
    template<unsigned parseFlags, typename InputStream, typename Handler>
    void ParseNumber(InputStream& is, Handler& handler) {
        internal::StreamLocalCopy<InputStream> copy(is);
        // Characters are backed up on the stack when either
        //  - numbers are delivered as strings and parsing is not in situ, or
        //  - numbers are parsed normally with kParseFullPrecisionFlag.
        NumberStream<InputStream,
            ((parseFlags & kParseNumbersAsStringsFlag) != 0) ?
                ((parseFlags & kParseInsituFlag) == 0) :
                ((parseFlags & kParseFullPrecisionFlag) != 0)> s(*this, copy.s);

        size_t startOffset = s.Tell();

        // Parse minus
        // NOTE(review): Consume() uses Take(), so the sign is not recorded by the backup stream — confirm this is intended for RawNumber output.
        bool minus = Consume(s, '-');

        // Parse int: zero / ( digit1-9 *DIGIT )
        unsigned i = 0;
        uint64_t i64 = 0;
        bool use64bit = false;
        int significandDigit = 0;
        if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) {
            i = 0;
            s.TakePush();
        }
        else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) {
            i = static_cast<unsigned>(s.TakePush() - '0');

            // Before appending each digit, check whether the result would exceed the 32-bit limit;
            // if so, hand the value over to i64 and leave the digit unconsumed.
            if (minus)
                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
                    if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648
                        if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) {
                            i64 = i;
                            use64bit = true;
                            break;
                        }
                    }
                    i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
                    significandDigit++;
                }
            else
                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
                    if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295
                        if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) {
                            i64 = i;
                            use64bit = true;
                            break;
                        }
                    }
                    i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
                    significandDigit++;
                }
        }
        else
            RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());

        // Parse 64bit int
        // Same scheme as above with the 64-bit limits; on overflow the value is handed over to `d`.
        bool useDouble = false;
        double d = 0.0;
        if (use64bit) {
            if (minus)
                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
                     if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808
                        if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) {
                            d = static_cast<double>(i64);
                            useDouble = true;
                            break;
                        }
                    i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
                    significandDigit++;
                }
            else
                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
                    if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615
                        if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) {
                            d = static_cast<double>(i64);
                            useDouble = true;
                            break;
                        }
                    i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
                    significandDigit++;
                }
        }

        // Force double for big integer
        if (useDouble) {
            while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
                if (RAPIDJSON_UNLIKELY(d >= 1.7976931348623157e307)) // DBL_MAX / 10.0
                    RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
                d = d * 10 + (s.TakePush() - '0');
            }
        }

        // Parse frac = decimal-point 1*DIGIT
        int expFrac = 0;            // minus the number of fraction digits folded into the significand
        size_t decimalPosition;     // backup-stream length at the decimal point (or at the end of the integer part)
        if (Consume(s, '.')) {
            // Consume() does not record the '.', so add it to the backup explicitly in this mode.
            if (((parseFlags & kParseNumbersAsStringsFlag) != 0) && ((parseFlags & kParseInsituFlag) == 0)) {
					s.Push('.');
				}
            decimalPosition = s.Length();

            if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9')))
                RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell());

            if (!useDouble) {
#if RAPIDJSON_64BIT
                // Use i64 to store significand in 64-bit architecture
                if (!use64bit)
                    i64 = i;

                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
                    if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
                        break;
                    else {
                        i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
                        --expFrac;
                        // Leading zeros of a value that is still zero are not counted as significant.
                        if (i64 != 0)
                            significandDigit++;
                    }
                }

                d = static_cast<double>(i64);
#else
                // Use double to store significand in 32-bit architecture
                d = static_cast<double>(use64bit ? i64 : i);
#endif
                useDouble = true;
            }

            // Remaining fraction digits: fold them into `d` while fewer than 17 significant
            // digits have been counted; further digits are consumed but not folded in.
            while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
                if (significandDigit < 17) {
                    d = d * 10.0 + (s.TakePush() - '0');
                    --expFrac;
                    if (RAPIDJSON_LIKELY(d > 0.0))
                        significandDigit++;
                }
                else
                    s.TakePush();
            }
        }
        else
            decimalPosition = s.Length(); // decimal position at the end of integer.

        // Parse exp = e [ minus / plus ] 1*DIGIT
        int exp = 0;
        if (Consume(s, 'e') || Consume(s, 'E')) {
            // Consume() does not record the marker; a lowercase 'e' is recorded for either case.
            if ( ((parseFlags & kParseNumbersAsStringsFlag) != 0) && ((parseFlags & kParseInsituFlag) == 0) ) {
                s.Push( 'e' );
            }

			   if (!useDouble) {
                d = static_cast<double>(use64bit ? i64 : i);
                useDouble = true;
            }

            bool expMinus = false;
            if (Consume(s, '+'))
                ;
            else if (Consume(s, '-'))
                expMinus = true;

            // NOTE(review): the exponent sign and digits are read with Take(), not TakePush(), so they
            // are not counted in s.Length() — confirm the non-in-situ RawNumber length is as intended.
            if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
                exp = static_cast<int>(s.Take() - '0');
                if (expMinus) {
                    while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
                        exp = exp * 10 + static_cast<int>(s.Take() - '0');
                        if (exp >= 214748364) {                         // Issue #313: prevent overflow exponent
                            while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9'))  // Consume the rest of exponent
                                s.Take();
                        }
                    }
                }
                else {  // positive exp
                    // expFrac is <= 0, so the limit grows by the number of fraction digits already folded in.
                    int maxExp = 308 - expFrac;
                    while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
                        exp = exp * 10 + static_cast<int>(s.Take() - '0');
                        if (RAPIDJSON_UNLIKELY(exp > maxExp))
                            RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
                    }
                }
            }
            else
                RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell());

            if (expMinus)
                exp = -exp;
        }

        // Finish parsing, call event according to the type of number.
        bool cont = true;

        if (parseFlags & kParseNumbersAsStringsFlag) {
            if (parseFlags & kParseInsituFlag) {
                s.Pop();  // Pop stack no matter if it will be used or not.
                typename InputStream::Ch* head = is.PutBegin();
                const size_t length = s.Tell() - startOffset;
                RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
                // unable to insert the \0 character here, it will erase the comma after this number
                const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
                cont = handler.RawNumber(str, SizeType(length), false);
            }
            else {
                // Transcode s.Length() characters from the caller's stream `is` into a fresh stack stream.
                StackStream<typename TargetEncoding::Ch> stackStream(stack_);
                SizeType numCharsToCopy = static_cast<SizeType>(s.Length());
                while (numCharsToCopy--) {
                    Transcoder<SourceEncoding, TargetEncoding>::Transcode(is, stackStream);
                }
                stackStream.Put('\0');
                const typename TargetEncoding::Ch* str = stackStream.Pop();
                const SizeType length = static_cast<SizeType>(stackStream.Length()) - 1;  // exclude the '\0'
                cont = handler.RawNumber(str, SizeType(length), true);
            }
        }
        else {
           size_t length = s.Length();
           const char* decimal = s.Pop();  // Pop stack no matter if it will be used or not.

           if (useDouble) {
               // Combined decimal exponent: explicit exponent plus the (non-positive) fraction shift.
               int p = exp + expFrac;
               if (parseFlags & kParseFullPrecisionFlag)
                   d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
               else
                   d = internal::StrtodNormalPrecision(d, p);

               cont = handler.Double(minus ? -d : d);
           }
           else {
               // Negation is done as two's complement (~x + 1) on the unsigned value before the signed cast.
               if (use64bit) {
                   if (minus)
                       cont = handler.Int64(static_cast<int64_t>(~i64 + 1));
                   else
                       cont = handler.Uint64(i64);
               }
               else {
                   if (minus)
                       cont = handler.Int(static_cast<int32_t>(~i + 1));
                   else
                       cont = handler.Uint(i);
               }
           }
        }
        if (RAPIDJSON_UNLIKELY(!cont))
            RAPIDJSON_PARSE_ERROR(kParseErrorTermination, startOffset);
    }
1342 
1343     // Parse any JSON value
1344     template<unsigned parseFlags, typename InputStream, typename Handler>
ParseValue(InputStream & is,Handler & handler)1345     void ParseValue(InputStream& is, Handler& handler) {
1346         switch (is.Peek()) {
1347             case 'n': ParseNull  <parseFlags>(is, handler); break;
1348             case 't': ParseTrue  <parseFlags>(is, handler); break;
1349             case 'f': ParseFalse <parseFlags>(is, handler); break;
1350             case '"': ParseString<parseFlags>(is, handler); break;
1351             case '{': ParseObject<parseFlags>(is, handler); break;
1352             case '[': ParseArray <parseFlags>(is, handler); break;
1353             default :
1354                       ParseNumber<parseFlags>(is, handler);
1355                       break;
1356 
1357         }
1358     }
1359 
1360     // Iterative Parsing
1361 
1362     // States
1363     enum IterativeParsingState {
1364         IterativeParsingStartState = 0,
1365         IterativeParsingFinishState,
1366         IterativeParsingErrorState,
1367 
1368         // Object states
1369         IterativeParsingObjectInitialState,
1370         IterativeParsingMemberKeyState,
1371         IterativeParsingKeyValueDelimiterState,
1372         IterativeParsingMemberValueState,
1373         IterativeParsingMemberDelimiterState,
1374         IterativeParsingObjectFinishState,
1375 
1376         // Array states
1377         IterativeParsingArrayInitialState,
1378         IterativeParsingElementState,
1379         IterativeParsingElementDelimiterState,
1380         IterativeParsingArrayFinishState,
1381 
1382         // Single value state
1383         IterativeParsingValueState
1384     };
1385 
1386     enum { cIterativeParsingStateCount = IterativeParsingValueState + 1 };
1387 
1388     // Tokens
1389     enum Token {
1390         LeftBracketToken = 0,
1391         RightBracketToken,
1392 
1393         LeftCurlyBracketToken,
1394         RightCurlyBracketToken,
1395 
1396         CommaToken,
1397         ColonToken,
1398 
1399         StringToken,
1400         FalseToken,
1401         TrueToken,
1402         NullToken,
1403         NumberToken,
1404 
1405         kTokenCount
1406     };
1407 
Tokenize(Ch c)1408     RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) {
1409 
1410 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
1411 #define N NumberToken
1412 #define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N
1413         // Maps from ASCII to Token
1414         static const unsigned char tokenMap[256] = {
1415             N16, // 00~0F
1416             N16, // 10~1F
1417             N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F
1418             N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F
1419             N16, // 40~4F
1420             N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F
1421             N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F
1422             N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F
1423             N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF
1424         };
1425 #undef N
1426 #undef N16
1427 //!@endcond
1428 
1429         if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256)
1430             return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]);
1431         else
1432             return NumberToken;
1433     }
1434 
Predict(IterativeParsingState state,Token token)1435     RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) {
1436         // current state x one lookahead token -> new state
1437         static const char G[cIterativeParsingStateCount][kTokenCount] = {
1438             // Start
1439             {
1440                 IterativeParsingArrayInitialState,  // Left bracket
1441                 IterativeParsingErrorState,         // Right bracket
1442                 IterativeParsingObjectInitialState, // Left curly bracket
1443                 IterativeParsingErrorState,         // Right curly bracket
1444                 IterativeParsingErrorState,         // Comma
1445                 IterativeParsingErrorState,         // Colon
1446                 IterativeParsingValueState,         // String
1447                 IterativeParsingValueState,         // False
1448                 IterativeParsingValueState,         // True
1449                 IterativeParsingValueState,         // Null
1450                 IterativeParsingValueState          // Number
1451             },
1452             // Finish(sink state)
1453             {
1454                 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1455                 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1456                 IterativeParsingErrorState
1457             },
1458             // Error(sink state)
1459             {
1460                 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1461                 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1462                 IterativeParsingErrorState
1463             },
1464             // ObjectInitial
1465             {
1466                 IterativeParsingErrorState,         // Left bracket
1467                 IterativeParsingErrorState,         // Right bracket
1468                 IterativeParsingErrorState,         // Left curly bracket
1469                 IterativeParsingObjectFinishState,  // Right curly bracket
1470                 IterativeParsingErrorState,         // Comma
1471                 IterativeParsingErrorState,         // Colon
1472                 IterativeParsingMemberKeyState,     // String
1473                 IterativeParsingErrorState,         // False
1474                 IterativeParsingErrorState,         // True
1475                 IterativeParsingErrorState,         // Null
1476                 IterativeParsingErrorState          // Number
1477             },
1478             // MemberKey
1479             {
1480                 IterativeParsingErrorState,             // Left bracket
1481                 IterativeParsingErrorState,             // Right bracket
1482                 IterativeParsingErrorState,             // Left curly bracket
1483                 IterativeParsingErrorState,             // Right curly bracket
1484                 IterativeParsingErrorState,             // Comma
1485                 IterativeParsingKeyValueDelimiterState, // Colon
1486                 IterativeParsingErrorState,             // String
1487                 IterativeParsingErrorState,             // False
1488                 IterativeParsingErrorState,             // True
1489                 IterativeParsingErrorState,             // Null
1490                 IterativeParsingErrorState              // Number
1491             },
1492             // KeyValueDelimiter
1493             {
1494                 IterativeParsingArrayInitialState,      // Left bracket(push MemberValue state)
1495                 IterativeParsingErrorState,             // Right bracket
1496                 IterativeParsingObjectInitialState,     // Left curly bracket(push MemberValue state)
1497                 IterativeParsingErrorState,             // Right curly bracket
1498                 IterativeParsingErrorState,             // Comma
1499                 IterativeParsingErrorState,             // Colon
1500                 IterativeParsingMemberValueState,       // String
1501                 IterativeParsingMemberValueState,       // False
1502                 IterativeParsingMemberValueState,       // True
1503                 IterativeParsingMemberValueState,       // Null
1504                 IterativeParsingMemberValueState        // Number
1505             },
1506             // MemberValue
1507             {
1508                 IterativeParsingErrorState,             // Left bracket
1509                 IterativeParsingErrorState,             // Right bracket
1510                 IterativeParsingErrorState,             // Left curly bracket
1511                 IterativeParsingObjectFinishState,      // Right curly bracket
1512                 IterativeParsingMemberDelimiterState,   // Comma
1513                 IterativeParsingErrorState,             // Colon
1514                 IterativeParsingErrorState,             // String
1515                 IterativeParsingErrorState,             // False
1516                 IterativeParsingErrorState,             // True
1517                 IterativeParsingErrorState,             // Null
1518                 IterativeParsingErrorState              // Number
1519             },
1520             // MemberDelimiter
1521             {
1522                 IterativeParsingErrorState,         // Left bracket
1523                 IterativeParsingErrorState,         // Right bracket
1524                 IterativeParsingErrorState,         // Left curly bracket
1525                 IterativeParsingErrorState,         // Right curly bracket
1526                 IterativeParsingErrorState,         // Comma
1527                 IterativeParsingErrorState,         // Colon
1528                 IterativeParsingMemberKeyState,     // String
1529                 IterativeParsingErrorState,         // False
1530                 IterativeParsingErrorState,         // True
1531                 IterativeParsingErrorState,         // Null
1532                 IterativeParsingErrorState          // Number
1533             },
1534             // ObjectFinish(sink state)
1535             {
1536                 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1537                 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1538                 IterativeParsingErrorState
1539             },
1540             // ArrayInitial
1541             {
1542                 IterativeParsingArrayInitialState,      // Left bracket(push Element state)
1543                 IterativeParsingArrayFinishState,       // Right bracket
1544                 IterativeParsingObjectInitialState,     // Left curly bracket(push Element state)
1545                 IterativeParsingErrorState,             // Right curly bracket
1546                 IterativeParsingErrorState,             // Comma
1547                 IterativeParsingErrorState,             // Colon
1548                 IterativeParsingElementState,           // String
1549                 IterativeParsingElementState,           // False
1550                 IterativeParsingElementState,           // True
1551                 IterativeParsingElementState,           // Null
1552                 IterativeParsingElementState            // Number
1553             },
1554             // Element
1555             {
1556                 IterativeParsingErrorState,             // Left bracket
1557                 IterativeParsingArrayFinishState,       // Right bracket
1558                 IterativeParsingErrorState,             // Left curly bracket
1559                 IterativeParsingErrorState,             // Right curly bracket
1560                 IterativeParsingElementDelimiterState,  // Comma
1561                 IterativeParsingErrorState,             // Colon
1562                 IterativeParsingErrorState,             // String
1563                 IterativeParsingErrorState,             // False
1564                 IterativeParsingErrorState,             // True
1565                 IterativeParsingErrorState,             // Null
1566                 IterativeParsingErrorState              // Number
1567             },
1568             // ElementDelimiter
1569             {
1570                 IterativeParsingArrayInitialState,      // Left bracket(push Element state)
1571                 IterativeParsingErrorState,             // Right bracket
1572                 IterativeParsingObjectInitialState,     // Left curly bracket(push Element state)
1573                 IterativeParsingErrorState,             // Right curly bracket
1574                 IterativeParsingErrorState,             // Comma
1575                 IterativeParsingErrorState,             // Colon
1576                 IterativeParsingElementState,           // String
1577                 IterativeParsingElementState,           // False
1578                 IterativeParsingElementState,           // True
1579                 IterativeParsingElementState,           // Null
1580                 IterativeParsingElementState            // Number
1581             },
1582             // ArrayFinish(sink state)
1583             {
1584                 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1585                 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1586                 IterativeParsingErrorState
1587             },
1588             // Single Value (sink state)
1589             {
1590                 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1591                 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1592                 IterativeParsingErrorState
1593             }
1594         }; // End of G
1595 
1596         return static_cast<IterativeParsingState>(G[state][token]);
1597     }
1598 
1599     // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit().
1600     // May return a new state on state pop.
1601     template <unsigned parseFlags, typename InputStream, typename Handler>
Transit(IterativeParsingState src,Token token,IterativeParsingState dst,InputStream & is,Handler & handler)1602     RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) {
1603         (void)token;
1604 
1605         switch (dst) {
1606         case IterativeParsingErrorState:
1607             return dst;
1608 
1609         case IterativeParsingObjectInitialState:
1610         case IterativeParsingArrayInitialState:
1611         {
1612             // Push the state(Element or MemeberValue) if we are nested in another array or value of member.
1613             // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
1614             IterativeParsingState n = src;
1615             if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
1616                 n = IterativeParsingElementState;
1617             else if (src == IterativeParsingKeyValueDelimiterState)
1618                 n = IterativeParsingMemberValueState;
1619             // Push current state.
1620             *stack_.template Push<SizeType>(1) = n;
1621             // Initialize and push the member/element count.
1622             *stack_.template Push<SizeType>(1) = 0;
1623             // Call handler
1624             bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray();
1625             // On handler short circuits the parsing.
1626             if (!hr) {
1627                 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1628                 return IterativeParsingErrorState;
1629             }
1630             else {
1631                 is.Take();
1632                 return dst;
1633             }
1634         }
1635 
1636         case IterativeParsingMemberKeyState:
1637             ParseString<parseFlags>(is, handler, true);
1638             if (HasParseError())
1639                 return IterativeParsingErrorState;
1640             else
1641                 return dst;
1642 
1643         case IterativeParsingKeyValueDelimiterState:
1644             RAPIDJSON_ASSERT(token == ColonToken);
1645             is.Take();
1646             return dst;
1647 
1648         case IterativeParsingMemberValueState:
1649             // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1650             ParseValue<parseFlags>(is, handler);
1651             if (HasParseError()) {
1652                 return IterativeParsingErrorState;
1653             }
1654             return dst;
1655 
1656         case IterativeParsingElementState:
1657             // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1658             ParseValue<parseFlags>(is, handler);
1659             if (HasParseError()) {
1660                 return IterativeParsingErrorState;
1661             }
1662             return dst;
1663 
1664         case IterativeParsingMemberDelimiterState:
1665         case IterativeParsingElementDelimiterState:
1666             is.Take();
1667             // Update member/element count.
1668             *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1;
1669             return dst;
1670 
1671         case IterativeParsingObjectFinishState:
1672         {
1673             // Get member count.
1674             SizeType c = *stack_.template Pop<SizeType>(1);
1675             // If the object is not empty, count the last member.
1676             if (src == IterativeParsingMemberValueState)
1677                 ++c;
1678             // Restore the state.
1679             IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
1680             // Transit to Finish state if this is the topmost scope.
1681             if (n == IterativeParsingStartState)
1682                 n = IterativeParsingFinishState;
1683             // Call handler
1684             bool hr = handler.EndObject(c);
1685             // On handler short circuits the parsing.
1686             if (!hr) {
1687                 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1688                 return IterativeParsingErrorState;
1689             }
1690             else {
1691                 is.Take();
1692                 return n;
1693             }
1694         }
1695 
1696         case IterativeParsingArrayFinishState:
1697         {
1698             // Get element count.
1699             SizeType c = *stack_.template Pop<SizeType>(1);
1700             // If the array is not empty, count the last element.
1701             if (src == IterativeParsingElementState)
1702                 ++c;
1703             // Restore the state.
1704             IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
1705             // Transit to Finish state if this is the topmost scope.
1706             if (n == IterativeParsingStartState)
1707                 n = IterativeParsingFinishState;
1708             // Call handler
1709             bool hr = handler.EndArray(c);
1710             // On handler short circuits the parsing.
1711             if (!hr) {
1712                 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1713                 return IterativeParsingErrorState;
1714             }
1715             else {
1716                 is.Take();
1717                 return n;
1718             }
1719         }
1720 
1721         default:
1722             // This branch is for IterativeParsingValueState actually.
1723             // Use `default:` rather than
1724             // `case IterativeParsingValueState:` is for code coverage.
1725 
1726             // The IterativeParsingStartState is not enumerated in this switch-case.
1727             // It is impossible for that case. And it can be caught by following assertion.
1728 
1729             // The IterativeParsingFinishState is not enumerated in this switch-case either.
1730             // It is a "derivative" state which cannot triggered from Predict() directly.
1731             // Therefore it cannot happen here. And it can be caught by following assertion.
1732             RAPIDJSON_ASSERT(dst == IterativeParsingValueState);
1733 
1734             // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1735             ParseValue<parseFlags>(is, handler);
1736             if (HasParseError()) {
1737                 return IterativeParsingErrorState;
1738             }
1739             return IterativeParsingFinishState;
1740         }
1741     }
1742 
1743     template <typename InputStream>
HandleError(IterativeParsingState src,InputStream & is)1744     void HandleError(IterativeParsingState src, InputStream& is) {
1745         if (HasParseError()) {
1746             // Error flag has been set.
1747             return;
1748         }
1749 
1750         switch (src) {
1751         case IterativeParsingStartState:            RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return;
1752         case IterativeParsingFinishState:           RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return;
1753         case IterativeParsingObjectInitialState:
1754         case IterativeParsingMemberDelimiterState:  RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return;
1755         case IterativeParsingMemberKeyState:        RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return;
1756         case IterativeParsingMemberValueState:      RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return;
1757         case IterativeParsingElementState:          RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return;
1758         default:                                    RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); return;
1759         }
1760     }
1761 
1762     template <unsigned parseFlags, typename InputStream, typename Handler>
IterativeParse(InputStream & is,Handler & handler)1763     ParseResult IterativeParse(InputStream& is, Handler& handler) {
1764         parseResult_.Clear();
1765         ClearStackOnExit scope(*this);
1766         IterativeParsingState state = IterativeParsingStartState;
1767 
1768         SkipWhitespaceAndComments<parseFlags>(is);
1769         RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
1770         while (is.Peek() != '\0') {
1771             Token t = Tokenize(is.Peek());
1772             IterativeParsingState n = Predict(state, t);
1773             IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
1774 
1775             if (d == IterativeParsingErrorState) {
1776                 HandleError(state, is);
1777                 break;
1778             }
1779 
1780             state = d;
1781 
1782             // Do not further consume streams if a root JSON has been parsed.
1783             if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
1784                 break;
1785 
1786             SkipWhitespaceAndComments<parseFlags>(is);
1787             RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
1788         }
1789 
1790         // Handle the end of file.
1791         if (state != IterativeParsingFinishState)
1792             HandleError(state, is);
1793 
1794         return parseResult_;
1795     }
1796 
    static const size_t kDefaultStackCapacity = 256;    //!< Default stack capacity in bytes for storing a single decoded string.
    //! Scratch stack. Stores decoded strings temporarily during non-destructive parsing; during
    //! iterative parsing Transit() also pushes a saved state and a member/element count on it
    //! for every open object or array.
    internal::Stack<StackAllocator> stack_;  //!< A stack for storing decoded string temporarily during non-destructive parsing.
    //! Parse outcome; IterativeParse() clears it on entry and returns it.
    ParseResult parseResult_;
1800 }; // class GenericReader
1801 
//! Reader with UTF8 encoding and default allocator.
/*! GenericReader instantiated with UTF8<> for both of the template arguments given here;
    any further template parameters are left at their defaults. */
typedef GenericReader<UTF8<>, UTF8<> > Reader;
1804 
1805 RAPIDJSON_NAMESPACE_END
1806 
1807 #ifdef __clang__
1808 RAPIDJSON_DIAG_POP
1809 #endif
1810 
1811 
1812 #ifdef __GNUC__
1813 RAPIDJSON_DIAG_POP
1814 #endif
1815 
1816 #ifdef _MSC_VER
1817 RAPIDJSON_DIAG_POP
1818 #endif
1819 
1820 #endif // RAPIDJSON_READER_H_
1821