1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14
15 #ifndef RAPIDJSON_READER_H_
16 #define RAPIDJSON_READER_H_
17
18 /*! \file reader.h */
19
20 #include "allocators.h"
21 #include "stream.h"
22 #include "encodedstream.h"
23 #include "internal/meta.h"
24 #include "internal/stack.h"
25 #include "internal/strtod.h"
26
27 #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
28 #include <intrin.h>
29 #pragma intrinsic(_BitScanForward)
30 #endif
31 #ifdef RAPIDJSON_SSE42
32 #include <nmmintrin.h>
33 #elif defined(RAPIDJSON_SSE2)
34 #include <emmintrin.h>
35 #endif
36
37 #ifdef _MSC_VER
38 RAPIDJSON_DIAG_PUSH
39 RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
40 RAPIDJSON_DIAG_OFF(4702) // unreachable code
41 #endif
42
43 #ifdef __clang__
44 RAPIDJSON_DIAG_PUSH
45 RAPIDJSON_DIAG_OFF(padded)
46 RAPIDJSON_DIAG_OFF(switch-enum)
47 #endif
48
49 #ifdef __GNUC__
50 RAPIDJSON_DIAG_PUSH
51 RAPIDJSON_DIAG_OFF(effc++)
52 #endif
53
54 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
55 #define RAPIDJSON_NOTHING /* deliberately empty */
56 #ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN
57 #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
58 RAPIDJSON_MULTILINEMACRO_BEGIN \
59 if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \
60 RAPIDJSON_MULTILINEMACRO_END
61 #endif
62 #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
63 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING)
64 //!@endcond
65
66 /*! \def RAPIDJSON_PARSE_ERROR_NORETURN
67 \ingroup RAPIDJSON_ERRORS
68 \brief Macro to indicate a parse error.
69 \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
70 \param offset position of the error in JSON input (\c size_t)
71
    This macro can be used as a customization point for the internal
73 error handling mechanism of RapidJSON.
74
75 A common usage model is to throw an exception instead of requiring the
76 caller to explicitly check the \ref rapidjson::GenericReader::Parse's
77 return value:
78
79 \code
80 #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \
81 throw ParseException(parseErrorCode, #parseErrorCode, offset)
82
83 #include <stdexcept> // std::runtime_error
84 #include "rapidjson/error/error.h" // rapidjson::ParseResult
85
86 struct ParseException : std::runtime_error, rapidjson::ParseResult {
87 ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset)
88 : std::runtime_error(msg), ParseResult(code, offset) {}
89 };
90
91 #include "rapidjson/reader.h"
92 \endcode
93
94 \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse
95 */
96 #ifndef RAPIDJSON_PARSE_ERROR_NORETURN
97 #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \
98 RAPIDJSON_MULTILINEMACRO_BEGIN \
99 RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \
100 SetParseError(parseErrorCode, offset); \
101 RAPIDJSON_MULTILINEMACRO_END
102 #endif
103
104 /*! \def RAPIDJSON_PARSE_ERROR
105 \ingroup RAPIDJSON_ERRORS
106 \brief (Internal) macro to indicate and handle a parse error.
107 \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
108 \param offset position of the error in JSON input (\c size_t)
109
110 Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing.
111
112 \see RAPIDJSON_PARSE_ERROR_NORETURN
113 \hideinitializer
114 */
115 #ifndef RAPIDJSON_PARSE_ERROR
116 #define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \
117 RAPIDJSON_MULTILINEMACRO_BEGIN \
118 RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \
119 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \
120 RAPIDJSON_MULTILINEMACRO_END
121 #endif
122
123 #include "error/error.h" // ParseErrorCode, ParseResult
124
125 RAPIDJSON_NAMESPACE_BEGIN
126
127 ///////////////////////////////////////////////////////////////////////////////
128 // ParseFlag
129
130 /*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS
131 \ingroup RAPIDJSON_CONFIG
132 \brief User-defined kParseDefaultFlags definition.
133
134 User can define this as any \c ParseFlag combinations.
135 */
136 #ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS
137 #define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags
138 #endif
139
140 //! Combination of parseFlags
141 /*! \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream
142 */
143 enum ParseFlag {
144 kParseNoFlags = 0, //!< No flags are set.
145 kParseInsituFlag = 1, //!< In-situ(destructive) parsing.
146 kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings.
147 kParseIterativeFlag = 4, //!< Iterative(constant complexity in terms of function call stack size) parsing.
148 kParseStopWhenDoneFlag = 8, //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error.
149 kParseFullPrecisionFlag = 16, //!< Parse number in full precision (but slower).
150 kParseCommentsFlag = 32, //!< Allow one-line (//) and multi-line (/**/) comments.
151 kParseNumbersAsStringsFlag = 64, //!< Parse all numbers (ints/doubles) as strings.
152 kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS
153 };
154
155 ///////////////////////////////////////////////////////////////////////////////
156 // Handler
157
158 /*! \class rapidjson::Handler
159 \brief Concept for receiving events from GenericReader upon parsing.
160 The functions return true if no error occurs. If they return false,
161 the event publisher should terminate the process.
162 \code
163 concept Handler {
164 typename Ch;
165
166 bool Null();
167 bool Bool(bool b);
168 bool Int(int i);
169 bool Uint(unsigned i);
170 bool Int64(int64_t i);
171 bool Uint64(uint64_t i);
172 bool Double(double d);
173 /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
174 bool RawNumber(const Ch* str, SizeType length, bool copy);
175 bool String(const Ch* str, SizeType length, bool copy);
176 bool StartObject();
177 bool Key(const Ch* str, SizeType length, bool copy);
178 bool EndObject(SizeType memberCount);
179 bool StartArray();
180 bool EndArray(SizeType elementCount);
181 };
182 \endcode
183 */
184 ///////////////////////////////////////////////////////////////////////////////
185 // BaseReaderHandler
186
187 //! Default implementation of Handler.
188 /*! This can be used as base class of any reader handler.
189 \note implements Handler concept
190 */
191 template<typename Encoding = UTF8<>, typename Derived = void>
192 struct BaseReaderHandler {
193 typedef typename Encoding::Ch Ch;
194
195 typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override;
196
DefaultBaseReaderHandler197 bool Default() { return true; }
NullBaseReaderHandler198 bool Null() { return static_cast<Override&>(*this).Default(); }
BoolBaseReaderHandler199 bool Bool(bool) { return static_cast<Override&>(*this).Default(); }
IntBaseReaderHandler200 bool Int(int) { return static_cast<Override&>(*this).Default(); }
UintBaseReaderHandler201 bool Uint(unsigned) { return static_cast<Override&>(*this).Default(); }
Int64BaseReaderHandler202 bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); }
Uint64BaseReaderHandler203 bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); }
DoubleBaseReaderHandler204 bool Double(double) { return static_cast<Override&>(*this).Default(); }
205 /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
RawNumberBaseReaderHandler206 bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
StringBaseReaderHandler207 bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); }
StartObjectBaseReaderHandler208 bool StartObject() { return static_cast<Override&>(*this).Default(); }
KeyBaseReaderHandler209 bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
EndObjectBaseReaderHandler210 bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); }
StartArrayBaseReaderHandler211 bool StartArray() { return static_cast<Override&>(*this).Default(); }
EndArrayBaseReaderHandler212 bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); }
213 };
214
215 ///////////////////////////////////////////////////////////////////////////////
216 // StreamLocalCopy
217
218 namespace internal {
219
220 template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
221 class StreamLocalCopy;
222
223 //! Do copy optimization.
224 template<typename Stream>
225 class StreamLocalCopy<Stream, 1> {
226 public:
StreamLocalCopy(Stream & original)227 StreamLocalCopy(Stream& original) : s(original), original_(original) {}
~StreamLocalCopy()228 ~StreamLocalCopy() { original_ = s; }
229
230 Stream s;
231
232 private:
233 StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
234
235 Stream& original_;
236 };
237
238 //! Keep reference.
239 template<typename Stream>
240 class StreamLocalCopy<Stream, 0> {
241 public:
StreamLocalCopy(Stream & original)242 StreamLocalCopy(Stream& original) : s(original) {}
243
244 Stream& s;
245
246 private:
247 StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
248 };
249
250 } // namespace internal
251
252 ///////////////////////////////////////////////////////////////////////////////
253 // SkipWhitespace
254
255 //! Skip the JSON white spaces in a stream.
256 /*! \param is A input stream for skipping white spaces.
257 \note This function has SSE2/SSE4.2 specialization.
258 */
259 template<typename InputStream>
SkipWhitespace(InputStream & is)260 void SkipWhitespace(InputStream& is) {
261 internal::StreamLocalCopy<InputStream> copy(is);
262 InputStream& s(copy.s);
263
264 while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t')
265 s.Take();
266 }
267
//! Skip JSON white spaces in the character range [p, end).
/*! \param p   First character to examine.
    \param end One past the last character that may be examined.
    \return Pointer to the first character that is not a space, line feed,
        carriage return or tab, or \c end when the whole range is white space.
*/
inline const char* SkipWhitespace(const char* p, const char* end) {
    for (; p != end; ++p) {
        const char c = *p;
        if (c != ' ' && c != '\n' && c != '\r' && c != '\t')
            break;
    }
    return p;
}
273
274 #ifdef RAPIDJSON_SSE42
//! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-bit characters at once.
/*! The scan has no end bound: it stops only at the first byte that is not a
    space, line feed, carriage return or tab, so the caller must guarantee that
    such a byte exists (presumably the string's terminating '\0' -- confirm
    against callers).
    \param p Start of the text.
    \return Pointer to the first non-whitespace byte at or after \c p.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Fast return for single non-whitespace
    if (!(*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
        return p;
    ++p;

    // Advance one byte at a time up to the next 16-byte boundary so that the
    // SIMD loop below can use aligned loads.
    const char* const boundary = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    for (; p != boundary; ++p) {
        if (!(*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
            return p;
    }

    // The rest of string using SIMD
    static const char whitespace[16] = " \n\r\t";
    const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));

    for (;;) {
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
        // Negative polarity: bit i of r is set when byte i matches none of the whitespace characters.
        const int r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY));
        if (r == 0) {   // all 16 characters are whitespace
            p += 16;
            continue;
        }

        // Find the index of first non-whitespace
#ifdef _MSC_VER
        unsigned long offset;
        _BitScanForward(&offset, r);
        return p + offset;
#else
        return p + __builtin_ffs(r) - 1;
#endif
    }
}
309
SkipWhitespace_SIMD(const char * p,const char * end)310 inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
311 // Fast return for single non-whitespace
312 if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
313 ++p;
314 else
315 return p;
316
317 // The middle of string using SIMD
318 static const char whitespace[16] = " \n\r\t";
319 const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
320
321 for (; p <= end - 16; p += 16) {
322 const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
323 const int r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY));
324 if (r != 0) { // some of characters is non-whitespace
325 #ifdef _MSC_VER // Find the index of first non-whitespace
326 unsigned long offset;
327 _BitScanForward(&offset, r);
328 return p + offset;
329 #else
330 return p + __builtin_ffs(r) - 1;
331 #endif
332 }
333 }
334
335 return SkipWhitespace(p, end);
336 }
337
338 #elif defined(RAPIDJSON_SSE2)
339
//! Skip whitespace with SSE2 instructions, testing 16 8-bit characters at once.
/*! The scan has no end bound: it stops only at the first byte that is not a
    space, line feed, carriage return or tab, so the caller must guarantee that
    such a byte exists (presumably the string's terminating '\0' -- confirm
    against callers).
    \param p Start of the text.
    \return Pointer to the first non-whitespace byte at or after \c p.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Fast return for single non-whitespace
    if (!(*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
        return p;
    ++p;

    // Advance one byte at a time up to the next 16-byte boundary so that the
    // SIMD loop below can use aligned loads.
    const char* const boundary = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    for (; p != boundary; ++p) {
        if (!(*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
            return p;
    }

    // One 16-byte vector per whitespace character, each filled with that character.
    #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
    static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
    #undef C16

    const __m128i spaces          = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
    const __m128i lineFeeds       = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
    const __m128i carriageReturns = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
    const __m128i tabs            = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));

    for (;;) {
        const __m128i chunk = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
        // A byte of isSpace is 0xFF where the input byte equals any of the four characters.
        const __m128i isSpace = _mm_or_si128(
            _mm_or_si128(_mm_cmpeq_epi8(chunk, spaces), _mm_cmpeq_epi8(chunk, lineFeeds)),
            _mm_or_si128(_mm_cmpeq_epi8(chunk, carriageReturns), _mm_cmpeq_epi8(chunk, tabs)));
        // Inverted mask: bit i is set when byte i is not whitespace.
        const unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(isSpace));
        if (r == 0) {   // all 16 characters are whitespace
            p += 16;
            continue;
        }

        // Find the index of first non-whitespace
#ifdef _MSC_VER
        unsigned long offset;
        _BitScanForward(&offset, r);
        return p + offset;
#else
        return p + __builtin_ffs(r) - 1;
#endif
    }
}
384
SkipWhitespace_SIMD(const char * p,const char * end)385 inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
386 // Fast return for single non-whitespace
387 if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
388 ++p;
389 else
390 return p;
391
392 // The rest of string
393 #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
394 static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
395 #undef C16
396
397 const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
398 const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
399 const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
400 const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
401
402 for (; p <= end - 16; p += 16) {
403 const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
404 __m128i x = _mm_cmpeq_epi8(s, w0);
405 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
406 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
407 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
408 unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
409 if (r != 0) { // some of characters may be non-whitespace
410 #ifdef _MSC_VER // Find the index of first non-whitespace
411 unsigned long offset;
412 _BitScanForward(&offset, r);
413 return p + offset;
414 #else
415 return p + __builtin_ffs(r) - 1;
416 #endif
417 }
418 }
419
420 return SkipWhitespace(p, end);
421 }
422
423 #endif // RAPIDJSON_SSE2
424
425 #ifdef RAPIDJSON_SIMD
426 //! Template function specialization for InsituStringStream
SkipWhitespace(InsituStringStream & is)427 template<> inline void SkipWhitespace(InsituStringStream& is) {
428 is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_));
429 }
430
431 //! Template function specialization for StringStream
SkipWhitespace(StringStream & is)432 template<> inline void SkipWhitespace(StringStream& is) {
433 is.src_ = SkipWhitespace_SIMD(is.src_);
434 }
435
SkipWhitespace(EncodedInputStream<UTF8<>,MemoryStream> & is)436 template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) {
437 is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_);
438 }
439 #endif // RAPIDJSON_SIMD
440
441 ///////////////////////////////////////////////////////////////////////////////
442 // GenericReader
443
444 //! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator.
445 /*! GenericReader parses JSON text from a stream, and send events synchronously to an
446 object implementing Handler concept.
447
448 It needs to allocate a stack for storing a single decoded string during
449 non-destructive parsing.
450
451 For in-situ parsing, the decoded string is directly written to the source
452 text string, no temporary buffer is required.
453
454 A GenericReader object can be reused for parsing multiple JSON text.
455
456 \tparam SourceEncoding Encoding of the input stream.
457 \tparam TargetEncoding Encoding of the parse output.
458 \tparam StackAllocator Allocator type for stack.
459 */
460 template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator>
461 class GenericReader {
462 public:
463 typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type
464
465 //! Constructor.
466 /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing)
467 \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing)
468 */
stack_(stackAllocator,stackCapacity)469 GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(stackAllocator, stackCapacity), parseResult_() {}
470
471 //! Parse JSON text.
472 /*! \tparam parseFlags Combination of \ref ParseFlag.
473 \tparam InputStream Type of input stream, implementing Stream concept.
474 \tparam Handler Type of handler, implementing Handler concept.
475 \param is Input stream to be parsed.
476 \param handler The handler to receive events.
477 \return Whether the parsing is successful.
478 */
479 template <unsigned parseFlags, typename InputStream, typename Handler>
Parse(InputStream & is,Handler & handler)480 ParseResult Parse(InputStream& is, Handler& handler) {
481 if (parseFlags & kParseIterativeFlag)
482 return IterativeParse<parseFlags>(is, handler);
483
484 parseResult_.Clear();
485
486 ClearStackOnExit scope(*this);
487
488 SkipWhitespaceAndComments<parseFlags>(is);
489 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
490
491 if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) {
492 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell());
493 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
494 }
495 else {
496 ParseValue<parseFlags>(is, handler);
497 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
498
499 if (!(parseFlags & kParseStopWhenDoneFlag)) {
500 SkipWhitespaceAndComments<parseFlags>(is);
501 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
502
503 if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) {
504 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell());
505 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
506 }
507 }
508 }
509
510 return parseResult_;
511 }
512
513 //! Parse JSON text (with \ref kParseDefaultFlags)
514 /*! \tparam InputStream Type of input stream, implementing Stream concept
515 \tparam Handler Type of handler, implementing Handler concept.
516 \param is Input stream to be parsed.
517 \param handler The handler to receive events.
518 \return Whether the parsing is successful.
519 */
520 template <typename InputStream, typename Handler>
Parse(InputStream & is,Handler & handler)521 ParseResult Parse(InputStream& is, Handler& handler) {
522 return Parse<kParseDefaultFlags>(is, handler);
523 }
524
525 //! Whether a parse error has occured in the last parsing.
HasParseError()526 bool HasParseError() const { return parseResult_.IsError(); }
527
528 //! Get the \ref ParseErrorCode of last parsing.
GetParseErrorCode()529 ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); }
530
531 //! Get the position of last parsing error in input, 0 otherwise.
GetErrorOffset()532 size_t GetErrorOffset() const { return parseResult_.Offset(); }
533
534 protected:
SetParseError(ParseErrorCode code,size_t offset)535 void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); }
536
537 private:
538 // Prohibit copy constructor & assignment operator.
539 GenericReader(const GenericReader&);
540 GenericReader& operator=(const GenericReader&);
541
ClearStack()542 void ClearStack() { stack_.Clear(); }
543
544 // clear stack on any exit from ParseStream, e.g. due to exception
545 struct ClearStackOnExit {
ClearStackOnExitClearStackOnExit546 explicit ClearStackOnExit(GenericReader& r) : r_(r) {}
~ClearStackOnExitClearStackOnExit547 ~ClearStackOnExit() { r_.ClearStack(); }
548 private:
549 GenericReader& r_;
550 ClearStackOnExit(const ClearStackOnExit&);
551 ClearStackOnExit& operator=(const ClearStackOnExit&);
552 };
553
554 template<unsigned parseFlags, typename InputStream>
SkipWhitespaceAndComments(InputStream & is)555 void SkipWhitespaceAndComments(InputStream& is) {
556 SkipWhitespace(is);
557
558 if (parseFlags & kParseCommentsFlag) {
559 while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) {
560 if (Consume(is, '*')) {
561 while (true) {
562 if (RAPIDJSON_UNLIKELY(is.Peek() == '\0'))
563 RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
564 else if (Consume(is, '*')) {
565 if (Consume(is, '/'))
566 break;
567 }
568 else
569 is.Take();
570 }
571 }
572 else if (RAPIDJSON_LIKELY(Consume(is, '/')))
573 while (is.Peek() != '\0' && is.Take() != '\n');
574 else
575 RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
576
577 SkipWhitespace(is);
578 }
579 }
580 }
581
582 // Parse object: { string : value, ... }
583 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseObject(InputStream & is,Handler & handler)584 void ParseObject(InputStream& is, Handler& handler) {
585 RAPIDJSON_ASSERT(is.Peek() == '{');
586 is.Take(); // Skip '{'
587
588 if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
589 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
590
591 SkipWhitespaceAndComments<parseFlags>(is);
592 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
593
594 if (Consume(is, '}')) {
595 if (RAPIDJSON_UNLIKELY(!handler.EndObject(0))) // empty object
596 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
597 return;
598 }
599
600 for (SizeType memberCount = 0;;) {
601 if (RAPIDJSON_UNLIKELY(is.Peek() != '"'))
602 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
603
604 ParseString<parseFlags>(is, handler, true);
605 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
606
607 SkipWhitespaceAndComments<parseFlags>(is);
608 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
609
610 if (RAPIDJSON_UNLIKELY(!Consume(is, ':')))
611 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
612
613 SkipWhitespaceAndComments<parseFlags>(is);
614 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
615
616 ParseValue<parseFlags>(is, handler);
617 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
618
619 SkipWhitespaceAndComments<parseFlags>(is);
620 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
621
622 ++memberCount;
623
624 switch (is.Peek()) {
625 case ',':
626 is.Take();
627 SkipWhitespaceAndComments<parseFlags>(is);
628 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
629 break;
630 case '}':
631 is.Take();
632 if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
633 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
634 return;
635 default:
636 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell());
637 break;
638 }
639 }
640 }
641
642 // Parse array: [ value, ... ]
643 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseArray(InputStream & is,Handler & handler)644 void ParseArray(InputStream& is, Handler& handler) {
645 RAPIDJSON_ASSERT(is.Peek() == '[');
646 is.Take(); // Skip '['
647
648 if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
649 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
650
651 SkipWhitespaceAndComments<parseFlags>(is);
652 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
653
654 if (Consume(is, ']')) {
655 if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array
656 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
657 return;
658 }
659
660 for (SizeType elementCount = 0;;) {
661 ParseValue<parseFlags>(is, handler);
662 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
663
664 ++elementCount;
665 SkipWhitespaceAndComments<parseFlags>(is);
666 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
667
668 if (Consume(is, ',')) {
669 SkipWhitespaceAndComments<parseFlags>(is);
670 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
671 }
672 else if (Consume(is, ']')) {
673 if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
674 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
675 return;
676 }
677 else
678 RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
679 }
680 }
681
682 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseNull(InputStream & is,Handler & handler)683 void ParseNull(InputStream& is, Handler& handler) {
684 RAPIDJSON_ASSERT(is.Peek() == 'n');
685 is.Take();
686
687 if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) {
688 if (RAPIDJSON_UNLIKELY(!handler.Null()))
689 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
690 }
691 else
692 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
693 }
694
695 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseTrue(InputStream & is,Handler & handler)696 void ParseTrue(InputStream& is, Handler& handler) {
697 RAPIDJSON_ASSERT(is.Peek() == 't');
698 is.Take();
699
700 if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) {
701 if (RAPIDJSON_UNLIKELY(!handler.Bool(true)))
702 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
703 }
704 else
705 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
706 }
707
708 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseFalse(InputStream & is,Handler & handler)709 void ParseFalse(InputStream& is, Handler& handler) {
710 RAPIDJSON_ASSERT(is.Peek() == 'f');
711 is.Take();
712
713 if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) {
714 if (RAPIDJSON_UNLIKELY(!handler.Bool(false)))
715 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
716 }
717 else
718 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
719 }
720
721 template<typename InputStream>
Consume(InputStream & is,typename InputStream::Ch expect)722 RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) {
723 if (RAPIDJSON_LIKELY(is.Peek() == expect)) {
724 is.Take();
725 return true;
726 }
727 else
728 return false;
729 }
730
731 // Helper function to parse four hexidecimal digits in \uXXXX in ParseString().
732 template<typename InputStream>
ParseHex4(InputStream & is,size_t escapeOffset)733 unsigned ParseHex4(InputStream& is, size_t escapeOffset) {
734 unsigned codepoint = 0;
735 for (int i = 0; i < 4; i++) {
736 Ch c = is.Peek();
737 codepoint <<= 4;
738 codepoint += static_cast<unsigned>(c);
739 if (c >= '0' && c <= '9')
740 codepoint -= '0';
741 else if (c >= 'A' && c <= 'F')
742 codepoint -= 'A' - 10;
743 else if (c >= 'a' && c <= 'f')
744 codepoint -= 'a' - 10;
745 else {
746 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, escapeOffset);
747 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0);
748 }
749 is.Take();
750 }
751 return codepoint;
752 }
753
754 template <typename CharType>
755 class StackStream {
756 public:
757 typedef CharType Ch;
758
StackStream(internal::Stack<StackAllocator> & stack)759 StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {}
Put(Ch c)760 RAPIDJSON_FORCEINLINE void Put(Ch c) {
761 *stack_.template Push<Ch>() = c;
762 ++length_;
763 }
764
Push(SizeType count)765 RAPIDJSON_FORCEINLINE void* Push(SizeType count) {
766 length_ += count;
767 return stack_.template Push<Ch>(count);
768 }
769
Length()770 size_t Length() const { return length_; }
771
Pop()772 Ch* Pop() {
773 return stack_.template Pop<Ch>(length_);
774 }
775
776 private:
777 StackStream(const StackStream&);
778 StackStream& operator=(const StackStream&);
779
780 internal::Stack<StackAllocator>& stack_;
781 SizeType length_;
782 };
783
784 // Parse string and generate String event. Different code paths for kParseInsituFlag.
785 template<unsigned parseFlags, typename InputStream, typename Handler>
786 void ParseString(InputStream& is, Handler& handler, bool isKey = false) {
787 internal::StreamLocalCopy<InputStream> copy(is);
788 InputStream& s(copy.s);
789
790 RAPIDJSON_ASSERT(s.Peek() == '\"');
791 s.Take(); // Skip '\"'
792
793 bool success = false;
794 if (parseFlags & kParseInsituFlag) {
795 typename InputStream::Ch *head = s.PutBegin();
796 ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s);
797 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
798 size_t length = s.PutEnd(head) - 1;
799 RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
800 const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
801 success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false));
802 }
803 else {
804 StackStream<typename TargetEncoding::Ch> stackStream(stack_);
805 ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream);
806 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
807 SizeType length = static_cast<SizeType>(stackStream.Length()) - 1;
808 const typename TargetEncoding::Ch* const str = stackStream.Pop();
809 success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true));
810 }
811 if (RAPIDJSON_UNLIKELY(!success))
812 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
813 }
814
815 // Parse string to an output is
816 // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation.
817 template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream>
ParseStringToStream(InputStream & is,OutputStream & os)818 RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) {
819 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
820 #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
821 static const char escape[256] = {
822 Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/',
823 Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
824 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
825 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
826 Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16
827 };
828 #undef Z16
829 //!@endcond
830
831 for (;;) {
832 // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation.
833 if (!(parseFlags & kParseValidateEncodingFlag))
834 ScanCopyUnescapedString(is, os);
835
836 Ch c = is.Peek();
837 if (RAPIDJSON_UNLIKELY(c == '\\')) { // Escape
838 size_t escapeOffset = is.Tell(); // For invalid escaping, report the inital '\\' as error offset
839 is.Take();
840 Ch e = is.Peek();
841 if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) {
842 is.Take();
843 os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)]));
844 }
845 else if (RAPIDJSON_LIKELY(e == 'u')) { // Unicode
846 is.Take();
847 unsigned codepoint = ParseHex4(is, escapeOffset);
848 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
849 if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) {
850 // Handle UTF-16 surrogate pair
851 if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
852 RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
853 unsigned codepoint2 = ParseHex4(is, escapeOffset);
854 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
855 if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
856 RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
857 codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
858 }
859 TEncoding::Encode(os, codepoint);
860 }
861 else
862 RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset);
863 }
864 else if (RAPIDJSON_UNLIKELY(c == '"')) { // Closing double quote
865 is.Take();
866 os.Put('\0'); // null-terminate the string
867 return;
868 }
869 else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
870 if (c == '\0')
871 RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell());
872 else
873 RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell());
874 }
875 else {
876 size_t offset = is.Tell();
877 if (RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag ?
878 !Transcoder<SEncoding, TEncoding>::Validate(is, os) :
879 !Transcoder<SEncoding, TEncoding>::Transcode(is, os))))
880 RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, offset);
881 }
882 }
883 }
884
885 template<typename InputStream, typename OutputStream>
ScanCopyUnescapedString(InputStream &,OutputStream &)886 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) {
887 // Do nothing for generic version
888 }
889
890 #if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
891 // StringStream -> StackStream<char>
ScanCopyUnescapedString(StringStream & is,StackStream<char> & os)892 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
893 const char* p = is.src_;
894
895 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
896 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
897 while (p != nextAligned)
898 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
899 is.src_ = p;
900 return;
901 }
902 else
903 os.Put(*p++);
904
905 // The rest of string using SIMD
906 static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
907 static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
908 static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
909 const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
910 const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
911 const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
912
913 for (;; p += 16) {
914 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
915 const __m128i t1 = _mm_cmpeq_epi8(s, dq);
916 const __m128i t2 = _mm_cmpeq_epi8(s, bs);
917 const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
918 const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
919 unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
920 if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
921 SizeType length;
922 #ifdef _MSC_VER // Find the index of first escaped
923 unsigned long offset;
924 _BitScanForward(&offset, r);
925 length = offset;
926 #else
927 length = static_cast<SizeType>(__builtin_ffs(r) - 1);
928 #endif
929 char* q = reinterpret_cast<char*>(os.Push(length));
930 for (size_t i = 0; i < length; i++)
931 q[i] = p[i];
932
933 p += length;
934 break;
935 }
936 _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s);
937 }
938
939 is.src_ = p;
940 }
941
942 // InsituStringStream -> InsituStringStream
ScanCopyUnescapedString(InsituStringStream & is,InsituStringStream & os)943 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
944 RAPIDJSON_ASSERT(&is == &os);
945 (void)os;
946
947 if (is.src_ == is.dst_) {
948 SkipUnescapedString(is);
949 return;
950 }
951
952 char* p = is.src_;
953 char *q = is.dst_;
954
955 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
956 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
957 while (p != nextAligned)
958 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
959 is.src_ = p;
960 is.dst_ = q;
961 return;
962 }
963 else
964 *q++ = *p++;
965
966 // The rest of string using SIMD
967 static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
968 static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
969 static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
970 const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
971 const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
972 const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
973
974 for (;; p += 16, q += 16) {
975 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
976 const __m128i t1 = _mm_cmpeq_epi8(s, dq);
977 const __m128i t2 = _mm_cmpeq_epi8(s, bs);
978 const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
979 const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
980 unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
981 if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
982 size_t length;
983 #ifdef _MSC_VER // Find the index of first escaped
984 unsigned long offset;
985 _BitScanForward(&offset, r);
986 length = offset;
987 #else
988 length = static_cast<size_t>(__builtin_ffs(r) - 1);
989 #endif
990 for (const char* pend = p + length; p != pend; )
991 *q++ = *p++;
992 break;
993 }
994 _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s);
995 }
996
997 is.src_ = p;
998 is.dst_ = q;
999 }
1000
1001 // When read/write pointers are the same for insitu stream, just skip unescaped characters
SkipUnescapedString(InsituStringStream & is)1002 static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
1003 RAPIDJSON_ASSERT(is.src_ == is.dst_);
1004 char* p = is.src_;
1005
1006 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1007 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1008 for (; p != nextAligned; p++)
1009 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1010 is.src_ = is.dst_ = p;
1011 return;
1012 }
1013
1014 // The rest of string using SIMD
1015 static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
1016 static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
1017 static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
1018 const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
1019 const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
1020 const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
1021
1022 for (;; p += 16) {
1023 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
1024 const __m128i t1 = _mm_cmpeq_epi8(s, dq);
1025 const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1026 const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
1027 const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
1028 unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
1029 if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
1030 size_t length;
1031 #ifdef _MSC_VER // Find the index of first escaped
1032 unsigned long offset;
1033 _BitScanForward(&offset, r);
1034 length = offset;
1035 #else
1036 length = static_cast<size_t>(__builtin_ffs(r) - 1);
1037 #endif
1038 p += length;
1039 break;
1040 }
1041 }
1042
1043 is.src_ = is.dst_ = p;
1044 }
1045 #endif
1046
1047 template<typename InputStream, bool backup>
1048 class NumberStream;
1049
1050 template<typename InputStream>
1051 class NumberStream<InputStream, false> {
1052 public:
1053 typedef typename InputStream::Ch Ch;
1054
NumberStream(GenericReader & reader,InputStream & s)1055 NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; }
~NumberStream()1056 ~NumberStream() {}
1057
Peek()1058 RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
TakePush()1059 RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
Take()1060 RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
Push(char)1061 RAPIDJSON_FORCEINLINE void Push(char) {}
1062
Tell()1063 size_t Tell() { return is.Tell(); }
Length()1064 size_t Length() { return 0; }
Pop()1065 const char* Pop() { return 0; }
1066
1067 protected:
1068 NumberStream& operator=(const NumberStream&);
1069
1070 InputStream& is;
1071 };
1072
1073 template<typename InputStream>
1074 class NumberStream<InputStream, true> : public NumberStream<InputStream, false> {
1075 typedef NumberStream<InputStream, false> Base;
1076 public:
NumberStream(GenericReader & reader,InputStream & is)1077 NumberStream(GenericReader& reader, InputStream& is) : NumberStream<InputStream, false>(reader, is), stackStream(reader.stack_) {}
~NumberStream()1078 ~NumberStream() {}
1079
TakePush()1080 RAPIDJSON_FORCEINLINE Ch TakePush() {
1081 stackStream.Put(static_cast<char>(Base::is.Peek()));
1082 return Base::is.Take();
1083 }
1084
Push(char c)1085 RAPIDJSON_FORCEINLINE void Push(char c) {
1086 stackStream.Put(c);
1087 }
1088
Length()1089 size_t Length() { return stackStream.Length(); }
1090
Pop()1091 const char* Pop() {
1092 stackStream.Put('\0');
1093 return stackStream.Pop();
1094 }
1095
1096 private:
1097 StackStream<char> stackStream;
1098 };
1099
1100 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseNumber(InputStream & is,Handler & handler)1101 void ParseNumber(InputStream& is, Handler& handler) {
1102 internal::StreamLocalCopy<InputStream> copy(is);
1103 NumberStream<InputStream,
1104 ((parseFlags & kParseNumbersAsStringsFlag) != 0) ?
1105 ((parseFlags & kParseInsituFlag) == 0) :
1106 ((parseFlags & kParseFullPrecisionFlag) != 0)> s(*this, copy.s);
1107
1108 size_t startOffset = s.Tell();
1109
1110 // Parse minus
1111 bool minus = Consume(s, '-');
1112
1113 // Parse int: zero / ( digit1-9 *DIGIT )
1114 unsigned i = 0;
1115 uint64_t i64 = 0;
1116 bool use64bit = false;
1117 int significandDigit = 0;
1118 if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) {
1119 i = 0;
1120 s.TakePush();
1121 }
1122 else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) {
1123 i = static_cast<unsigned>(s.TakePush() - '0');
1124
1125 if (minus)
1126 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1127 if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648
1128 if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) {
1129 i64 = i;
1130 use64bit = true;
1131 break;
1132 }
1133 }
1134 i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
1135 significandDigit++;
1136 }
1137 else
1138 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1139 if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295
1140 if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) {
1141 i64 = i;
1142 use64bit = true;
1143 break;
1144 }
1145 }
1146 i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
1147 significandDigit++;
1148 }
1149 }
1150 else
1151 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
1152
1153 // Parse 64bit int
1154 bool useDouble = false;
1155 double d = 0.0;
1156 if (use64bit) {
1157 if (minus)
1158 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1159 if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808
1160 if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) {
1161 d = static_cast<double>(i64);
1162 useDouble = true;
1163 break;
1164 }
1165 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1166 significandDigit++;
1167 }
1168 else
1169 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1170 if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615
1171 if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) {
1172 d = static_cast<double>(i64);
1173 useDouble = true;
1174 break;
1175 }
1176 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1177 significandDigit++;
1178 }
1179 }
1180
1181 // Force double for big integer
1182 if (useDouble) {
1183 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1184 if (RAPIDJSON_UNLIKELY(d >= 1.7976931348623157e307)) // DBL_MAX / 10.0
1185 RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
1186 d = d * 10 + (s.TakePush() - '0');
1187 }
1188 }
1189
1190 // Parse frac = decimal-point 1*DIGIT
1191 int expFrac = 0;
1192 size_t decimalPosition;
1193 if (Consume(s, '.')) {
1194 if (((parseFlags & kParseNumbersAsStringsFlag) != 0) && ((parseFlags & kParseInsituFlag) == 0)) {
1195 s.Push('.');
1196 }
1197 decimalPosition = s.Length();
1198
1199 if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9')))
1200 RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell());
1201
1202 if (!useDouble) {
1203 #if RAPIDJSON_64BIT
1204 // Use i64 to store significand in 64-bit architecture
1205 if (!use64bit)
1206 i64 = i;
1207
1208 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1209 if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
1210 break;
1211 else {
1212 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1213 --expFrac;
1214 if (i64 != 0)
1215 significandDigit++;
1216 }
1217 }
1218
1219 d = static_cast<double>(i64);
1220 #else
1221 // Use double to store significand in 32-bit architecture
1222 d = static_cast<double>(use64bit ? i64 : i);
1223 #endif
1224 useDouble = true;
1225 }
1226
1227 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1228 if (significandDigit < 17) {
1229 d = d * 10.0 + (s.TakePush() - '0');
1230 --expFrac;
1231 if (RAPIDJSON_LIKELY(d > 0.0))
1232 significandDigit++;
1233 }
1234 else
1235 s.TakePush();
1236 }
1237 }
1238 else
1239 decimalPosition = s.Length(); // decimal position at the end of integer.
1240
1241 // Parse exp = e [ minus / plus ] 1*DIGIT
1242 int exp = 0;
1243 if (Consume(s, 'e') || Consume(s, 'E')) {
1244 if ( ((parseFlags & kParseNumbersAsStringsFlag) != 0) && ((parseFlags & kParseInsituFlag) == 0) ) {
1245 s.Push( 'e' );
1246 }
1247
1248 if (!useDouble) {
1249 d = static_cast<double>(use64bit ? i64 : i);
1250 useDouble = true;
1251 }
1252
1253 bool expMinus = false;
1254 if (Consume(s, '+'))
1255 ;
1256 else if (Consume(s, '-'))
1257 expMinus = true;
1258
1259 if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1260 exp = static_cast<int>(s.Take() - '0');
1261 if (expMinus) {
1262 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1263 exp = exp * 10 + static_cast<int>(s.Take() - '0');
1264 if (exp >= 214748364) { // Issue #313: prevent overflow exponent
1265 while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9')) // Consume the rest of exponent
1266 s.Take();
1267 }
1268 }
1269 }
1270 else { // positive exp
1271 int maxExp = 308 - expFrac;
1272 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1273 exp = exp * 10 + static_cast<int>(s.Take() - '0');
1274 if (RAPIDJSON_UNLIKELY(exp > maxExp))
1275 RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
1276 }
1277 }
1278 }
1279 else
1280 RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell());
1281
1282 if (expMinus)
1283 exp = -exp;
1284 }
1285
1286 // Finish parsing, call event according to the type of number.
1287 bool cont = true;
1288
1289 if (parseFlags & kParseNumbersAsStringsFlag) {
1290 if (parseFlags & kParseInsituFlag) {
1291 s.Pop(); // Pop stack no matter if it will be used or not.
1292 typename InputStream::Ch* head = is.PutBegin();
1293 const size_t length = s.Tell() - startOffset;
1294 RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
1295 // unable to insert the \0 character here, it will erase the comma after this number
1296 const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
1297 cont = handler.RawNumber(str, SizeType(length), false);
1298 }
1299 else {
1300 StackStream<typename TargetEncoding::Ch> stackStream(stack_);
1301 SizeType numCharsToCopy = static_cast<SizeType>(s.Length());
1302 while (numCharsToCopy--) {
1303 Transcoder<SourceEncoding, TargetEncoding>::Transcode(is, stackStream);
1304 }
1305 stackStream.Put('\0');
1306 const typename TargetEncoding::Ch* str = stackStream.Pop();
1307 const SizeType length = static_cast<SizeType>(stackStream.Length()) - 1;
1308 cont = handler.RawNumber(str, SizeType(length), true);
1309 }
1310 }
1311 else {
1312 size_t length = s.Length();
1313 const char* decimal = s.Pop(); // Pop stack no matter if it will be used or not.
1314
1315 if (useDouble) {
1316 int p = exp + expFrac;
1317 if (parseFlags & kParseFullPrecisionFlag)
1318 d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
1319 else
1320 d = internal::StrtodNormalPrecision(d, p);
1321
1322 cont = handler.Double(minus ? -d : d);
1323 }
1324 else {
1325 if (use64bit) {
1326 if (minus)
1327 cont = handler.Int64(static_cast<int64_t>(~i64 + 1));
1328 else
1329 cont = handler.Uint64(i64);
1330 }
1331 else {
1332 if (minus)
1333 cont = handler.Int(static_cast<int32_t>(~i + 1));
1334 else
1335 cont = handler.Uint(i);
1336 }
1337 }
1338 }
1339 if (RAPIDJSON_UNLIKELY(!cont))
1340 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, startOffset);
1341 }
1342
1343 // Parse any JSON value
1344 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseValue(InputStream & is,Handler & handler)1345 void ParseValue(InputStream& is, Handler& handler) {
1346 switch (is.Peek()) {
1347 case 'n': ParseNull <parseFlags>(is, handler); break;
1348 case 't': ParseTrue <parseFlags>(is, handler); break;
1349 case 'f': ParseFalse <parseFlags>(is, handler); break;
1350 case '"': ParseString<parseFlags>(is, handler); break;
1351 case '{': ParseObject<parseFlags>(is, handler); break;
1352 case '[': ParseArray <parseFlags>(is, handler); break;
1353 default :
1354 ParseNumber<parseFlags>(is, handler);
1355 break;
1356
1357 }
1358 }
1359
// Iterative Parsing

// States of the iterative parser. The numeric values are used as row indices of the
// transition table, so they are spelled out explicitly and must stay contiguous.
enum IterativeParsingState {
    IterativeParsingStartState             = 0,
    IterativeParsingFinishState            = 1,
    IterativeParsingErrorState             = 2,

    // Object states
    IterativeParsingObjectInitialState     = 3,
    IterativeParsingMemberKeyState         = 4,
    IterativeParsingKeyValueDelimiterState = 5,
    IterativeParsingMemberValueState       = 6,
    IterativeParsingMemberDelimiterState   = 7,
    IterativeParsingObjectFinishState      = 8,

    // Array states
    IterativeParsingArrayInitialState      = 9,
    IterativeParsingElementState           = 10,
    IterativeParsingElementDelimiterState  = 11,
    IterativeParsingArrayFinishState       = 12,

    // Single value state
    IterativeParsingValueState             = 13
};

// Number of states, derived from the last enumerator.
enum { cIterativeParsingStateCount = IterativeParsingValueState + 1 };
1387
// Tokens: lookahead classes of the iterative parser. The numeric values are used as
// column indices of the transition table, so they are spelled out explicitly.
enum Token {
    LeftBracketToken       = 0,
    RightBracketToken      = 1,

    LeftCurlyBracketToken  = 2,
    RightCurlyBracketToken = 3,

    CommaToken             = 4,
    ColonToken             = 5,

    StringToken            = 6,
    FalseToken             = 7,
    TrueToken              = 8,
    NullToken              = 9,
    NumberToken            = 10,

    // Number of token classes; keep it last.
    kTokenCount            = 11
};
1407
Tokenize(Ch c)1408 RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) {
1409
1410 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
1411 #define N NumberToken
1412 #define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N
1413 // Maps from ASCII to Token
1414 static const unsigned char tokenMap[256] = {
1415 N16, // 00~0F
1416 N16, // 10~1F
1417 N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F
1418 N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F
1419 N16, // 40~4F
1420 N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F
1421 N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F
1422 N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F
1423 N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF
1424 };
1425 #undef N
1426 #undef N16
1427 //!@endcond
1428
1429 if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256)
1430 return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]);
1431 else
1432 return NumberToken;
1433 }
1434
Predict(IterativeParsingState state,Token token)1435 RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) {
1436 // current state x one lookahead token -> new state
1437 static const char G[cIterativeParsingStateCount][kTokenCount] = {
1438 // Start
1439 {
1440 IterativeParsingArrayInitialState, // Left bracket
1441 IterativeParsingErrorState, // Right bracket
1442 IterativeParsingObjectInitialState, // Left curly bracket
1443 IterativeParsingErrorState, // Right curly bracket
1444 IterativeParsingErrorState, // Comma
1445 IterativeParsingErrorState, // Colon
1446 IterativeParsingValueState, // String
1447 IterativeParsingValueState, // False
1448 IterativeParsingValueState, // True
1449 IterativeParsingValueState, // Null
1450 IterativeParsingValueState // Number
1451 },
1452 // Finish(sink state)
1453 {
1454 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1455 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1456 IterativeParsingErrorState
1457 },
1458 // Error(sink state)
1459 {
1460 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1461 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1462 IterativeParsingErrorState
1463 },
1464 // ObjectInitial
1465 {
1466 IterativeParsingErrorState, // Left bracket
1467 IterativeParsingErrorState, // Right bracket
1468 IterativeParsingErrorState, // Left curly bracket
1469 IterativeParsingObjectFinishState, // Right curly bracket
1470 IterativeParsingErrorState, // Comma
1471 IterativeParsingErrorState, // Colon
1472 IterativeParsingMemberKeyState, // String
1473 IterativeParsingErrorState, // False
1474 IterativeParsingErrorState, // True
1475 IterativeParsingErrorState, // Null
1476 IterativeParsingErrorState // Number
1477 },
1478 // MemberKey
1479 {
1480 IterativeParsingErrorState, // Left bracket
1481 IterativeParsingErrorState, // Right bracket
1482 IterativeParsingErrorState, // Left curly bracket
1483 IterativeParsingErrorState, // Right curly bracket
1484 IterativeParsingErrorState, // Comma
1485 IterativeParsingKeyValueDelimiterState, // Colon
1486 IterativeParsingErrorState, // String
1487 IterativeParsingErrorState, // False
1488 IterativeParsingErrorState, // True
1489 IterativeParsingErrorState, // Null
1490 IterativeParsingErrorState // Number
1491 },
1492 // KeyValueDelimiter
1493 {
1494 IterativeParsingArrayInitialState, // Left bracket(push MemberValue state)
1495 IterativeParsingErrorState, // Right bracket
1496 IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state)
1497 IterativeParsingErrorState, // Right curly bracket
1498 IterativeParsingErrorState, // Comma
1499 IterativeParsingErrorState, // Colon
1500 IterativeParsingMemberValueState, // String
1501 IterativeParsingMemberValueState, // False
1502 IterativeParsingMemberValueState, // True
1503 IterativeParsingMemberValueState, // Null
1504 IterativeParsingMemberValueState // Number
1505 },
1506 // MemberValue
1507 {
1508 IterativeParsingErrorState, // Left bracket
1509 IterativeParsingErrorState, // Right bracket
1510 IterativeParsingErrorState, // Left curly bracket
1511 IterativeParsingObjectFinishState, // Right curly bracket
1512 IterativeParsingMemberDelimiterState, // Comma
1513 IterativeParsingErrorState, // Colon
1514 IterativeParsingErrorState, // String
1515 IterativeParsingErrorState, // False
1516 IterativeParsingErrorState, // True
1517 IterativeParsingErrorState, // Null
1518 IterativeParsingErrorState // Number
1519 },
1520 // MemberDelimiter
1521 {
1522 IterativeParsingErrorState, // Left bracket
1523 IterativeParsingErrorState, // Right bracket
1524 IterativeParsingErrorState, // Left curly bracket
1525 IterativeParsingErrorState, // Right curly bracket
1526 IterativeParsingErrorState, // Comma
1527 IterativeParsingErrorState, // Colon
1528 IterativeParsingMemberKeyState, // String
1529 IterativeParsingErrorState, // False
1530 IterativeParsingErrorState, // True
1531 IterativeParsingErrorState, // Null
1532 IterativeParsingErrorState // Number
1533 },
1534 // ObjectFinish(sink state)
1535 {
1536 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1537 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1538 IterativeParsingErrorState
1539 },
1540 // ArrayInitial
1541 {
1542 IterativeParsingArrayInitialState, // Left bracket(push Element state)
1543 IterativeParsingArrayFinishState, // Right bracket
1544 IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
1545 IterativeParsingErrorState, // Right curly bracket
1546 IterativeParsingErrorState, // Comma
1547 IterativeParsingErrorState, // Colon
1548 IterativeParsingElementState, // String
1549 IterativeParsingElementState, // False
1550 IterativeParsingElementState, // True
1551 IterativeParsingElementState, // Null
1552 IterativeParsingElementState // Number
1553 },
1554 // Element
1555 {
1556 IterativeParsingErrorState, // Left bracket
1557 IterativeParsingArrayFinishState, // Right bracket
1558 IterativeParsingErrorState, // Left curly bracket
1559 IterativeParsingErrorState, // Right curly bracket
1560 IterativeParsingElementDelimiterState, // Comma
1561 IterativeParsingErrorState, // Colon
1562 IterativeParsingErrorState, // String
1563 IterativeParsingErrorState, // False
1564 IterativeParsingErrorState, // True
1565 IterativeParsingErrorState, // Null
1566 IterativeParsingErrorState // Number
1567 },
1568 // ElementDelimiter
1569 {
1570 IterativeParsingArrayInitialState, // Left bracket(push Element state)
1571 IterativeParsingErrorState, // Right bracket
1572 IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
1573 IterativeParsingErrorState, // Right curly bracket
1574 IterativeParsingErrorState, // Comma
1575 IterativeParsingErrorState, // Colon
1576 IterativeParsingElementState, // String
1577 IterativeParsingElementState, // False
1578 IterativeParsingElementState, // True
1579 IterativeParsingElementState, // Null
1580 IterativeParsingElementState // Number
1581 },
1582 // ArrayFinish(sink state)
1583 {
1584 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1585 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1586 IterativeParsingErrorState
1587 },
1588 // Single Value (sink state)
1589 {
1590 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1591 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1592 IterativeParsingErrorState
1593 }
1594 }; // End of G
1595
1596 return static_cast<IterativeParsingState>(G[state][token]);
1597 }
1598
// Advance the token stream and the state machine towards the candidate destination state `dst`, which was computed by Predict().
// May return a state different from `dst` when a saved state is popped at the end of an object or array.
1601 template <unsigned parseFlags, typename InputStream, typename Handler>
Transit(IterativeParsingState src,Token token,IterativeParsingState dst,InputStream & is,Handler & handler)1602 RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) {
1603 (void)token;
1604
1605 switch (dst) {
1606 case IterativeParsingErrorState:
1607 return dst;
1608
1609 case IterativeParsingObjectInitialState:
1610 case IterativeParsingArrayInitialState:
1611 {
1612 // Push the state(Element or MemeberValue) if we are nested in another array or value of member.
1613 // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
1614 IterativeParsingState n = src;
1615 if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
1616 n = IterativeParsingElementState;
1617 else if (src == IterativeParsingKeyValueDelimiterState)
1618 n = IterativeParsingMemberValueState;
1619 // Push current state.
1620 *stack_.template Push<SizeType>(1) = n;
1621 // Initialize and push the member/element count.
1622 *stack_.template Push<SizeType>(1) = 0;
1623 // Call handler
1624 bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray();
1625 // On handler short circuits the parsing.
1626 if (!hr) {
1627 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1628 return IterativeParsingErrorState;
1629 }
1630 else {
1631 is.Take();
1632 return dst;
1633 }
1634 }
1635
1636 case IterativeParsingMemberKeyState:
1637 ParseString<parseFlags>(is, handler, true);
1638 if (HasParseError())
1639 return IterativeParsingErrorState;
1640 else
1641 return dst;
1642
1643 case IterativeParsingKeyValueDelimiterState:
1644 RAPIDJSON_ASSERT(token == ColonToken);
1645 is.Take();
1646 return dst;
1647
1648 case IterativeParsingMemberValueState:
1649 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1650 ParseValue<parseFlags>(is, handler);
1651 if (HasParseError()) {
1652 return IterativeParsingErrorState;
1653 }
1654 return dst;
1655
1656 case IterativeParsingElementState:
1657 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1658 ParseValue<parseFlags>(is, handler);
1659 if (HasParseError()) {
1660 return IterativeParsingErrorState;
1661 }
1662 return dst;
1663
1664 case IterativeParsingMemberDelimiterState:
1665 case IterativeParsingElementDelimiterState:
1666 is.Take();
1667 // Update member/element count.
1668 *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1;
1669 return dst;
1670
1671 case IterativeParsingObjectFinishState:
1672 {
1673 // Get member count.
1674 SizeType c = *stack_.template Pop<SizeType>(1);
1675 // If the object is not empty, count the last member.
1676 if (src == IterativeParsingMemberValueState)
1677 ++c;
1678 // Restore the state.
1679 IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
1680 // Transit to Finish state if this is the topmost scope.
1681 if (n == IterativeParsingStartState)
1682 n = IterativeParsingFinishState;
1683 // Call handler
1684 bool hr = handler.EndObject(c);
1685 // On handler short circuits the parsing.
1686 if (!hr) {
1687 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1688 return IterativeParsingErrorState;
1689 }
1690 else {
1691 is.Take();
1692 return n;
1693 }
1694 }
1695
1696 case IterativeParsingArrayFinishState:
1697 {
1698 // Get element count.
1699 SizeType c = *stack_.template Pop<SizeType>(1);
1700 // If the array is not empty, count the last element.
1701 if (src == IterativeParsingElementState)
1702 ++c;
1703 // Restore the state.
1704 IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
1705 // Transit to Finish state if this is the topmost scope.
1706 if (n == IterativeParsingStartState)
1707 n = IterativeParsingFinishState;
1708 // Call handler
1709 bool hr = handler.EndArray(c);
1710 // On handler short circuits the parsing.
1711 if (!hr) {
1712 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1713 return IterativeParsingErrorState;
1714 }
1715 else {
1716 is.Take();
1717 return n;
1718 }
1719 }
1720
1721 default:
1722 // This branch is for IterativeParsingValueState actually.
1723 // Use `default:` rather than
1724 // `case IterativeParsingValueState:` is for code coverage.
1725
1726 // The IterativeParsingStartState is not enumerated in this switch-case.
1727 // It is impossible for that case. And it can be caught by following assertion.
1728
1729 // The IterativeParsingFinishState is not enumerated in this switch-case either.
1730 // It is a "derivative" state which cannot triggered from Predict() directly.
1731 // Therefore it cannot happen here. And it can be caught by following assertion.
1732 RAPIDJSON_ASSERT(dst == IterativeParsingValueState);
1733
1734 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1735 ParseValue<parseFlags>(is, handler);
1736 if (HasParseError()) {
1737 return IterativeParsingErrorState;
1738 }
1739 return IterativeParsingFinishState;
1740 }
1741 }
1742
1743 template <typename InputStream>
HandleError(IterativeParsingState src,InputStream & is)1744 void HandleError(IterativeParsingState src, InputStream& is) {
1745 if (HasParseError()) {
1746 // Error flag has been set.
1747 return;
1748 }
1749
1750 switch (src) {
1751 case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return;
1752 case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return;
1753 case IterativeParsingObjectInitialState:
1754 case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return;
1755 case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return;
1756 case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return;
1757 case IterativeParsingElementState: RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return;
1758 default: RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); return;
1759 }
1760 }
1761
1762 template <unsigned parseFlags, typename InputStream, typename Handler>
IterativeParse(InputStream & is,Handler & handler)1763 ParseResult IterativeParse(InputStream& is, Handler& handler) {
1764 parseResult_.Clear();
1765 ClearStackOnExit scope(*this);
1766 IterativeParsingState state = IterativeParsingStartState;
1767
1768 SkipWhitespaceAndComments<parseFlags>(is);
1769 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
1770 while (is.Peek() != '\0') {
1771 Token t = Tokenize(is.Peek());
1772 IterativeParsingState n = Predict(state, t);
1773 IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
1774
1775 if (d == IterativeParsingErrorState) {
1776 HandleError(state, is);
1777 break;
1778 }
1779
1780 state = d;
1781
1782 // Do not further consume streams if a root JSON has been parsed.
1783 if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
1784 break;
1785
1786 SkipWhitespaceAndComments<parseFlags>(is);
1787 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
1788 }
1789
1790 // Handle the end of file.
1791 if (state != IterativeParsingFinishState)
1792 HandleError(state, is);
1793
1794 return parseResult_;
1795 }
1796
static const size_t kDefaultStackCapacity = 256;    //!< Default stack capacity in bytes for storing a single decoded string.
// Besides decoded strings, iterative parsing also uses this stack: for every object/array
// that is opened, Transit() pushes two SizeType values (the state to resume, then the running
// member/element count) and pops them again when the object/array is closed.
internal::Stack<StackAllocator> stack_;  //!< A stack for storing decoded string temporarily during non-destructive parsing.
ParseResult parseResult_;  //!< Result of the current parse; IterativeParse() clears it on entry and returns it.
1800 }; // class GenericReader
1801
//! Reader with UTF8 encoding and default allocator.
/*! Shorthand for GenericReader with UTF8<> supplied for both explicit template arguments;
    any remaining template parameters keep their defaults.
*/
typedef GenericReader<UTF8<>, UTF8<> > Reader;
1804
1805 RAPIDJSON_NAMESPACE_END
1806
1807 #ifdef __clang__
1808 RAPIDJSON_DIAG_POP
1809 #endif
1810
1811
1812 #ifdef __GNUC__
1813 RAPIDJSON_DIAG_POP
1814 #endif
1815
1816 #ifdef _MSC_VER
1817 RAPIDJSON_DIAG_POP
1818 #endif
1819
1820 #endif // RAPIDJSON_READER_H_
1821