1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14
15 #ifndef RAPIDJSON_READER_H_
16 #define RAPIDJSON_READER_H_
17
18 /*! \file reader.h */
19
20 #include "rapidjson.h"
21 #include "encodings.h"
22 #include "internal/meta.h"
23 #include "internal/stack.h"
24 #include "internal/strtod.h"
25
26 #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
27 #include <intrin.h>
28 #pragma intrinsic(_BitScanForward)
29 #endif
30 #ifdef RAPIDJSON_SSE42
31 #include <nmmintrin.h>
32 #elif defined(RAPIDJSON_SSE2)
33 #include <emmintrin.h>
34 #endif
35
36 #ifdef _MSC_VER
37 RAPIDJSON_DIAG_PUSH
38 RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
39 RAPIDJSON_DIAG_OFF(4702) // unreachable code
40 #endif
41
42 #ifdef __GNUC__
43 RAPIDJSON_DIAG_PUSH
44 RAPIDJSON_DIAG_OFF(effc++)
45 #endif
46
//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN

// Expands to no tokens; passed as the "value" of an early return from a void function.
#define RAPIDJSON_NOTHING /* deliberately empty */

// Returns `value` from the enclosing member function when HasParseError() is true.
// A definition supplied before this point takes precedence over the default below.
#ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN
#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    if (HasParseError()) \
        return value; \
    RAPIDJSON_MULTILINEMACRO_END
#endif

// Same check for functions that return void.
#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
    RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING)

//!@endcond
58
59 /*! \def RAPIDJSON_PARSE_ERROR_NORETURN
60 \ingroup RAPIDJSON_ERRORS
61 \brief Macro to indicate a parse error.
62 \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
63 \param offset position of the error in JSON input (\c size_t)
64
This macro can be used as a customization point for the internal
error handling mechanism of RapidJSON.
67
68 A common usage model is to throw an exception instead of requiring the
69 caller to explicitly check the \ref rapidjson::GenericReader::Parse's
70 return value:
71
72 \code
73 #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \
74 throw ParseException(parseErrorCode, #parseErrorCode, offset)
75
76 #include <stdexcept> // std::runtime_error
77 #include "rapidjson/error/error.h" // rapidjson::ParseResult
78
79 struct ParseException : std::runtime_error, rapidjson::ParseResult {
80 ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset)
81 : std::runtime_error(msg), ParseResult(code, offset) {}
82 };
83
84 #include "rapidjson/reader.h"
85 \endcode
86
87 \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse
88 */
#ifndef RAPIDJSON_PARSE_ERROR_NORETURN
// Default: record the error code and offset through SetParseError() and fall
// through; the caller is responsible for leaving the current function.
#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
        RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \
        SetParseError(parseErrorCode, offset); \
    RAPIDJSON_MULTILINEMACRO_END
#endif
96
97 /*! \def RAPIDJSON_PARSE_ERROR
98 \ingroup RAPIDJSON_ERRORS
99 \brief (Internal) macro to indicate and handle a parse error.
100 \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
101 \param offset position of the error in JSON input (\c size_t)
102
103 Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing.
104
105 \see RAPIDJSON_PARSE_ERROR_NORETURN
106 \hideinitializer
107 */
#ifndef RAPIDJSON_PARSE_ERROR
// Default: report the error via RAPIDJSON_PARSE_ERROR_NORETURN, then leave the
// enclosing void function via RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID.
#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
        RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \
        RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \
    RAPIDJSON_MULTILINEMACRO_END
#endif
115
116 #include "error/error.h" // ParseErrorCode, ParseResult
117
118 RAPIDJSON_NAMESPACE_BEGIN
119
120 ///////////////////////////////////////////////////////////////////////////////
121 // ParseFlag
122
123 /*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS
124 \ingroup RAPIDJSON_CONFIG
125 \brief User-defined kParseDefaultFlags definition.
126
127 User can define this as any \c ParseFlag combinations.
128 */
#ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS
#define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags
#endif

//! Bit flags that select parser behavior; individual flags are combined with bitwise OR.
/*! \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream
*/
enum ParseFlag {
    kParseNoFlags              = 0,       //!< No flags are set.
    kParseInsituFlag           = 1 << 0,  //!< In-situ(destructive) parsing.
    kParseValidateEncodingFlag = 1 << 1,  //!< Validate encoding of JSON strings.
    kParseIterativeFlag        = 1 << 2,  //!< Iterative(constant complexity in terms of function call stack size) parsing.
    kParseStopWhenDoneFlag     = 1 << 3,  //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error.
    kParseFullPrecisionFlag    = 1 << 4,  //!< Parse number in full precision (but slower).
    kParseCommentsFlag         = 1 << 5,  //!< Allow one-line (//) and multi-line (/**/) comments.
    kParseDefaultFlags         = RAPIDJSON_PARSE_DEFAULT_FLAGS  //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS
};
146
147 ///////////////////////////////////////////////////////////////////////////////
148 // Handler
149
150 /*! \class rapidjson::Handler
151 \brief Concept for receiving events from GenericReader upon parsing.
152 The functions return true if no error occurs. If they return false,
153 the event publisher should terminate the process.
154 \code
155 concept Handler {
156 typename Ch;
157
158 bool Null();
159 bool Bool(bool b);
160 bool Int(int i);
161 bool Uint(unsigned i);
162 bool Int64(int64_t i);
163 bool Uint64(uint64_t i);
164 bool Double(double d);
165 bool String(const Ch* str, SizeType length, bool copy);
166 bool StartObject();
167 bool Key(const Ch* str, SizeType length, bool copy);
168 bool EndObject(SizeType memberCount);
169 bool StartArray();
170 bool EndArray(SizeType elementCount);
171 };
172 \endcode
173 */
174 ///////////////////////////////////////////////////////////////////////////////
175 // BaseReaderHandler
176
177 //! Default implementation of Handler.
178 /*! This can be used as base class of any reader handler.
179 \note implements Handler concept
180 */
181 template<typename Encoding = UTF8<>, typename Derived = void>
182 struct BaseReaderHandler {
183 typedef typename Encoding::Ch Ch;
184
185 typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override;
186
DefaultBaseReaderHandler187 bool Default() { return true; }
NullBaseReaderHandler188 bool Null() { return static_cast<Override&>(*this).Default(); }
BoolBaseReaderHandler189 bool Bool(bool) { return static_cast<Override&>(*this).Default(); }
IntBaseReaderHandler190 bool Int(int) { return static_cast<Override&>(*this).Default(); }
UintBaseReaderHandler191 bool Uint(unsigned) { return static_cast<Override&>(*this).Default(); }
Int64BaseReaderHandler192 bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); }
Uint64BaseReaderHandler193 bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); }
DoubleBaseReaderHandler194 bool Double(double) { return static_cast<Override&>(*this).Default(); }
StringBaseReaderHandler195 bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); }
StartObjectBaseReaderHandler196 bool StartObject() { return static_cast<Override&>(*this).Default(); }
KeyBaseReaderHandler197 bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
EndObjectBaseReaderHandler198 bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); }
StartArrayBaseReaderHandler199 bool StartArray() { return static_cast<Override&>(*this).Default(); }
EndArrayBaseReaderHandler200 bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); }
201 };
202
203 ///////////////////////////////////////////////////////////////////////////////
204 // StreamLocalCopy
205
206 namespace internal {
207
208 template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
209 class StreamLocalCopy;
210
211 //! Do copy optimization.
212 template<typename Stream>
213 class StreamLocalCopy<Stream, 1> {
214 public:
StreamLocalCopy(Stream & original)215 StreamLocalCopy(Stream& original) : s(original), original_(original) {}
~StreamLocalCopy()216 ~StreamLocalCopy() { original_ = s; }
217
218 Stream s;
219
220 private:
221 StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
222
223 Stream& original_;
224 };
225
226 //! Keep reference.
227 template<typename Stream>
228 class StreamLocalCopy<Stream, 0> {
229 public:
StreamLocalCopy(Stream & original)230 StreamLocalCopy(Stream& original) : s(original) {}
231
232 Stream& s;
233
234 private:
235 StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
236 };
237
238 } // namespace internal
239
240 ///////////////////////////////////////////////////////////////////////////////
241 // SkipWhitespace
242
243 //! Skip the JSON white spaces in a stream.
244 /*! \param is A input stream for skipping white spaces.
245 \note This function has SSE2/SSE4.2 specialization.
246 */
247 template<typename InputStream>
SkipWhitespace(InputStream & is)248 void SkipWhitespace(InputStream& is) {
249 internal::StreamLocalCopy<InputStream> copy(is);
250 InputStream& s(copy.s);
251
252 while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t')
253 s.Take();
254 }
255
256 #ifdef RAPIDJSON_SSE42
257 //! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once.
SkipWhitespace_SIMD(const char * p)258 inline const char *SkipWhitespace_SIMD(const char* p) {
259 // Fast return for single non-whitespace
260 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
261 ++p;
262 else
263 return p;
264
265 // 16-byte align to the next boundary
266 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & ~15);
267 while (p != nextAligned)
268 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
269 ++p;
270 else
271 return p;
272
273 // The rest of string using SIMD
274 static const char whitespace[16] = " \n\r\t";
275 const __m128i w = _mm_loadu_si128((const __m128i *)&whitespace[0]);
276
277 for (;; p += 16) {
278 const __m128i s = _mm_load_si128((const __m128i *)p);
279 const unsigned r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY));
280 if (r != 0) { // some of characters is non-whitespace
281 #ifdef _MSC_VER // Find the index of first non-whitespace
282 unsigned long offset;
283 _BitScanForward(&offset, r);
284 return p + offset;
285 #else
286 return p + __builtin_ffs(r) - 1;
287 #endif
288 }
289 }
290 }
291
292 #elif defined(RAPIDJSON_SSE2)
293
//! Skip whitespace with SSE2 instructions, testing 16 8-bit characters at once.
/*! \param p Pointer to the first character to examine.
    \return Pointer to the first character that is not ' ', '\\n', '\\r' or '\\t'.
    \note Once \c p reaches a 16-byte boundary the input is read in aligned
          16-byte blocks.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Fast return for single non-whitespace
    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
        ++p;
    else
        return p;

    // 16-byte align to the next boundary
    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & ~15);
    while (p != nextAligned)
        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
            ++p;
        else
            return p;

    // The rest of string.
    // Broadcast each whitespace character to all 16 lanes. Building the patterns
    // with _mm_set1_epi8 guarantees every lane holds the character; loading them
    // from string literals silently yields NUL lanes if a literal is too short.
    const __m128i w0 = _mm_set1_epi8(' ');
    const __m128i w1 = _mm_set1_epi8('\n');
    const __m128i w2 = _mm_set1_epi8('\r');
    const __m128i w3 = _mm_set1_epi8('\t');

    for (;; p += 16) {
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
        __m128i x = _mm_cmpeq_epi8(s, w0);
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
        // Bit i of r is set when byte i of the block matched none of the patterns.
        const unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
        if (r != 0) {   // some of characters may be non-whitespace
#ifdef _MSC_VER         // Find the index of first non-whitespace
            unsigned long offset;
            _BitScanForward(&offset, r);
            return p + offset;
#else
            return p + __builtin_ffs(r) - 1;
#endif
        }
    }
}
340
341 #endif // RAPIDJSON_SSE2
342
343 #ifdef RAPIDJSON_SIMD
344 //! Template function specialization for InsituStringStream
SkipWhitespace(InsituStringStream & is)345 template<> inline void SkipWhitespace(InsituStringStream& is) {
346 is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_));
347 }
348
349 //! Template function specialization for StringStream
SkipWhitespace(StringStream & is)350 template<> inline void SkipWhitespace(StringStream& is) {
351 is.src_ = SkipWhitespace_SIMD(is.src_);
352 }
353 #endif // RAPIDJSON_SIMD
354
355 ///////////////////////////////////////////////////////////////////////////////
356 // GenericReader
357
358 //! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator.
359 /*! GenericReader parses JSON text from a stream, and send events synchronously to an
360 object implementing Handler concept.
361
362 It needs to allocate a stack for storing a single decoded string during
363 non-destructive parsing.
364
365 For in-situ parsing, the decoded string is directly written to the source
366 text string, no temporary buffer is required.
367
368 A GenericReader object can be reused for parsing multiple JSON text.
369
370 \tparam SourceEncoding Encoding of the input stream.
371 \tparam TargetEncoding Encoding of the parse output.
372 \tparam StackAllocator Allocator type for stack.
373 */
374 template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator>
375 class GenericReader {
376 public:
377 typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type
378
379 //! Constructor.
380 /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing)
381 \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing)
382 */
stack_(stackAllocator,stackCapacity)383 GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(stackAllocator, stackCapacity), parseResult_() {}
384
385 //! Parse JSON text.
386 /*! \tparam parseFlags Combination of \ref ParseFlag.
387 \tparam InputStream Type of input stream, implementing Stream concept.
388 \tparam Handler Type of handler, implementing Handler concept.
389 \param is Input stream to be parsed.
390 \param handler The handler to receive events.
391 \return Whether the parsing is successful.
392 */
393 template <unsigned parseFlags, typename InputStream, typename Handler>
Parse(InputStream & is,Handler & handler)394 ParseResult Parse(InputStream& is, Handler& handler) {
395 if (parseFlags & kParseIterativeFlag)
396 return IterativeParse<parseFlags>(is, handler);
397
398 parseResult_.Clear();
399
400 ClearStackOnExit scope(*this);
401
402 SkipWhitespaceAndComments<parseFlags>(is);
403 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
404
405 if (is.Peek() == '\0') {
406 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell());
407 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
408 }
409 else {
410 ParseValue<parseFlags>(is, handler);
411 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
412
413 if (!(parseFlags & kParseStopWhenDoneFlag)) {
414 SkipWhitespaceAndComments<parseFlags>(is);
415 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
416
417 if (is.Peek() != '\0') {
418 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell());
419 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
420 }
421 }
422 }
423
424 return parseResult_;
425 }
426
427 //! Parse JSON text (with \ref kParseDefaultFlags)
428 /*! \tparam InputStream Type of input stream, implementing Stream concept
429 \tparam Handler Type of handler, implementing Handler concept.
430 \param is Input stream to be parsed.
431 \param handler The handler to receive events.
432 \return Whether the parsing is successful.
433 */
434 template <typename InputStream, typename Handler>
Parse(InputStream & is,Handler & handler)435 ParseResult Parse(InputStream& is, Handler& handler) {
436 return Parse<kParseDefaultFlags>(is, handler);
437 }
438
439 //! Whether a parse error has occured in the last parsing.
HasParseError()440 bool HasParseError() const { return parseResult_.IsError(); }
441
442 //! Get the \ref ParseErrorCode of last parsing.
GetParseErrorCode()443 ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); }
444
445 //! Get the position of last parsing error in input, 0 otherwise.
GetErrorOffset()446 size_t GetErrorOffset() const { return parseResult_.Offset(); }
447
448 protected:
SetParseError(ParseErrorCode code,size_t offset)449 void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); }
450
451 private:
452 // Prohibit copy constructor & assignment operator.
453 GenericReader(const GenericReader&);
454 GenericReader& operator=(const GenericReader&);
455
ClearStack()456 void ClearStack() { stack_.Clear(); }
457
458 // clear stack on any exit from ParseStream, e.g. due to exception
459 struct ClearStackOnExit {
ClearStackOnExitClearStackOnExit460 explicit ClearStackOnExit(GenericReader& r) : r_(r) {}
~ClearStackOnExitClearStackOnExit461 ~ClearStackOnExit() { r_.ClearStack(); }
462 private:
463 GenericReader& r_;
464 ClearStackOnExit(const ClearStackOnExit&);
465 ClearStackOnExit& operator=(const ClearStackOnExit&);
466 };
467
468 template<unsigned parseFlags, typename InputStream>
SkipWhitespaceAndComments(InputStream & is)469 void SkipWhitespaceAndComments(InputStream& is) {
470 SkipWhitespace(is);
471
472 if (parseFlags & kParseCommentsFlag) {
473 while (is.Peek() == '/') {
474 is.Take();
475
476 if (is.Peek() == '*') {
477 is.Take();
478 while (true) {
479 if (is.Peek() == '\0')
480 RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
481
482 if (is.Take() == '*') {
483 if (is.Peek() == '\0')
484 RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
485
486 if (is.Take() == '/')
487 break;
488 }
489 }
490 } else if (is.Peek() == '/') {
491 is.Take();
492 while (is.Peek() != '\0' && is.Take() != '\n') { }
493 } else {
494 RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
495 }
496
497 SkipWhitespace(is);
498 }
499 }
500 }
501
502 // Parse object: { string : value, ... }
503 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseObject(InputStream & is,Handler & handler)504 void ParseObject(InputStream& is, Handler& handler) {
505 RAPIDJSON_ASSERT(is.Peek() == '{');
506 is.Take(); // Skip '{'
507
508 if (!handler.StartObject())
509 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
510
511 SkipWhitespaceAndComments<parseFlags>(is);
512 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
513
514 if (is.Peek() == '}') {
515 is.Take();
516 if (!handler.EndObject(0)) // empty object
517 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
518 return;
519 }
520
521 for (SizeType memberCount = 0;;) {
522 if (is.Peek() != '"')
523 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
524
525 ParseString<parseFlags>(is, handler, true);
526 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
527
528 SkipWhitespaceAndComments<parseFlags>(is);
529 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
530
531 if (is.Take() != ':')
532 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
533
534 SkipWhitespaceAndComments<parseFlags>(is);
535 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
536
537 ParseValue<parseFlags>(is, handler);
538 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
539
540 SkipWhitespaceAndComments<parseFlags>(is);
541 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
542
543 ++memberCount;
544
545 switch (is.Take()) {
546 case ',':
547 SkipWhitespaceAndComments<parseFlags>(is);
548 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
549 break;
550 case '}':
551 if (!handler.EndObject(memberCount))
552 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
553 return;
554 default:
555 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell());
556 break;
557 }
558 }
559 }
560
561 // Parse array: [ value, ... ]
562 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseArray(InputStream & is,Handler & handler)563 void ParseArray(InputStream& is, Handler& handler) {
564 RAPIDJSON_ASSERT(is.Peek() == '[');
565 is.Take(); // Skip '['
566
567 if (!handler.StartArray())
568 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
569
570 SkipWhitespaceAndComments<parseFlags>(is);
571 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
572
573 if (is.Peek() == ']') {
574 is.Take();
575 if (!handler.EndArray(0)) // empty array
576 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
577 return;
578 }
579
580 for (SizeType elementCount = 0;;) {
581 ParseValue<parseFlags>(is, handler);
582 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
583
584 ++elementCount;
585 SkipWhitespaceAndComments<parseFlags>(is);
586 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
587
588 switch (is.Take()) {
589 case ',':
590 SkipWhitespaceAndComments<parseFlags>(is);
591 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
592 break;
593 case ']':
594 if (!handler.EndArray(elementCount))
595 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
596 return;
597 default:
598 RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
599 break;
600 }
601 }
602 }
603
604 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseNull(InputStream & is,Handler & handler)605 void ParseNull(InputStream& is, Handler& handler) {
606 RAPIDJSON_ASSERT(is.Peek() == 'n');
607 is.Take();
608
609 if (is.Take() == 'u' && is.Take() == 'l' && is.Take() == 'l') {
610 if (!handler.Null())
611 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
612 }
613 else
614 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1);
615 }
616
617 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseTrue(InputStream & is,Handler & handler)618 void ParseTrue(InputStream& is, Handler& handler) {
619 RAPIDJSON_ASSERT(is.Peek() == 't');
620 is.Take();
621
622 if (is.Take() == 'r' && is.Take() == 'u' && is.Take() == 'e') {
623 if (!handler.Bool(true))
624 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
625 }
626 else
627 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1);
628 }
629
630 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseFalse(InputStream & is,Handler & handler)631 void ParseFalse(InputStream& is, Handler& handler) {
632 RAPIDJSON_ASSERT(is.Peek() == 'f');
633 is.Take();
634
635 if (is.Take() == 'a' && is.Take() == 'l' && is.Take() == 's' && is.Take() == 'e') {
636 if (!handler.Bool(false))
637 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
638 }
639 else
640 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1);
641 }
642
643 // Helper function to parse four hexidecimal digits in \uXXXX in ParseString().
644 template<typename InputStream>
ParseHex4(InputStream & is)645 unsigned ParseHex4(InputStream& is) {
646 unsigned codepoint = 0;
647 for (int i = 0; i < 4; i++) {
648 Ch c = is.Take();
649 codepoint <<= 4;
650 codepoint += static_cast<unsigned>(c);
651 if (c >= '0' && c <= '9')
652 codepoint -= '0';
653 else if (c >= 'A' && c <= 'F')
654 codepoint -= 'A' - 10;
655 else if (c >= 'a' && c <= 'f')
656 codepoint -= 'a' - 10;
657 else {
658 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, is.Tell() - 1);
659 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0);
660 }
661 }
662 return codepoint;
663 }
664
665 template <typename CharType>
666 class StackStream {
667 public:
668 typedef CharType Ch;
669
StackStream(internal::Stack<StackAllocator> & stack)670 StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {}
Put(Ch c)671 RAPIDJSON_FORCEINLINE void Put(Ch c) {
672 *stack_.template Push<Ch>() = c;
673 ++length_;
674 }
Length()675 size_t Length() const { return length_; }
Pop()676 Ch* Pop() {
677 return stack_.template Pop<Ch>(length_);
678 }
679
680 private:
681 StackStream(const StackStream&);
682 StackStream& operator=(const StackStream&);
683
684 internal::Stack<StackAllocator>& stack_;
685 SizeType length_;
686 };
687
688 // Parse string and generate String event. Different code paths for kParseInsituFlag.
689 template<unsigned parseFlags, typename InputStream, typename Handler>
690 void ParseString(InputStream& is, Handler& handler, bool isKey = false) {
691 internal::StreamLocalCopy<InputStream> copy(is);
692 InputStream& s(copy.s);
693
694 bool success = false;
695 if (parseFlags & kParseInsituFlag) {
696 typename InputStream::Ch *head = s.PutBegin();
697 ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s);
698 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
699 size_t length = s.PutEnd(head) - 1;
700 RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
701 const typename TargetEncoding::Ch* const str = (typename TargetEncoding::Ch*)head;
702 success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false));
703 }
704 else {
705 StackStream<typename TargetEncoding::Ch> stackStream(stack_);
706 ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream);
707 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
708 SizeType length = static_cast<SizeType>(stackStream.Length()) - 1;
709 const typename TargetEncoding::Ch* const str = stackStream.Pop();
710 success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true));
711 }
712 if (!success)
713 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
714 }
715
716 // Parse string to an output is
717 // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation.
718 template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream>
ParseStringToStream(InputStream & is,OutputStream & os)719 RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) {
720 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
721 #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
722 static const char escape[256] = {
723 Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/',
724 Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
725 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
726 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
727 Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16
728 };
729 #undef Z16
730 //!@endcond
731
732 RAPIDJSON_ASSERT(is.Peek() == '\"');
733 is.Take(); // Skip '\"'
734
735 for (;;) {
736 Ch c = is.Peek();
737 if (c == '\\') { // Escape
738 is.Take();
739 Ch e = is.Take();
740 if ((sizeof(Ch) == 1 || unsigned(e) < 256) && escape[(unsigned char)e]) {
741 os.Put(escape[(unsigned char)e]);
742 }
743 else if (e == 'u') { // Unicode
744 unsigned codepoint = ParseHex4(is);
745 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
746 if (codepoint >= 0xD800 && codepoint <= 0xDBFF) {
747 // Handle UTF-16 surrogate pair
748 if (is.Take() != '\\' || is.Take() != 'u')
749 RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, is.Tell() - 2);
750 unsigned codepoint2 = ParseHex4(is);
751 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
752 if (codepoint2 < 0xDC00 || codepoint2 > 0xDFFF)
753 RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, is.Tell() - 2);
754 codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
755 }
756 TEncoding::Encode(os, codepoint);
757 }
758 else
759 RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell() - 1);
760 }
761 else if (c == '"') { // Closing double quote
762 is.Take();
763 os.Put('\0'); // null-terminate the string
764 return;
765 }
766 else if (c == '\0')
767 RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell() - 1);
768 else if ((unsigned)c < 0x20) // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
769 RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell() - 1);
770 else {
771 if (parseFlags & kParseValidateEncodingFlag ?
772 !Transcoder<SEncoding, TEncoding>::Validate(is, os) :
773 !Transcoder<SEncoding, TEncoding>::Transcode(is, os))
774 RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell());
775 }
776 }
777 }
778
779 template<typename InputStream, bool backup>
780 class NumberStream;
781
782 template<typename InputStream>
783 class NumberStream<InputStream, false> {
784 public:
NumberStream(GenericReader & reader,InputStream & s)785 NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; }
~NumberStream()786 ~NumberStream() {}
787
Peek()788 RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
TakePush()789 RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
Take()790 RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
Tell()791 size_t Tell() { return is.Tell(); }
Length()792 size_t Length() { return 0; }
Pop()793 const char* Pop() { return 0; }
794
795 protected:
796 NumberStream& operator=(const NumberStream&);
797
798 InputStream& is;
799 };
800
801 template<typename InputStream>
802 class NumberStream<InputStream, true> : public NumberStream<InputStream, false> {
803 typedef NumberStream<InputStream, false> Base;
804 public:
NumberStream(GenericReader & reader,InputStream & is)805 NumberStream(GenericReader& reader, InputStream& is) : NumberStream<InputStream, false>(reader, is), stackStream(reader.stack_) {}
~NumberStream()806 ~NumberStream() {}
807
TakePush()808 RAPIDJSON_FORCEINLINE Ch TakePush() {
809 stackStream.Put((char)Base::is.Peek());
810 return Base::is.Take();
811 }
812
Length()813 size_t Length() { return stackStream.Length(); }
814
Pop()815 const char* Pop() {
816 stackStream.Put('\0');
817 return stackStream.Pop();
818 }
819
820 private:
821 StackStream<char> stackStream;
822 };
823
824 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseNumber(InputStream & is,Handler & handler)825 void ParseNumber(InputStream& is, Handler& handler) {
826 internal::StreamLocalCopy<InputStream> copy(is);
827 NumberStream<InputStream, (parseFlags & kParseFullPrecisionFlag) != 0> s(*this, copy.s);
828
829 // Parse minus
830 bool minus = false;
831 if (s.Peek() == '-') {
832 minus = true;
833 s.Take();
834 }
835
836 // Parse int: zero / ( digit1-9 *DIGIT )
837 unsigned i = 0;
838 uint64_t i64 = 0;
839 bool use64bit = false;
840 int significandDigit = 0;
841 if (s.Peek() == '0') {
842 i = 0;
843 s.TakePush();
844 }
845 else if (s.Peek() >= '1' && s.Peek() <= '9') {
846 i = static_cast<unsigned>(s.TakePush() - '0');
847
848 if (minus)
849 while (s.Peek() >= '0' && s.Peek() <= '9') {
850 if (i >= 214748364) { // 2^31 = 2147483648
851 if (i != 214748364 || s.Peek() > '8') {
852 i64 = i;
853 use64bit = true;
854 break;
855 }
856 }
857 i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
858 significandDigit++;
859 }
860 else
861 while (s.Peek() >= '0' && s.Peek() <= '9') {
862 if (i >= 429496729) { // 2^32 - 1 = 4294967295
863 if (i != 429496729 || s.Peek() > '5') {
864 i64 = i;
865 use64bit = true;
866 break;
867 }
868 }
869 i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
870 significandDigit++;
871 }
872 }
873 else
874 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
875
876 // Parse 64bit int
877 bool useDouble = false;
878 double d = 0.0;
879 if (use64bit) {
880 if (minus)
881 while (s.Peek() >= '0' && s.Peek() <= '9') {
882 if (i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC)) // 2^63 = 9223372036854775808
883 if (i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8') {
884 d = i64;
885 useDouble = true;
886 break;
887 }
888 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
889 significandDigit++;
890 }
891 else
892 while (s.Peek() >= '0' && s.Peek() <= '9') {
893 if (i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999)) // 2^64 - 1 = 18446744073709551615
894 if (i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5') {
895 d = i64;
896 useDouble = true;
897 break;
898 }
899 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
900 significandDigit++;
901 }
902 }
903
904 // Force double for big integer
905 if (useDouble) {
906 while (s.Peek() >= '0' && s.Peek() <= '9') {
907 if (d >= 1.7976931348623157e307) // DBL_MAX / 10.0
908 RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, s.Tell());
909 d = d * 10 + (s.TakePush() - '0');
910 }
911 }
912
913 // Parse frac = decimal-point 1*DIGIT
914 int expFrac = 0;
915 size_t decimalPosition;
916 if (s.Peek() == '.') {
917 s.Take();
918 decimalPosition = s.Length();
919
920 if (!(s.Peek() >= '0' && s.Peek() <= '9'))
921 RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell());
922
923 if (!useDouble) {
924 #if RAPIDJSON_64BIT
925 // Use i64 to store significand in 64-bit architecture
926 if (!use64bit)
927 i64 = i;
928
929 while (s.Peek() >= '0' && s.Peek() <= '9') {
930 if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
931 break;
932 else {
933 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
934 --expFrac;
935 if (i64 != 0)
936 significandDigit++;
937 }
938 }
939
940 d = (double)i64;
941 #else
942 // Use double to store significand in 32-bit architecture
943 d = use64bit ? (double)i64 : (double)i;
944 #endif
945 useDouble = true;
946 }
947
948 while (s.Peek() >= '0' && s.Peek() <= '9') {
949 if (significandDigit < 17) {
950 d = d * 10.0 + (s.TakePush() - '0');
951 --expFrac;
952 if (d > 0.0)
953 significandDigit++;
954 }
955 else
956 s.TakePush();
957 }
958 }
959 else
960 decimalPosition = s.Length(); // decimal position at the end of integer.
961
962 // Parse exp = e [ minus / plus ] 1*DIGIT
963 int exp = 0;
964 if (s.Peek() == 'e' || s.Peek() == 'E') {
965 if (!useDouble) {
966 d = use64bit ? i64 : i;
967 useDouble = true;
968 }
969 s.Take();
970
971 bool expMinus = false;
972 if (s.Peek() == '+')
973 s.Take();
974 else if (s.Peek() == '-') {
975 s.Take();
976 expMinus = true;
977 }
978
979 if (s.Peek() >= '0' && s.Peek() <= '9') {
980 exp = s.Take() - '0';
981 if (expMinus) {
982 while (s.Peek() >= '0' && s.Peek() <= '9') {
983 exp = exp * 10 + (s.Take() - '0');
984 if (exp >= 214748364) { // Issue #313: prevent overflow exponent
985 while (s.Peek() >= '0' && s.Peek() <= '9') // Consume the rest of exponent
986 s.Take();
987 }
988 }
989 }
990 else { // positive exp
991 int maxExp = 308 - expFrac;
992 while (s.Peek() >= '0' && s.Peek() <= '9') {
993 exp = exp * 10 + (s.Take() - '0');
994 if (exp > maxExp)
995 RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, s.Tell());
996 }
997 }
998 }
999 else
1000 RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell());
1001
1002 if (expMinus)
1003 exp = -exp;
1004 }
1005
1006 // Finish parsing, call event according to the type of number.
1007 bool cont = true;
1008 size_t length = s.Length();
1009 const char* decimal = s.Pop(); // Pop stack no matter if it will be used or not.
1010
1011 if (useDouble) {
1012 int p = exp + expFrac;
1013 if (parseFlags & kParseFullPrecisionFlag)
1014 d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
1015 else
1016 d = internal::StrtodNormalPrecision(d, p);
1017
1018 cont = handler.Double(minus ? -d : d);
1019 }
1020 else {
1021 if (use64bit) {
1022 if (minus)
1023 cont = handler.Int64(static_cast<int64_t>(~i64 + 1));
1024 else
1025 cont = handler.Uint64(i64);
1026 }
1027 else {
1028 if (minus)
1029 cont = handler.Int(static_cast<int32_t>(~i + 1));
1030 else
1031 cont = handler.Uint(i);
1032 }
1033 }
1034 if (!cont)
1035 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
1036 }
1037
1038 // Parse any JSON value
1039 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseValue(InputStream & is,Handler & handler)1040 void ParseValue(InputStream& is, Handler& handler) {
1041 switch (is.Peek()) {
1042 case 'n': ParseNull <parseFlags>(is, handler); break;
1043 case 't': ParseTrue <parseFlags>(is, handler); break;
1044 case 'f': ParseFalse <parseFlags>(is, handler); break;
1045 case '"': ParseString<parseFlags>(is, handler); break;
1046 case '{': ParseObject<parseFlags>(is, handler); break;
1047 case '[': ParseArray <parseFlags>(is, handler); break;
1048 default :
1049 ParseNumber<parseFlags>(is, handler);
1050 break;
1051
1052 }
1053 }
1054
1055 // Iterative Parsing
1056
1057 // States
// States of the iterative parser.
// The numeric values are used as row indices of the table in Predict(), so the
// order of the enumerators must stay in sync with the rows of that table.
enum IterativeParsingState {
    IterativeParsingStartState              = 0,
    IterativeParsingFinishState             = 1,
    IterativeParsingErrorState              = 2,

    // Object states
    IterativeParsingObjectInitialState      = 3,
    IterativeParsingMemberKeyState          = 4,
    IterativeParsingKeyValueDelimiterState  = 5,
    IterativeParsingMemberValueState        = 6,
    IterativeParsingMemberDelimiterState    = 7,
    IterativeParsingObjectFinishState       = 8,

    // Array states
    IterativeParsingArrayInitialState       = 9,
    IterativeParsingElementState            = 10,
    IterativeParsingElementDelimiterState   = 11,
    IterativeParsingArrayFinishState        = 12,

    // Single value state
    IterativeParsingValueState              = 13,

    // Number of states; must remain the last enumerator.
    cIterativeParsingStateCount
};
1082
1083 // Tokens
// Lookahead token classes produced by Tokenize().
// The numeric values are used as column indices of the table in Predict().
enum Token {
    LeftBracketToken        = 0,
    RightBracketToken       = 1,

    LeftCurlyBracketToken   = 2,
    RightCurlyBracketToken  = 3,

    CommaToken              = 4,
    ColonToken              = 5,

    StringToken             = 6,
    FalseToken              = 7,
    TrueToken               = 8,
    NullToken               = 9,
    NumberToken             = 10,

    // Number of token classes; must remain the last enumerator.
    kTokenCount
};
1102
Tokenize(Ch c)1103 RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) {
1104
1105 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
1106 #define N NumberToken
1107 #define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N
1108 // Maps from ASCII to Token
1109 static const unsigned char tokenMap[256] = {
1110 N16, // 00~0F
1111 N16, // 10~1F
1112 N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F
1113 N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F
1114 N16, // 40~4F
1115 N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F
1116 N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F
1117 N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F
1118 N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF
1119 };
1120 #undef N
1121 #undef N16
1122 //!@endcond
1123
1124 if (sizeof(Ch) == 1 || unsigned(c) < 256)
1125 return (Token)tokenMap[(unsigned char)c];
1126 else
1127 return NumberToken;
1128 }
1129
// Looks up the candidate next state for the pair (current state, lookahead token).
// This is a pure table lookup: no input is consumed and no handler is called.
// IterativeParsingErrorState is returned for every pair that has no valid transition.
RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) {
    // current state x one lookahead token -> new state
    // Rows follow the order of IterativeParsingState, columns the order of Token.
    static const char G[cIterativeParsingStateCount][kTokenCount] = {
        // Start
        {
            IterativeParsingArrayInitialState,  // Left bracket
            IterativeParsingErrorState,         // Right bracket
            IterativeParsingObjectInitialState, // Left curly bracket
            IterativeParsingErrorState,         // Right curly bracket
            IterativeParsingErrorState,         // Comma
            IterativeParsingErrorState,         // Colon
            IterativeParsingValueState,         // String
            IterativeParsingValueState,         // False
            IterativeParsingValueState,         // True
            IterativeParsingValueState,         // Null
            IterativeParsingValueState          // Number
        },
        // Finish(sink state)
        {
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState
        },
        // Error(sink state)
        {
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState
        },
        // ObjectInitial
        {
            IterativeParsingErrorState,         // Left bracket
            IterativeParsingErrorState,         // Right bracket
            IterativeParsingErrorState,         // Left curly bracket
            IterativeParsingObjectFinishState,  // Right curly bracket
            IterativeParsingErrorState,         // Comma
            IterativeParsingErrorState,         // Colon
            IterativeParsingMemberKeyState,     // String
            IterativeParsingErrorState,         // False
            IterativeParsingErrorState,         // True
            IterativeParsingErrorState,         // Null
            IterativeParsingErrorState          // Number
        },
        // MemberKey
        {
            IterativeParsingErrorState,             // Left bracket
            IterativeParsingErrorState,             // Right bracket
            IterativeParsingErrorState,             // Left curly bracket
            IterativeParsingErrorState,             // Right curly bracket
            IterativeParsingErrorState,             // Comma
            IterativeParsingKeyValueDelimiterState, // Colon
            IterativeParsingErrorState,             // String
            IterativeParsingErrorState,             // False
            IterativeParsingErrorState,             // True
            IterativeParsingErrorState,             // Null
            IterativeParsingErrorState              // Number
        },
        // KeyValueDelimiter
        {
            IterativeParsingArrayInitialState,      // Left bracket(push MemberValue state)
            IterativeParsingErrorState,             // Right bracket
            IterativeParsingObjectInitialState,     // Left curly bracket(push MemberValue state)
            IterativeParsingErrorState,             // Right curly bracket
            IterativeParsingErrorState,             // Comma
            IterativeParsingErrorState,             // Colon
            IterativeParsingMemberValueState,       // String
            IterativeParsingMemberValueState,       // False
            IterativeParsingMemberValueState,       // True
            IterativeParsingMemberValueState,       // Null
            IterativeParsingMemberValueState        // Number
        },
        // MemberValue
        {
            IterativeParsingErrorState,             // Left bracket
            IterativeParsingErrorState,             // Right bracket
            IterativeParsingErrorState,             // Left curly bracket
            IterativeParsingObjectFinishState,      // Right curly bracket
            IterativeParsingMemberDelimiterState,   // Comma
            IterativeParsingErrorState,             // Colon
            IterativeParsingErrorState,             // String
            IterativeParsingErrorState,             // False
            IterativeParsingErrorState,             // True
            IterativeParsingErrorState,             // Null
            IterativeParsingErrorState              // Number
        },
        // MemberDelimiter
        {
            IterativeParsingErrorState,         // Left bracket
            IterativeParsingErrorState,         // Right bracket
            IterativeParsingErrorState,         // Left curly bracket
            IterativeParsingErrorState,         // Right curly bracket
            IterativeParsingErrorState,         // Comma
            IterativeParsingErrorState,         // Colon
            IterativeParsingMemberKeyState,     // String
            IterativeParsingErrorState,         // False
            IterativeParsingErrorState,         // True
            IterativeParsingErrorState,         // Null
            IterativeParsingErrorState          // Number
        },
        // ObjectFinish(sink state)
        {
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState
        },
        // ArrayInitial
        {
            IterativeParsingArrayInitialState,      // Left bracket(push Element state)
            IterativeParsingArrayFinishState,       // Right bracket
            IterativeParsingObjectInitialState,     // Left curly bracket(push Element state)
            IterativeParsingErrorState,             // Right curly bracket
            IterativeParsingErrorState,             // Comma
            IterativeParsingErrorState,             // Colon
            IterativeParsingElementState,           // String
            IterativeParsingElementState,           // False
            IterativeParsingElementState,           // True
            IterativeParsingElementState,           // Null
            IterativeParsingElementState            // Number
        },
        // Element
        {
            IterativeParsingErrorState,             // Left bracket
            IterativeParsingArrayFinishState,       // Right bracket
            IterativeParsingErrorState,             // Left curly bracket
            IterativeParsingErrorState,             // Right curly bracket
            IterativeParsingElementDelimiterState,  // Comma
            IterativeParsingErrorState,             // Colon
            IterativeParsingErrorState,             // String
            IterativeParsingErrorState,             // False
            IterativeParsingErrorState,             // True
            IterativeParsingErrorState,             // Null
            IterativeParsingErrorState              // Number
        },
        // ElementDelimiter
        {
            IterativeParsingArrayInitialState,      // Left bracket(push Element state)
            IterativeParsingErrorState,             // Right bracket
            IterativeParsingObjectInitialState,     // Left curly bracket(push Element state)
            IterativeParsingErrorState,             // Right curly bracket
            IterativeParsingErrorState,             // Comma
            IterativeParsingErrorState,             // Colon
            IterativeParsingElementState,           // String
            IterativeParsingElementState,           // False
            IterativeParsingElementState,           // True
            IterativeParsingElementState,           // Null
            IterativeParsingElementState            // Number
        },
        // ArrayFinish(sink state)
        {
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState
        },
        // Single Value (sink state)
        {
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState
        }
    }; // End of G

    // The table stores states as char; convert back to the enum type for the caller.
    return (IterativeParsingState)G[state][token];
}
1293
1294 // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit().
1295 // May return a new state on state pop.
1296 template <unsigned parseFlags, typename InputStream, typename Handler>
Transit(IterativeParsingState src,Token token,IterativeParsingState dst,InputStream & is,Handler & handler)1297 RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) {
1298 (void)token;
1299
1300 switch (dst) {
1301 case IterativeParsingErrorState:
1302 return dst;
1303
1304 case IterativeParsingObjectInitialState:
1305 case IterativeParsingArrayInitialState:
1306 {
1307 // Push the state(Element or MemeberValue) if we are nested in another array or value of member.
1308 // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
1309 IterativeParsingState n = src;
1310 if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
1311 n = IterativeParsingElementState;
1312 else if (src == IterativeParsingKeyValueDelimiterState)
1313 n = IterativeParsingMemberValueState;
1314 // Push current state.
1315 *stack_.template Push<SizeType>(1) = n;
1316 // Initialize and push the member/element count.
1317 *stack_.template Push<SizeType>(1) = 0;
1318 // Call handler
1319 bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray();
1320 // On handler short circuits the parsing.
1321 if (!hr) {
1322 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1323 return IterativeParsingErrorState;
1324 }
1325 else {
1326 is.Take();
1327 return dst;
1328 }
1329 }
1330
1331 case IterativeParsingMemberKeyState:
1332 ParseString<parseFlags>(is, handler, true);
1333 if (HasParseError())
1334 return IterativeParsingErrorState;
1335 else
1336 return dst;
1337
1338 case IterativeParsingKeyValueDelimiterState:
1339 RAPIDJSON_ASSERT(token == ColonToken);
1340 is.Take();
1341 return dst;
1342
1343 case IterativeParsingMemberValueState:
1344 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1345 ParseValue<parseFlags>(is, handler);
1346 if (HasParseError()) {
1347 return IterativeParsingErrorState;
1348 }
1349 return dst;
1350
1351 case IterativeParsingElementState:
1352 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1353 ParseValue<parseFlags>(is, handler);
1354 if (HasParseError()) {
1355 return IterativeParsingErrorState;
1356 }
1357 return dst;
1358
1359 case IterativeParsingMemberDelimiterState:
1360 case IterativeParsingElementDelimiterState:
1361 is.Take();
1362 // Update member/element count.
1363 *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1;
1364 return dst;
1365
1366 case IterativeParsingObjectFinishState:
1367 {
1368 // Get member count.
1369 SizeType c = *stack_.template Pop<SizeType>(1);
1370 // If the object is not empty, count the last member.
1371 if (src == IterativeParsingMemberValueState)
1372 ++c;
1373 // Restore the state.
1374 IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
1375 // Transit to Finish state if this is the topmost scope.
1376 if (n == IterativeParsingStartState)
1377 n = IterativeParsingFinishState;
1378 // Call handler
1379 bool hr = handler.EndObject(c);
1380 // On handler short circuits the parsing.
1381 if (!hr) {
1382 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1383 return IterativeParsingErrorState;
1384 }
1385 else {
1386 is.Take();
1387 return n;
1388 }
1389 }
1390
1391 case IterativeParsingArrayFinishState:
1392 {
1393 // Get element count.
1394 SizeType c = *stack_.template Pop<SizeType>(1);
1395 // If the array is not empty, count the last element.
1396 if (src == IterativeParsingElementState)
1397 ++c;
1398 // Restore the state.
1399 IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
1400 // Transit to Finish state if this is the topmost scope.
1401 if (n == IterativeParsingStartState)
1402 n = IterativeParsingFinishState;
1403 // Call handler
1404 bool hr = handler.EndArray(c);
1405 // On handler short circuits the parsing.
1406 if (!hr) {
1407 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1408 return IterativeParsingErrorState;
1409 }
1410 else {
1411 is.Take();
1412 return n;
1413 }
1414 }
1415
1416 default:
1417 // This branch is for IterativeParsingValueState actually.
1418 // Use `default:` rather than
1419 // `case IterativeParsingValueState:` is for code coverage.
1420
1421 // The IterativeParsingStartState is not enumerated in this switch-case.
1422 // It is impossible for that case. And it can be caught by following assertion.
1423
1424 // The IterativeParsingFinishState is not enumerated in this switch-case either.
1425 // It is a "derivative" state which cannot triggered from Predict() directly.
1426 // Therefore it cannot happen here. And it can be caught by following assertion.
1427 RAPIDJSON_ASSERT(dst == IterativeParsingValueState);
1428
1429 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1430 ParseValue<parseFlags>(is, handler);
1431 if (HasParseError()) {
1432 return IterativeParsingErrorState;
1433 }
1434 return IterativeParsingFinishState;
1435 }
1436 }
1437
1438 template <typename InputStream>
HandleError(IterativeParsingState src,InputStream & is)1439 void HandleError(IterativeParsingState src, InputStream& is) {
1440 if (HasParseError()) {
1441 // Error flag has been set.
1442 return;
1443 }
1444
1445 switch (src) {
1446 case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return;
1447 case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return;
1448 case IterativeParsingObjectInitialState:
1449 case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return;
1450 case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return;
1451 case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return;
1452 case IterativeParsingElementState: RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return;
1453 default: RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); return;
1454 }
1455 }
1456
1457 template <unsigned parseFlags, typename InputStream, typename Handler>
IterativeParse(InputStream & is,Handler & handler)1458 ParseResult IterativeParse(InputStream& is, Handler& handler) {
1459 parseResult_.Clear();
1460 ClearStackOnExit scope(*this);
1461 IterativeParsingState state = IterativeParsingStartState;
1462
1463 SkipWhitespaceAndComments<parseFlags>(is);
1464 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
1465 while (is.Peek() != '\0') {
1466 Token t = Tokenize(is.Peek());
1467 IterativeParsingState n = Predict(state, t);
1468 IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
1469
1470 if (d == IterativeParsingErrorState) {
1471 HandleError(state, is);
1472 break;
1473 }
1474
1475 state = d;
1476
1477 // Do not further consume streams if a root JSON has been parsed.
1478 if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
1479 break;
1480
1481 SkipWhitespaceAndComments<parseFlags>(is);
1482 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
1483 }
1484
1485 // Handle the end of file.
1486 if (state != IterativeParsingFinishState)
1487 HandleError(state, is);
1488
1489 return parseResult_;
1490 }
1491
static const size_t kDefaultStackCapacity = 256;    //!< Default stack capacity in bytes for storing a single decoded string.
//! A stack for storing decoded string temporarily during non-destructive parsing.
//! It is also handed to the buffering NumberStream (as reader.stack_) and holds the
//! state/count frames that Transit() pushes and pops during iterative parsing.
internal::Stack<StackAllocator> stack_;
//! Result of the current parse; cleared at the start of IterativeParse() and returned by it.
ParseResult parseResult_;
1495 }; // class GenericReader
1496
//! Reader with UTF8 encoding and default allocator.
/*! Convenience name for GenericReader with UTF8<> given for both of its encoding
    parameters; any remaining template parameters keep their defaults.
*/
typedef GenericReader<UTF8<>, UTF8<> > Reader;
1499
1500 RAPIDJSON_NAMESPACE_END
1501
1502 #ifdef __GNUC__
1503 RAPIDJSON_DIAG_POP
1504 #endif
1505
1506 #ifdef _MSC_VER
1507 RAPIDJSON_DIAG_POP
1508 #endif
1509
1510 #endif // RAPIDJSON_READER_H_
1511