1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14
15 #ifndef RAPIDJSON_READER_H_
16 #define RAPIDJSON_READER_H_
17
18 /*! \file reader.h */
19
20 #include "allocators.h"
21 #include "stream.h"
22 #include "encodedstream.h"
23 #include "internal/meta.h"
24 #include "internal/stack.h"
25 #include "internal/strtod.h"
26 #include <limits>
27
28 #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
29 #include <intrin.h>
30 #pragma intrinsic(_BitScanForward)
31 #endif
32 #ifdef RAPIDJSON_SSE42
33 #include <nmmintrin.h>
34 #elif defined(RAPIDJSON_SSE2)
35 #include <emmintrin.h>
36 #endif
37
38 #ifdef _MSC_VER
39 RAPIDJSON_DIAG_PUSH
40 RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
41 RAPIDJSON_DIAG_OFF(4702) // unreachable code
42 #endif
43
44 #ifdef __clang__
45 RAPIDJSON_DIAG_PUSH
46 RAPIDJSON_DIAG_OFF(old-style-cast)
47 RAPIDJSON_DIAG_OFF(padded)
48 RAPIDJSON_DIAG_OFF(switch-enum)
49 #endif
50
51 #ifdef __GNUC__
52 RAPIDJSON_DIAG_PUSH
53 RAPIDJSON_DIAG_OFF(effc++)
54 #endif
55
56 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
// Expands to nothing. It is passed as the "value" argument below so that the
// generated statement becomes a bare `return;` for functions returning void.
57 #define RAPIDJSON_NOTHING /* deliberately empty */
// Returns `value` from the enclosing function when HasParseError() reports an error.
// The #ifndef guard lets a definition made before this point take precedence.
58 #ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN
59 #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
60 RAPIDJSON_MULTILINEMACRO_BEGIN \
61 if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \
62 RAPIDJSON_MULTILINEMACRO_END
63 #endif
// Early-return variant for functions whose return type is void.
64 #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
65 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING)
66 //!@endcond
67
68 /*! \def RAPIDJSON_PARSE_ERROR_NORETURN
69 \ingroup RAPIDJSON_ERRORS
70 \brief Macro to indicate a parse error.
71 \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
72 \param offset position of the error in JSON input (\c size_t)
73
74 This macro can be used as a customization point for the internal
75 error handling mechanism of RapidJSON.
76
77 A common usage model is to throw an exception instead of requiring the
78 caller to explicitly check the \ref rapidjson::GenericReader::Parse's
79 return value:
80
81 \code
82 #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \
83 throw ParseException(parseErrorCode, #parseErrorCode, offset)
84
85 #include <stdexcept> // std::runtime_error
86 #include "rapidjson/error/error.h" // rapidjson::ParseResult
87
88 struct ParseException : std::runtime_error, rapidjson::ParseResult {
89 ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset)
90 : std::runtime_error(msg), ParseResult(code, offset) {}
91 };
92
93 #include "rapidjson/reader.h"
94 \endcode
95
96 \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse
97 */
// Default definition, used only when the macro has not been defined before this point.
// It asserts that no error has been recorded yet, then records the error through
// SetParseError(); it does not return from the enclosing function by itself.
98 #ifndef RAPIDJSON_PARSE_ERROR_NORETURN
99 #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \
100 RAPIDJSON_MULTILINEMACRO_BEGIN \
101 RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \
102 SetParseError(parseErrorCode, offset); \
103 RAPIDJSON_MULTILINEMACRO_END
104 #endif
105
106 /*! \def RAPIDJSON_PARSE_ERROR
107 \ingroup RAPIDJSON_ERRORS
108 \brief (Internal) macro to indicate and handle a parse error.
109 \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
110 \param offset position of the error in JSON input (\c size_t)
111
112 Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing.
113
114 \see RAPIDJSON_PARSE_ERROR_NORETURN
115 \hideinitializer
116 */
// Default definition, used only when the macro has not been defined before this point.
// Step 1 records the error (RAPIDJSON_PARSE_ERROR_NORETURN); step 2 leaves the enclosing
// void function through RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID, which tests HasParseError().
117 #ifndef RAPIDJSON_PARSE_ERROR
118 #define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \
119 RAPIDJSON_MULTILINEMACRO_BEGIN \
120 RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \
121 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \
122 RAPIDJSON_MULTILINEMACRO_END
123 #endif
124
125 #include "error/error.h" // ParseErrorCode, ParseResult
126
127 RAPIDJSON_NAMESPACE_BEGIN
128
129 ///////////////////////////////////////////////////////////////////////////////
130 // ParseFlag
131
132 /*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS
133 \ingroup RAPIDJSON_CONFIG
134 \brief User-defined kParseDefaultFlags definition.
135
136 User can define this as any \c ParseFlag combinations.
137 */
#ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS
#define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags
#endif

//! Bit flags that select parser behaviour; combine them with bitwise OR.
/*! Each flag occupies its own bit, so the values are written as shifts.
    \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream
*/
enum ParseFlag {
    kParseNoFlags = 0,                  //!< No flags are set.
    kParseInsituFlag = 1 << 0,          //!< In-situ(destructive) parsing.
    kParseValidateEncodingFlag = 1 << 1, //!< Validate encoding of JSON strings.
    kParseIterativeFlag = 1 << 2,       //!< Iterative(constant complexity in terms of function call stack size) parsing.
    kParseStopWhenDoneFlag = 1 << 3,    //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error.
    kParseFullPrecisionFlag = 1 << 4,   //!< Parse number in full precision (but slower).
    kParseCommentsFlag = 1 << 5,        //!< Allow one-line (//) and multi-line (/**/) comments.
    kParseNumbersAsStringsFlag = 1 << 6, //!< Parse all numbers (ints/doubles) as strings.
    kParseTrailingCommasFlag = 1 << 7,  //!< Allow trailing commas at the end of objects and arrays.
    kParseNanAndInfFlag = 1 << 8,       //!< Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles.
    kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS
};
158
159 ///////////////////////////////////////////////////////////////////////////////
160 // Handler
161
162 /*! \class rapidjson::Handler
163 \brief Concept for receiving events from GenericReader upon parsing.
164 The functions return true if no error occurs. If they return false,
165 the event publisher should terminate the process.
166 \code
167 concept Handler {
168 typename Ch;
169
170 bool Null();
171 bool Bool(bool b);
172 bool Int(int i);
173 bool Uint(unsigned i);
174 bool Int64(int64_t i);
175 bool Uint64(uint64_t i);
176 bool Double(double d);
177 /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
178 bool RawNumber(const Ch* str, SizeType length, bool copy);
179 bool String(const Ch* str, SizeType length, bool copy);
180 bool StartObject();
181 bool Key(const Ch* str, SizeType length, bool copy);
182 bool EndObject(SizeType memberCount);
183 bool StartArray();
184 bool EndArray(SizeType elementCount);
185 };
186 \endcode
187 */
188 ///////////////////////////////////////////////////////////////////////////////
189 // BaseReaderHandler
190
191 //! Default implementation of Handler.
192 /*! This can be used as base class of any reader handler.
193 \note implements Handler concept
194 */
195 template<typename Encoding = UTF8<>, typename Derived = void>
196 struct BaseReaderHandler {
197 typedef typename Encoding::Ch Ch;
198
199 typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override;
200
DefaultBaseReaderHandler201 bool Default() { return true; }
NullBaseReaderHandler202 bool Null() { return static_cast<Override&>(*this).Default(); }
BoolBaseReaderHandler203 bool Bool(bool) { return static_cast<Override&>(*this).Default(); }
IntBaseReaderHandler204 bool Int(int) { return static_cast<Override&>(*this).Default(); }
UintBaseReaderHandler205 bool Uint(unsigned) { return static_cast<Override&>(*this).Default(); }
Int64BaseReaderHandler206 bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); }
Uint64BaseReaderHandler207 bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); }
DoubleBaseReaderHandler208 bool Double(double) { return static_cast<Override&>(*this).Default(); }
209 /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
RawNumberBaseReaderHandler210 bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
StringBaseReaderHandler211 bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); }
StartObjectBaseReaderHandler212 bool StartObject() { return static_cast<Override&>(*this).Default(); }
KeyBaseReaderHandler213 bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
EndObjectBaseReaderHandler214 bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); }
StartArrayBaseReaderHandler215 bool StartArray() { return static_cast<Override&>(*this).Default(); }
EndArrayBaseReaderHandler216 bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); }
217 };
218
219 ///////////////////////////////////////////////////////////////////////////////
220 // StreamLocalCopy
221
222 namespace internal {
223
224 template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
225 class StreamLocalCopy;
226
227 //! Do copy optimization.
228 template<typename Stream>
229 class StreamLocalCopy<Stream, 1> {
230 public:
StreamLocalCopy(Stream & original)231 StreamLocalCopy(Stream& original) : s(original), original_(original) {}
~StreamLocalCopy()232 ~StreamLocalCopy() { original_ = s; }
233
234 Stream s;
235
236 private:
237 StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
238
239 Stream& original_;
240 };
241
242 //! Keep reference.
243 template<typename Stream>
244 class StreamLocalCopy<Stream, 0> {
245 public:
StreamLocalCopy(Stream & original)246 StreamLocalCopy(Stream& original) : s(original) {}
247
248 Stream& s;
249
250 private:
251 StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
252 };
253
254 } // namespace internal
255
256 ///////////////////////////////////////////////////////////////////////////////
257 // SkipWhitespace
258
259 //! Skip the JSON white spaces in a stream.
260 /*! \param is A input stream for skipping white spaces.
261 \note This function has SSE2/SSE4.2 specialization.
262 */
263 template<typename InputStream>
SkipWhitespace(InputStream & is)264 void SkipWhitespace(InputStream& is) {
265 internal::StreamLocalCopy<InputStream> copy(is);
266 InputStream& s(copy.s);
267
268 typename InputStream::Ch c;
269 while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t')
270 s.Take();
271 }
272
//! Skip JSON white spaces in the character range [p, end).
/*! \return Pointer to the first character that is not ' ', '\n', '\r' or '\t',
        or \c end when the whole range is whitespace.
*/
inline const char* SkipWhitespace(const char* p, const char* end) {
    for (; p != end; ++p) {
        const char ch = *p;
        if (ch != ' ' && ch != '\n' && ch != '\r' && ch != '\t')
            break;
    }
    return p;
}
278
279 #ifdef RAPIDJSON_SSE42
280 //! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once.
SkipWhitespace_SIMD(const char * p)281 inline const char *SkipWhitespace_SIMD(const char* p) {
282 // Fast return for single non-whitespace
283 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
284 ++p;
285 else
286 return p;
287
288 // 16-byte align to the next boundary
289 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
290 while (p != nextAligned)
291 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
292 ++p;
293 else
294 return p;
295
296 // The rest of string using SIMD
297 static const char whitespace[16] = " \n\r\t";
298 const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
299
300 for (;; p += 16) {
301 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
302 const int r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY));
303 if (r != 0) { // some of characters is non-whitespace
304 #ifdef _MSC_VER // Find the index of first non-whitespace
305 unsigned long offset;
306 _BitScanForward(&offset, r);
307 return p + offset;
308 #else
309 return p + __builtin_ffs(r) - 1;
310 #endif
311 }
312 }
313 }
314
SkipWhitespace_SIMD(const char * p,const char * end)315 inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
316 // Fast return for single non-whitespace
317 if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
318 ++p;
319 else
320 return p;
321
322 // The middle of string using SIMD
323 static const char whitespace[16] = " \n\r\t";
324 const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
325
326 for (; p <= end - 16; p += 16) {
327 const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
328 const int r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY));
329 if (r != 0) { // some of characters is non-whitespace
330 #ifdef _MSC_VER // Find the index of first non-whitespace
331 unsigned long offset;
332 _BitScanForward(&offset, r);
333 return p + offset;
334 #else
335 return p + __builtin_ffs(r) - 1;
336 #endif
337 }
338 }
339
340 return SkipWhitespace(p, end);
341 }
342
343 #elif defined(RAPIDJSON_SSE2)
344
345 //! Skip whitespace with SSE2 instructions, testing 16 8-byte characters at once.
SkipWhitespace_SIMD(const char * p)346 inline const char *SkipWhitespace_SIMD(const char* p) {
347 // Fast return for single non-whitespace
348 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
349 ++p;
350 else
351 return p;
352
353 // 16-byte align to the next boundary
354 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
355 while (p != nextAligned)
356 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
357 ++p;
358 else
359 return p;
360
361 // The rest of string
362 #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
363 static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
364 #undef C16
365
366 const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
367 const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
368 const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
369 const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
370
371 for (;; p += 16) {
372 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
373 __m128i x = _mm_cmpeq_epi8(s, w0);
374 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
375 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
376 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
377 unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
378 if (r != 0) { // some of characters may be non-whitespace
379 #ifdef _MSC_VER // Find the index of first non-whitespace
380 unsigned long offset;
381 _BitScanForward(&offset, r);
382 return p + offset;
383 #else
384 return p + __builtin_ffs(r) - 1;
385 #endif
386 }
387 }
388 }
389
SkipWhitespace_SIMD(const char * p,const char * end)390 inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
391 // Fast return for single non-whitespace
392 if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
393 ++p;
394 else
395 return p;
396
397 // The rest of string
398 #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
399 static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
400 #undef C16
401
402 const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
403 const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
404 const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
405 const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
406
407 for (; p <= end - 16; p += 16) {
408 const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
409 __m128i x = _mm_cmpeq_epi8(s, w0);
410 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
411 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
412 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
413 unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
414 if (r != 0) { // some of characters may be non-whitespace
415 #ifdef _MSC_VER // Find the index of first non-whitespace
416 unsigned long offset;
417 _BitScanForward(&offset, r);
418 return p + offset;
419 #else
420 return p + __builtin_ffs(r) - 1;
421 #endif
422 }
423 }
424
425 return SkipWhitespace(p, end);
426 }
427
428 #endif // RAPIDJSON_SSE2
429
430 #ifdef RAPIDJSON_SIMD
431 //! Template function specialization for InsituStringStream
SkipWhitespace(InsituStringStream & is)432 template<> inline void SkipWhitespace(InsituStringStream& is) {
433 is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_));
434 }
435
436 //! Template function specialization for StringStream
SkipWhitespace(StringStream & is)437 template<> inline void SkipWhitespace(StringStream& is) {
438 is.src_ = SkipWhitespace_SIMD(is.src_);
439 }
440
SkipWhitespace(EncodedInputStream<UTF8<>,MemoryStream> & is)441 template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) {
442 is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_);
443 }
444 #endif // RAPIDJSON_SIMD
445
446 ///////////////////////////////////////////////////////////////////////////////
447 // GenericReader
448
449 //! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator.
450 /*! GenericReader parses JSON text from a stream, and sends events synchronously to an
451 object implementing Handler concept.
452
453 It needs to allocate a stack for storing a single decoded string during
454 non-destructive parsing.
455
456 For in-situ parsing, the decoded string is directly written to the source
457 text string, no temporary buffer is required.
458
459 A GenericReader object can be reused for parsing multiple JSON texts.
460
461 \tparam SourceEncoding Encoding of the input stream.
462 \tparam TargetEncoding Encoding of the parse output.
463 \tparam StackAllocator Allocator type for stack.
464 */
465 template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator>
466 class GenericReader {
467 public:
468 typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type
469
470 //! Constructor.
471 /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing)
472 \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing)
473 */
stack_(stackAllocator,stackCapacity)474 GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(stackAllocator, stackCapacity), parseResult_() {}
475
476 //! Parse JSON text.
477 /*! \tparam parseFlags Combination of \ref ParseFlag.
478 \tparam InputStream Type of input stream, implementing Stream concept.
479 \tparam Handler Type of handler, implementing Handler concept.
480 \param is Input stream to be parsed.
481 \param handler The handler to receive events.
482 \return Whether the parsing is successful.
483 */
484 template <unsigned parseFlags, typename InputStream, typename Handler>
Parse(InputStream & is,Handler & handler)485 ParseResult Parse(InputStream& is, Handler& handler) {
486 if (parseFlags & kParseIterativeFlag)
487 return IterativeParse<parseFlags>(is, handler);
488
489 parseResult_.Clear();
490
491 ClearStackOnExit scope(*this);
492
493 SkipWhitespaceAndComments<parseFlags>(is);
494 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
495
496 if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) {
497 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell());
498 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
499 }
500 else {
501 ParseValue<parseFlags>(is, handler);
502 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
503
504 if (!(parseFlags & kParseStopWhenDoneFlag)) {
505 SkipWhitespaceAndComments<parseFlags>(is);
506 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
507
508 if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) {
509 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell());
510 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
511 }
512 }
513 }
514
515 return parseResult_;
516 }
517
518 //! Parse JSON text (with \ref kParseDefaultFlags)
519 /*! \tparam InputStream Type of input stream, implementing Stream concept
520 \tparam Handler Type of handler, implementing Handler concept.
521 \param is Input stream to be parsed.
522 \param handler The handler to receive events.
523 \return Whether the parsing is successful.
524 */
525 template <typename InputStream, typename Handler>
Parse(InputStream & is,Handler & handler)526 ParseResult Parse(InputStream& is, Handler& handler) {
527 return Parse<kParseDefaultFlags>(is, handler);
528 }
529
530 //! Whether a parse error has occured in the last parsing.
HasParseError()531 bool HasParseError() const { return parseResult_.IsError(); }
532
533 //! Get the \ref ParseErrorCode of last parsing.
GetParseErrorCode()534 ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); }
535
536 //! Get the position of last parsing error in input, 0 otherwise.
GetErrorOffset()537 size_t GetErrorOffset() const { return parseResult_.Offset(); }
538
539 protected:
SetParseError(ParseErrorCode code,size_t offset)540 void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); }
541
542 private:
543 // Prohibit copy constructor & assignment operator: both are declared in the private
// section and given no body here, so code outside the class cannot copy a reader.
544 GenericReader(const GenericReader&);
545 GenericReader& operator=(const GenericReader&);
546
ClearStack()547 void ClearStack() { stack_.Clear(); }
548
549 // clear stack on any exit from ParseStream, e.g. due to exception
550 struct ClearStackOnExit {
ClearStackOnExitClearStackOnExit551 explicit ClearStackOnExit(GenericReader& r) : r_(r) {}
~ClearStackOnExitClearStackOnExit552 ~ClearStackOnExit() { r_.ClearStack(); }
553 private:
554 GenericReader& r_;
555 ClearStackOnExit(const ClearStackOnExit&);
556 ClearStackOnExit& operator=(const ClearStackOnExit&);
557 };
558
559 template<unsigned parseFlags, typename InputStream>
SkipWhitespaceAndComments(InputStream & is)560 void SkipWhitespaceAndComments(InputStream& is) {
561 SkipWhitespace(is);
562
563 if (parseFlags & kParseCommentsFlag) {
564 while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) {
565 if (Consume(is, '*')) {
566 while (true) {
567 if (RAPIDJSON_UNLIKELY(is.Peek() == '\0'))
568 RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
569 else if (Consume(is, '*')) {
570 if (Consume(is, '/'))
571 break;
572 }
573 else
574 is.Take();
575 }
576 }
577 else if (RAPIDJSON_LIKELY(Consume(is, '/')))
578 while (is.Peek() != '\0' && is.Take() != '\n');
579 else
580 RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
581
582 SkipWhitespace(is);
583 }
584 }
585 }
586
587 // Parse object: { string : value, ... }
588 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseObject(InputStream & is,Handler & handler)589 void ParseObject(InputStream& is, Handler& handler) {
590 RAPIDJSON_ASSERT(is.Peek() == '{');
591 is.Take(); // Skip '{'
592
593 if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
594 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
595
596 SkipWhitespaceAndComments<parseFlags>(is);
597 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
598
599 if (Consume(is, '}')) {
600 if (RAPIDJSON_UNLIKELY(!handler.EndObject(0))) // empty object
601 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
602 return;
603 }
604
605 for (SizeType memberCount = 0;;) {
606 if (RAPIDJSON_UNLIKELY(is.Peek() != '"'))
607 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
608
609 ParseString<parseFlags>(is, handler, true);
610 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
611
612 SkipWhitespaceAndComments<parseFlags>(is);
613 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
614
615 if (RAPIDJSON_UNLIKELY(!Consume(is, ':')))
616 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
617
618 SkipWhitespaceAndComments<parseFlags>(is);
619 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
620
621 ParseValue<parseFlags>(is, handler);
622 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
623
624 SkipWhitespaceAndComments<parseFlags>(is);
625 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
626
627 ++memberCount;
628
629 switch (is.Peek()) {
630 case ',':
631 is.Take();
632 SkipWhitespaceAndComments<parseFlags>(is);
633 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
634 break;
635 case '}':
636 is.Take();
637 if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
638 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
639 return;
640 default:
641 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; // This useless break is only for making warning and coverage happy
642 }
643
644 if (parseFlags & kParseTrailingCommasFlag) {
645 if (is.Peek() == '}') {
646 if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
647 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
648 is.Take();
649 return;
650 }
651 }
652 }
653 }
654
655 // Parse array: [ value, ... ]
656 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseArray(InputStream & is,Handler & handler)657 void ParseArray(InputStream& is, Handler& handler) {
658 RAPIDJSON_ASSERT(is.Peek() == '[');
659 is.Take(); // Skip '['
660
661 if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
662 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
663
664 SkipWhitespaceAndComments<parseFlags>(is);
665 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
666
667 if (Consume(is, ']')) {
668 if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array
669 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
670 return;
671 }
672
673 for (SizeType elementCount = 0;;) {
674 ParseValue<parseFlags>(is, handler);
675 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
676
677 ++elementCount;
678 SkipWhitespaceAndComments<parseFlags>(is);
679 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
680
681 if (Consume(is, ',')) {
682 SkipWhitespaceAndComments<parseFlags>(is);
683 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
684 }
685 else if (Consume(is, ']')) {
686 if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
687 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
688 return;
689 }
690 else
691 RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
692
693 if (parseFlags & kParseTrailingCommasFlag) {
694 if (is.Peek() == ']') {
695 if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
696 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
697 is.Take();
698 return;
699 }
700 }
701 }
702 }
703
704 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseNull(InputStream & is,Handler & handler)705 void ParseNull(InputStream& is, Handler& handler) {
706 RAPIDJSON_ASSERT(is.Peek() == 'n');
707 is.Take();
708
709 if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) {
710 if (RAPIDJSON_UNLIKELY(!handler.Null()))
711 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
712 }
713 else
714 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
715 }
716
717 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseTrue(InputStream & is,Handler & handler)718 void ParseTrue(InputStream& is, Handler& handler) {
719 RAPIDJSON_ASSERT(is.Peek() == 't');
720 is.Take();
721
722 if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) {
723 if (RAPIDJSON_UNLIKELY(!handler.Bool(true)))
724 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
725 }
726 else
727 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
728 }
729
730 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseFalse(InputStream & is,Handler & handler)731 void ParseFalse(InputStream& is, Handler& handler) {
732 RAPIDJSON_ASSERT(is.Peek() == 'f');
733 is.Take();
734
735 if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) {
736 if (RAPIDJSON_UNLIKELY(!handler.Bool(false)))
737 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
738 }
739 else
740 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
741 }
742
743 template<typename InputStream>
Consume(InputStream & is,typename InputStream::Ch expect)744 RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) {
745 if (RAPIDJSON_LIKELY(is.Peek() == expect)) {
746 is.Take();
747 return true;
748 }
749 else
750 return false;
751 }
752
753 // Helper function to parse four hexidecimal digits in \uXXXX in ParseString().
754 template<typename InputStream>
ParseHex4(InputStream & is,size_t escapeOffset)755 unsigned ParseHex4(InputStream& is, size_t escapeOffset) {
756 unsigned codepoint = 0;
757 for (int i = 0; i < 4; i++) {
758 Ch c = is.Peek();
759 codepoint <<= 4;
760 codepoint += static_cast<unsigned>(c);
761 if (c >= '0' && c <= '9')
762 codepoint -= '0';
763 else if (c >= 'A' && c <= 'F')
764 codepoint -= 'A' - 10;
765 else if (c >= 'a' && c <= 'f')
766 codepoint -= 'a' - 10;
767 else {
768 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, escapeOffset);
769 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0);
770 }
771 is.Take();
772 }
773 return codepoint;
774 }
775
776 template <typename CharType>
777 class StackStream {
778 public:
779 typedef CharType Ch;
780
StackStream(internal::Stack<StackAllocator> & stack)781 StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {}
Put(Ch c)782 RAPIDJSON_FORCEINLINE void Put(Ch c) {
783 *stack_.template Push<Ch>() = c;
784 ++length_;
785 }
786
Push(SizeType count)787 RAPIDJSON_FORCEINLINE void* Push(SizeType count) {
788 length_ += count;
789 return stack_.template Push<Ch>(count);
790 }
791
Length()792 size_t Length() const { return length_; }
793
Pop()794 Ch* Pop() {
795 return stack_.template Pop<Ch>(length_);
796 }
797
798 private:
799 StackStream(const StackStream&);
800 StackStream& operator=(const StackStream&);
801
802 internal::Stack<StackAllocator>& stack_;
803 SizeType length_;
804 };
805
806 // Parse string and generate String event. Different code paths for kParseInsituFlag.
807 template<unsigned parseFlags, typename InputStream, typename Handler>
808 void ParseString(InputStream& is, Handler& handler, bool isKey = false) {
809 internal::StreamLocalCopy<InputStream> copy(is);
810 InputStream& s(copy.s);
811
812 RAPIDJSON_ASSERT(s.Peek() == '\"');
813 s.Take(); // Skip '\"'
814
815 bool success = false;
816 if (parseFlags & kParseInsituFlag) {
817 typename InputStream::Ch *head = s.PutBegin();
818 ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s);
819 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
820 size_t length = s.PutEnd(head) - 1;
821 RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
822 const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
823 success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false));
824 }
825 else {
826 StackStream<typename TargetEncoding::Ch> stackStream(stack_);
827 ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream);
828 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
829 SizeType length = static_cast<SizeType>(stackStream.Length()) - 1;
830 const typename TargetEncoding::Ch* const str = stackStream.Pop();
831 success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true));
832 }
833 if (RAPIDJSON_UNLIKELY(!success))
834 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
835 }
836
837 // Parse string to an output is
838 // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation.
839 template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream>
ParseStringToStream(InputStream & is,OutputStream & os)840 RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) {
841 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
842 #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
843 static const char escape[256] = {
844 Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/',
845 Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
846 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
847 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
848 Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16
849 };
850 #undef Z16
851 //!@endcond
852
853 for (;;) {
854 // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation.
855 if (!(parseFlags & kParseValidateEncodingFlag))
856 ScanCopyUnescapedString(is, os);
857
858 Ch c = is.Peek();
859 if (RAPIDJSON_UNLIKELY(c == '\\')) { // Escape
860 size_t escapeOffset = is.Tell(); // For invalid escaping, report the inital '\\' as error offset
861 is.Take();
862 Ch e = is.Peek();
863 if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) {
864 is.Take();
865 os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)]));
866 }
867 else if (RAPIDJSON_LIKELY(e == 'u')) { // Unicode
868 is.Take();
869 unsigned codepoint = ParseHex4(is, escapeOffset);
870 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
871 if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) {
872 // Handle UTF-16 surrogate pair
873 if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
874 RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
875 unsigned codepoint2 = ParseHex4(is, escapeOffset);
876 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
877 if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
878 RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
879 codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
880 }
881 TEncoding::Encode(os, codepoint);
882 }
883 else
884 RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset);
885 }
886 else if (RAPIDJSON_UNLIKELY(c == '"')) { // Closing double quote
887 is.Take();
888 os.Put('\0'); // null-terminate the string
889 return;
890 }
891 else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
892 if (c == '\0')
893 RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell());
894 else
895 RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell());
896 }
897 else {
898 size_t offset = is.Tell();
899 if (RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag ?
900 !Transcoder<SEncoding, TEncoding>::Validate(is, os) :
901 !Transcoder<SEncoding, TEncoding>::Transcode(is, os))))
902 RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, offset);
903 }
904 }
905 }
906
// Generic fallback of the "bulk copy unescaped characters" fast path.
// It intentionally does nothing: ParseStringToStream() then processes every
// character in its own loop. More specific overloads for particular stream
// pairs are provided below when RAPIDJSON_SSE2 / RAPIDJSON_SSE42 is defined.
template<typename InputStream, typename OutputStream>
static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) {
        // Do nothing for generic version
}
911
912 #if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
913 // StringStream -> StackStream<char>
ScanCopyUnescapedString(StringStream & is,StackStream<char> & os)914 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
915 const char* p = is.src_;
916
917 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
918 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
919 while (p != nextAligned)
920 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
921 is.src_ = p;
922 return;
923 }
924 else
925 os.Put(*p++);
926
927 // The rest of string using SIMD
928 static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
929 static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
930 static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
931 const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
932 const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
933 const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
934
935 for (;; p += 16) {
936 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
937 const __m128i t1 = _mm_cmpeq_epi8(s, dq);
938 const __m128i t2 = _mm_cmpeq_epi8(s, bs);
939 const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
940 const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
941 unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
942 if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
943 SizeType length;
944 #ifdef _MSC_VER // Find the index of first escaped
945 unsigned long offset;
946 _BitScanForward(&offset, r);
947 length = offset;
948 #else
949 length = static_cast<SizeType>(__builtin_ffs(r) - 1);
950 #endif
951 char* q = reinterpret_cast<char*>(os.Push(length));
952 for (size_t i = 0; i < length; i++)
953 q[i] = p[i];
954
955 p += length;
956 break;
957 }
958 _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s);
959 }
960
961 is.src_ = p;
962 }
963
964 // InsituStringStream -> InsituStringStream
ScanCopyUnescapedString(InsituStringStream & is,InsituStringStream & os)965 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
966 RAPIDJSON_ASSERT(&is == &os);
967 (void)os;
968
969 if (is.src_ == is.dst_) {
970 SkipUnescapedString(is);
971 return;
972 }
973
974 char* p = is.src_;
975 char *q = is.dst_;
976
977 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
978 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
979 while (p != nextAligned)
980 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
981 is.src_ = p;
982 is.dst_ = q;
983 return;
984 }
985 else
986 *q++ = *p++;
987
988 // The rest of string using SIMD
989 static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
990 static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
991 static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
992 const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
993 const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
994 const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
995
996 for (;; p += 16, q += 16) {
997 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
998 const __m128i t1 = _mm_cmpeq_epi8(s, dq);
999 const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1000 const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
1001 const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
1002 unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
1003 if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
1004 size_t length;
1005 #ifdef _MSC_VER // Find the index of first escaped
1006 unsigned long offset;
1007 _BitScanForward(&offset, r);
1008 length = offset;
1009 #else
1010 length = static_cast<size_t>(__builtin_ffs(r) - 1);
1011 #endif
1012 for (const char* pend = p + length; p != pend; )
1013 *q++ = *p++;
1014 break;
1015 }
1016 _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s);
1017 }
1018
1019 is.src_ = p;
1020 is.dst_ = q;
1021 }
1022
1023 // When read/write pointers are the same for insitu stream, just skip unescaped characters
SkipUnescapedString(InsituStringStream & is)1024 static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
1025 RAPIDJSON_ASSERT(is.src_ == is.dst_);
1026 char* p = is.src_;
1027
1028 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1029 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1030 for (; p != nextAligned; p++)
1031 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1032 is.src_ = is.dst_ = p;
1033 return;
1034 }
1035
1036 // The rest of string using SIMD
1037 static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
1038 static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
1039 static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
1040 const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
1041 const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
1042 const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
1043
1044 for (;; p += 16) {
1045 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
1046 const __m128i t1 = _mm_cmpeq_epi8(s, dq);
1047 const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1048 const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
1049 const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
1050 unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
1051 if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
1052 size_t length;
1053 #ifdef _MSC_VER // Find the index of first escaped
1054 unsigned long offset;
1055 _BitScanForward(&offset, r);
1056 length = offset;
1057 #else
1058 length = static_cast<size_t>(__builtin_ffs(r) - 1);
1059 #endif
1060 p += length;
1061 break;
1062 }
1063 }
1064
1065 is.src_ = is.dst_ = p;
1066 }
1067 #endif
1068
// Input stream adapter used by ParseNumber(); only the specializations below
// are defined.
//   backup     - when true, TakePush()/Push() additionally record characters
//                in a StackStream<char>, retrievable through Length()/Pop();
//                when false those operations record nothing.
//   pushOnTake - when true (together with backup), plain Take() records the
//                consumed character as well.
template<typename InputStream, bool backup, bool pushOnTake>
class NumberStream;
1071
1072 template<typename InputStream>
1073 class NumberStream<InputStream, false, false> {
1074 public:
1075 typedef typename InputStream::Ch Ch;
1076
NumberStream(GenericReader & reader,InputStream & s)1077 NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; }
~NumberStream()1078 ~NumberStream() {}
1079
Peek()1080 RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
TakePush()1081 RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
Take()1082 RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
Push(char)1083 RAPIDJSON_FORCEINLINE void Push(char) {}
1084
Tell()1085 size_t Tell() { return is.Tell(); }
Length()1086 size_t Length() { return 0; }
Pop()1087 const char* Pop() { return 0; }
1088
1089 protected:
1090 NumberStream& operator=(const NumberStream&);
1091
1092 InputStream& is;
1093 };
1094
1095 template<typename InputStream>
1096 class NumberStream<InputStream, true, false> : public NumberStream<InputStream, false, false> {
1097 typedef NumberStream<InputStream, false, false> Base;
1098 public:
NumberStream(GenericReader & reader,InputStream & is)1099 NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is), stackStream(reader.stack_) {}
~NumberStream()1100 ~NumberStream() {}
1101
TakePush()1102 RAPIDJSON_FORCEINLINE Ch TakePush() {
1103 stackStream.Put(static_cast<char>(Base::is.Peek()));
1104 return Base::is.Take();
1105 }
1106
Push(char c)1107 RAPIDJSON_FORCEINLINE void Push(char c) {
1108 stackStream.Put(c);
1109 }
1110
Length()1111 size_t Length() { return stackStream.Length(); }
1112
Pop()1113 const char* Pop() {
1114 stackStream.Put('\0');
1115 return stackStream.Pop();
1116 }
1117
1118 private:
1119 StackStream<char> stackStream;
1120 };
1121
1122 template<typename InputStream>
1123 class NumberStream<InputStream, true, true> : public NumberStream<InputStream, true, false> {
1124 typedef NumberStream<InputStream, true, false> Base;
1125 public:
NumberStream(GenericReader & reader,InputStream & is)1126 NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is) {}
~NumberStream()1127 ~NumberStream() {}
1128
Take()1129 RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); }
1130 };
1131
1132 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseNumber(InputStream & is,Handler & handler)1133 void ParseNumber(InputStream& is, Handler& handler) {
1134 internal::StreamLocalCopy<InputStream> copy(is);
1135 NumberStream<InputStream,
1136 ((parseFlags & kParseNumbersAsStringsFlag) != 0) ?
1137 ((parseFlags & kParseInsituFlag) == 0) :
1138 ((parseFlags & kParseFullPrecisionFlag) != 0),
1139 (parseFlags & kParseNumbersAsStringsFlag) != 0 &&
1140 (parseFlags & kParseInsituFlag) == 0> s(*this, copy.s);
1141
1142 size_t startOffset = s.Tell();
1143 double d = 0.0;
1144 bool useNanOrInf = false;
1145
1146 // Parse minus
1147 bool minus = Consume(s, '-');
1148
1149 // Parse int: zero / ( digit1-9 *DIGIT )
1150 unsigned i = 0;
1151 uint64_t i64 = 0;
1152 bool use64bit = false;
1153 int significandDigit = 0;
1154 if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) {
1155 i = 0;
1156 s.TakePush();
1157 }
1158 else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) {
1159 i = static_cast<unsigned>(s.TakePush() - '0');
1160
1161 if (minus)
1162 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1163 if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648
1164 if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) {
1165 i64 = i;
1166 use64bit = true;
1167 break;
1168 }
1169 }
1170 i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
1171 significandDigit++;
1172 }
1173 else
1174 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1175 if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295
1176 if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) {
1177 i64 = i;
1178 use64bit = true;
1179 break;
1180 }
1181 }
1182 i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
1183 significandDigit++;
1184 }
1185 }
1186 // Parse NaN or Infinity here
1187 else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) {
1188 useNanOrInf = true;
1189 if (RAPIDJSON_LIKELY(Consume(s, 'N') && Consume(s, 'a') && Consume(s, 'N'))) {
1190 d = std::numeric_limits<double>::quiet_NaN();
1191 }
1192 else if (RAPIDJSON_LIKELY(Consume(s, 'I') && Consume(s, 'n') && Consume(s, 'f'))) {
1193 d = (minus ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity());
1194 if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n')
1195 && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y'))))
1196 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
1197 }
1198 else
1199 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
1200 }
1201 else
1202 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
1203
1204 // Parse 64bit int
1205 bool useDouble = false;
1206 if (use64bit) {
1207 if (minus)
1208 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1209 if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808
1210 if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) {
1211 d = static_cast<double>(i64);
1212 useDouble = true;
1213 break;
1214 }
1215 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1216 significandDigit++;
1217 }
1218 else
1219 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1220 if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615
1221 if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) {
1222 d = static_cast<double>(i64);
1223 useDouble = true;
1224 break;
1225 }
1226 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1227 significandDigit++;
1228 }
1229 }
1230
1231 // Force double for big integer
1232 if (useDouble) {
1233 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1234 if (RAPIDJSON_UNLIKELY(d >= 1.7976931348623157e307)) // DBL_MAX / 10.0
1235 RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
1236 d = d * 10 + (s.TakePush() - '0');
1237 }
1238 }
1239
1240 // Parse frac = decimal-point 1*DIGIT
1241 int expFrac = 0;
1242 size_t decimalPosition;
1243 if (Consume(s, '.')) {
1244 decimalPosition = s.Length();
1245
1246 if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9')))
1247 RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell());
1248
1249 if (!useDouble) {
1250 #if RAPIDJSON_64BIT
1251 // Use i64 to store significand in 64-bit architecture
1252 if (!use64bit)
1253 i64 = i;
1254
1255 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1256 if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
1257 break;
1258 else {
1259 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1260 --expFrac;
1261 if (i64 != 0)
1262 significandDigit++;
1263 }
1264 }
1265
1266 d = static_cast<double>(i64);
1267 #else
1268 // Use double to store significand in 32-bit architecture
1269 d = static_cast<double>(use64bit ? i64 : i);
1270 #endif
1271 useDouble = true;
1272 }
1273
1274 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1275 if (significandDigit < 17) {
1276 d = d * 10.0 + (s.TakePush() - '0');
1277 --expFrac;
1278 if (RAPIDJSON_LIKELY(d > 0.0))
1279 significandDigit++;
1280 }
1281 else
1282 s.TakePush();
1283 }
1284 }
1285 else
1286 decimalPosition = s.Length(); // decimal position at the end of integer.
1287
1288 // Parse exp = e [ minus / plus ] 1*DIGIT
1289 int exp = 0;
1290 if (Consume(s, 'e') || Consume(s, 'E')) {
1291 if (!useDouble) {
1292 d = static_cast<double>(use64bit ? i64 : i);
1293 useDouble = true;
1294 }
1295
1296 bool expMinus = false;
1297 if (Consume(s, '+'))
1298 ;
1299 else if (Consume(s, '-'))
1300 expMinus = true;
1301
1302 if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1303 exp = static_cast<int>(s.Take() - '0');
1304 if (expMinus) {
1305 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1306 exp = exp * 10 + static_cast<int>(s.Take() - '0');
1307 if (exp >= 214748364) { // Issue #313: prevent overflow exponent
1308 while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9')) // Consume the rest of exponent
1309 s.Take();
1310 }
1311 }
1312 }
1313 else { // positive exp
1314 int maxExp = 308 - expFrac;
1315 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1316 exp = exp * 10 + static_cast<int>(s.Take() - '0');
1317 if (RAPIDJSON_UNLIKELY(exp > maxExp))
1318 RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
1319 }
1320 }
1321 }
1322 else
1323 RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell());
1324
1325 if (expMinus)
1326 exp = -exp;
1327 }
1328
1329 // Finish parsing, call event according to the type of number.
1330 bool cont = true;
1331
1332 if (parseFlags & kParseNumbersAsStringsFlag) {
1333 if (parseFlags & kParseInsituFlag) {
1334 s.Pop(); // Pop stack no matter if it will be used or not.
1335 typename InputStream::Ch* head = is.PutBegin();
1336 const size_t length = s.Tell() - startOffset;
1337 RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
1338 // unable to insert the \0 character here, it will erase the comma after this number
1339 const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
1340 cont = handler.RawNumber(str, SizeType(length), false);
1341 }
1342 else {
1343 SizeType numCharsToCopy = static_cast<SizeType>(s.Length());
1344 StringStream srcStream(s.Pop());
1345 StackStream<typename TargetEncoding::Ch> dstStream(stack_);
1346 while (numCharsToCopy--) {
1347 Transcoder<UTF8<>, TargetEncoding>::Transcode(srcStream, dstStream);
1348 }
1349 dstStream.Put('\0');
1350 const typename TargetEncoding::Ch* str = dstStream.Pop();
1351 const SizeType length = static_cast<SizeType>(dstStream.Length()) - 1;
1352 cont = handler.RawNumber(str, SizeType(length), true);
1353 }
1354 }
1355 else {
1356 size_t length = s.Length();
1357 const char* decimal = s.Pop(); // Pop stack no matter if it will be used or not.
1358
1359 if (useDouble) {
1360 int p = exp + expFrac;
1361 if (parseFlags & kParseFullPrecisionFlag)
1362 d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
1363 else
1364 d = internal::StrtodNormalPrecision(d, p);
1365
1366 cont = handler.Double(minus ? -d : d);
1367 }
1368 else if (useNanOrInf) {
1369 cont = handler.Double(d);
1370 }
1371 else {
1372 if (use64bit) {
1373 if (minus)
1374 cont = handler.Int64(static_cast<int64_t>(~i64 + 1));
1375 else
1376 cont = handler.Uint64(i64);
1377 }
1378 else {
1379 if (minus)
1380 cont = handler.Int(static_cast<int32_t>(~i + 1));
1381 else
1382 cont = handler.Uint(i);
1383 }
1384 }
1385 }
1386 if (RAPIDJSON_UNLIKELY(!cont))
1387 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, startOffset);
1388 }
1389
1390 // Parse any JSON value
1391 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseValue(InputStream & is,Handler & handler)1392 void ParseValue(InputStream& is, Handler& handler) {
1393 switch (is.Peek()) {
1394 case 'n': ParseNull <parseFlags>(is, handler); break;
1395 case 't': ParseTrue <parseFlags>(is, handler); break;
1396 case 'f': ParseFalse <parseFlags>(is, handler); break;
1397 case '"': ParseString<parseFlags>(is, handler); break;
1398 case '{': ParseObject<parseFlags>(is, handler); break;
1399 case '[': ParseArray <parseFlags>(is, handler); break;
1400 default :
1401 ParseNumber<parseFlags>(is, handler);
1402 break;
1403
1404 }
1405 }
1406
1407 // Iterative Parsing
1408
// States
// States of the iterative (non-recursive) parser.
// The enumerator values are used directly as the first index of the
// transition table in Predict(), so their order must stay in sync with the
// rows of that table. IterativeParsingValueState must remain the last
// enumerator because cIterativeParsingStateCount is derived from it.
enum IterativeParsingState {
    IterativeParsingStartState = 0,
    IterativeParsingFinishState,
    IterativeParsingErrorState,

    // Object states
    IterativeParsingObjectInitialState,
    IterativeParsingMemberKeyState,
    IterativeParsingKeyValueDelimiterState,
    IterativeParsingMemberValueState,
    IterativeParsingMemberDelimiterState,
    IterativeParsingObjectFinishState,

    // Array states
    IterativeParsingArrayInitialState,
    IterativeParsingElementState,
    IterativeParsingElementDelimiterState,
    IterativeParsingArrayFinishState,

    // Single value state
    IterativeParsingValueState
};
1432
// Number of parser states; used as the row count of the table in Predict().
enum { cIterativeParsingStateCount = IterativeParsingValueState + 1 };
1434
// Tokens
// Lookahead token classes produced by Tokenize().
// The enumerator values are used directly as the second index of the
// transition table in Predict(), so their order must stay in sync with the
// columns of that table. kTokenCount is the number of real tokens.
enum Token {
    LeftBracketToken = 0,
    RightBracketToken,

    LeftCurlyBracketToken,
    RightCurlyBracketToken,

    CommaToken,
    ColonToken,

    StringToken,
    FalseToken,
    TrueToken,
    NullToken,
    NumberToken,

    kTokenCount
};
1454
Tokenize(Ch c)1455 RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) {
1456
1457 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
1458 #define N NumberToken
1459 #define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N
1460 // Maps from ASCII to Token
1461 static const unsigned char tokenMap[256] = {
1462 N16, // 00~0F
1463 N16, // 10~1F
1464 N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F
1465 N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F
1466 N16, // 40~4F
1467 N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F
1468 N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F
1469 N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F
1470 N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF
1471 };
1472 #undef N
1473 #undef N16
1474 //!@endcond
1475
1476 if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256)
1477 return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]);
1478 else
1479 return NumberToken;
1480 }
1481
// Looks up the candidate next state for the given current state and lookahead
// token in the static transition table G.
//
// G is indexed as G[state][token]: one row per IterativeParsingState (in enum
// order) and one column per Token (in enum order). Combinations that are not
// allowed map to IterativeParsingErrorState. No bounds checking is performed
// on either index.
RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) {
    // current state x one lookahead token -> new state
    static const char G[cIterativeParsingStateCount][kTokenCount] = {
        // Start
        {
            IterativeParsingArrayInitialState,  // Left bracket
            IterativeParsingErrorState,         // Right bracket
            IterativeParsingObjectInitialState, // Left curly bracket
            IterativeParsingErrorState,         // Right curly bracket
            IterativeParsingErrorState,         // Comma
            IterativeParsingErrorState,         // Colon
            IterativeParsingValueState,         // String
            IterativeParsingValueState,         // False
            IterativeParsingValueState,         // True
            IterativeParsingValueState,         // Null
            IterativeParsingValueState          // Number
        },
        // Finish(sink state)
        {
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState
        },
        // Error(sink state)
        {
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState
        },
        // ObjectInitial
        {
            IterativeParsingErrorState,         // Left bracket
            IterativeParsingErrorState,         // Right bracket
            IterativeParsingErrorState,         // Left curly bracket
            IterativeParsingObjectFinishState,  // Right curly bracket
            IterativeParsingErrorState,         // Comma
            IterativeParsingErrorState,         // Colon
            IterativeParsingMemberKeyState,     // String
            IterativeParsingErrorState,         // False
            IterativeParsingErrorState,         // True
            IterativeParsingErrorState,         // Null
            IterativeParsingErrorState          // Number
        },
        // MemberKey
        {
            IterativeParsingErrorState,             // Left bracket
            IterativeParsingErrorState,             // Right bracket
            IterativeParsingErrorState,             // Left curly bracket
            IterativeParsingErrorState,             // Right curly bracket
            IterativeParsingErrorState,             // Comma
            IterativeParsingKeyValueDelimiterState, // Colon
            IterativeParsingErrorState,             // String
            IterativeParsingErrorState,             // False
            IterativeParsingErrorState,             // True
            IterativeParsingErrorState,             // Null
            IterativeParsingErrorState              // Number
        },
        // KeyValueDelimiter
        {
            IterativeParsingArrayInitialState,      // Left bracket(push MemberValue state)
            IterativeParsingErrorState,             // Right bracket
            IterativeParsingObjectInitialState,     // Left curly bracket(push MemberValue state)
            IterativeParsingErrorState,             // Right curly bracket
            IterativeParsingErrorState,             // Comma
            IterativeParsingErrorState,             // Colon
            IterativeParsingMemberValueState,       // String
            IterativeParsingMemberValueState,       // False
            IterativeParsingMemberValueState,       // True
            IterativeParsingMemberValueState,       // Null
            IterativeParsingMemberValueState        // Number
        },
        // MemberValue
        {
            IterativeParsingErrorState,             // Left bracket
            IterativeParsingErrorState,             // Right bracket
            IterativeParsingErrorState,             // Left curly bracket
            IterativeParsingObjectFinishState,      // Right curly bracket
            IterativeParsingMemberDelimiterState,   // Comma
            IterativeParsingErrorState,             // Colon
            IterativeParsingErrorState,             // String
            IterativeParsingErrorState,             // False
            IterativeParsingErrorState,             // True
            IterativeParsingErrorState,             // Null
            IterativeParsingErrorState              // Number
        },
        // MemberDelimiter
        {
            IterativeParsingErrorState,         // Left bracket
            IterativeParsingErrorState,         // Right bracket
            IterativeParsingErrorState,         // Left curly bracket
            IterativeParsingObjectFinishState,  // Right curly bracket
            IterativeParsingErrorState,         // Comma
            IterativeParsingErrorState,         // Colon
            IterativeParsingMemberKeyState,     // String
            IterativeParsingErrorState,         // False
            IterativeParsingErrorState,         // True
            IterativeParsingErrorState,         // Null
            IterativeParsingErrorState          // Number
        },
        // ObjectFinish(sink state)
        {
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState
        },
        // ArrayInitial
        {
            IterativeParsingArrayInitialState,      // Left bracket(push Element state)
            IterativeParsingArrayFinishState,       // Right bracket
            IterativeParsingObjectInitialState,     // Left curly bracket(push Element state)
            IterativeParsingErrorState,             // Right curly bracket
            IterativeParsingErrorState,             // Comma
            IterativeParsingErrorState,             // Colon
            IterativeParsingElementState,           // String
            IterativeParsingElementState,           // False
            IterativeParsingElementState,           // True
            IterativeParsingElementState,           // Null
            IterativeParsingElementState            // Number
        },
        // Element
        {
            IterativeParsingErrorState,             // Left bracket
            IterativeParsingArrayFinishState,       // Right bracket
            IterativeParsingErrorState,             // Left curly bracket
            IterativeParsingErrorState,             // Right curly bracket
            IterativeParsingElementDelimiterState,  // Comma
            IterativeParsingErrorState,             // Colon
            IterativeParsingErrorState,             // String
            IterativeParsingErrorState,             // False
            IterativeParsingErrorState,             // True
            IterativeParsingErrorState,             // Null
            IterativeParsingErrorState              // Number
        },
        // ElementDelimiter
        {
            IterativeParsingArrayInitialState,      // Left bracket(push Element state)
            IterativeParsingArrayFinishState,       // Right bracket
            IterativeParsingObjectInitialState,     // Left curly bracket(push Element state)
            IterativeParsingErrorState,             // Right curly bracket
            IterativeParsingErrorState,             // Comma
            IterativeParsingErrorState,             // Colon
            IterativeParsingElementState,           // String
            IterativeParsingElementState,           // False
            IterativeParsingElementState,           // True
            IterativeParsingElementState,           // Null
            IterativeParsingElementState            // Number
        },
        // ArrayFinish(sink state)
        {
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState
        },
        // Single Value (sink state)
        {
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
            IterativeParsingErrorState
        }
    }; // End of G

    // Table entries are stored as char to keep the table small; convert back to the enum.
    return static_cast<IterativeParsingState>(G[state][token]);
}
1645
1646 // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit().
1647 // May return a new state on state pop.
1648 template <unsigned parseFlags, typename InputStream, typename Handler>
Transit(IterativeParsingState src,Token token,IterativeParsingState dst,InputStream & is,Handler & handler)1649 RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) {
1650 (void)token;
1651
1652 switch (dst) {
1653 case IterativeParsingErrorState:
1654 return dst;
1655
1656 case IterativeParsingObjectInitialState:
1657 case IterativeParsingArrayInitialState:
1658 {
1659 // Push the state(Element or MemeberValue) if we are nested in another array or value of member.
1660 // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
1661 IterativeParsingState n = src;
1662 if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
1663 n = IterativeParsingElementState;
1664 else if (src == IterativeParsingKeyValueDelimiterState)
1665 n = IterativeParsingMemberValueState;
1666 // Push current state.
1667 *stack_.template Push<SizeType>(1) = n;
1668 // Initialize and push the member/element count.
1669 *stack_.template Push<SizeType>(1) = 0;
1670 // Call handler
1671 bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray();
1672 // On handler short circuits the parsing.
1673 if (!hr) {
1674 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1675 return IterativeParsingErrorState;
1676 }
1677 else {
1678 is.Take();
1679 return dst;
1680 }
1681 }
1682
1683 case IterativeParsingMemberKeyState:
1684 ParseString<parseFlags>(is, handler, true);
1685 if (HasParseError())
1686 return IterativeParsingErrorState;
1687 else
1688 return dst;
1689
1690 case IterativeParsingKeyValueDelimiterState:
1691 RAPIDJSON_ASSERT(token == ColonToken);
1692 is.Take();
1693 return dst;
1694
1695 case IterativeParsingMemberValueState:
1696 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1697 ParseValue<parseFlags>(is, handler);
1698 if (HasParseError()) {
1699 return IterativeParsingErrorState;
1700 }
1701 return dst;
1702
1703 case IterativeParsingElementState:
1704 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1705 ParseValue<parseFlags>(is, handler);
1706 if (HasParseError()) {
1707 return IterativeParsingErrorState;
1708 }
1709 return dst;
1710
1711 case IterativeParsingMemberDelimiterState:
1712 case IterativeParsingElementDelimiterState:
1713 is.Take();
1714 // Update member/element count.
1715 *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1;
1716 return dst;
1717
1718 case IterativeParsingObjectFinishState:
1719 {
1720 // Transit from delimiter is only allowed when trailing commas are enabled
1721 if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) {
1722 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell());
1723 return IterativeParsingErrorState;
1724 }
1725 // Get member count.
1726 SizeType c = *stack_.template Pop<SizeType>(1);
1727 // If the object is not empty, count the last member.
1728 if (src == IterativeParsingMemberValueState)
1729 ++c;
1730 // Restore the state.
1731 IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
1732 // Transit to Finish state if this is the topmost scope.
1733 if (n == IterativeParsingStartState)
1734 n = IterativeParsingFinishState;
1735 // Call handler
1736 bool hr = handler.EndObject(c);
1737 // On handler short circuits the parsing.
1738 if (!hr) {
1739 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1740 return IterativeParsingErrorState;
1741 }
1742 else {
1743 is.Take();
1744 return n;
1745 }
1746 }
1747
1748 case IterativeParsingArrayFinishState:
1749 {
1750 // Transit from delimiter is only allowed when trailing commas are enabled
1751 if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) {
1752 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorValueInvalid, is.Tell());
1753 return IterativeParsingErrorState;
1754 }
1755 // Get element count.
1756 SizeType c = *stack_.template Pop<SizeType>(1);
1757 // If the array is not empty, count the last element.
1758 if (src == IterativeParsingElementState)
1759 ++c;
1760 // Restore the state.
1761 IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
1762 // Transit to Finish state if this is the topmost scope.
1763 if (n == IterativeParsingStartState)
1764 n = IterativeParsingFinishState;
1765 // Call handler
1766 bool hr = handler.EndArray(c);
1767 // On handler short circuits the parsing.
1768 if (!hr) {
1769 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1770 return IterativeParsingErrorState;
1771 }
1772 else {
1773 is.Take();
1774 return n;
1775 }
1776 }
1777
1778 default:
1779 // This branch is for IterativeParsingValueState actually.
1780 // Use `default:` rather than
1781 // `case IterativeParsingValueState:` is for code coverage.
1782
1783 // The IterativeParsingStartState is not enumerated in this switch-case.
1784 // It is impossible for that case. And it can be caught by following assertion.
1785
1786 // The IterativeParsingFinishState is not enumerated in this switch-case either.
1787 // It is a "derivative" state which cannot triggered from Predict() directly.
1788 // Therefore it cannot happen here. And it can be caught by following assertion.
1789 RAPIDJSON_ASSERT(dst == IterativeParsingValueState);
1790
1791 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1792 ParseValue<parseFlags>(is, handler);
1793 if (HasParseError()) {
1794 return IterativeParsingErrorState;
1795 }
1796 return IterativeParsingFinishState;
1797 }
1798 }
1799
1800 template <typename InputStream>
HandleError(IterativeParsingState src,InputStream & is)1801 void HandleError(IterativeParsingState src, InputStream& is) {
1802 if (HasParseError()) {
1803 // Error flag has been set.
1804 return;
1805 }
1806
1807 switch (src) {
1808 case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return;
1809 case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return;
1810 case IterativeParsingObjectInitialState:
1811 case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return;
1812 case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return;
1813 case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return;
1814 case IterativeParsingKeyValueDelimiterState:
1815 case IterativeParsingArrayInitialState:
1816 case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return;
1817 default: RAPIDJSON_ASSERT(src == IterativeParsingElementState); RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return;
1818 }
1819 }
1820
1821 template <unsigned parseFlags, typename InputStream, typename Handler>
IterativeParse(InputStream & is,Handler & handler)1822 ParseResult IterativeParse(InputStream& is, Handler& handler) {
1823 parseResult_.Clear();
1824 ClearStackOnExit scope(*this);
1825 IterativeParsingState state = IterativeParsingStartState;
1826
1827 SkipWhitespaceAndComments<parseFlags>(is);
1828 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
1829 while (is.Peek() != '\0') {
1830 Token t = Tokenize(is.Peek());
1831 IterativeParsingState n = Predict(state, t);
1832 IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
1833
1834 if (d == IterativeParsingErrorState) {
1835 HandleError(state, is);
1836 break;
1837 }
1838
1839 state = d;
1840
1841 // Do not further consume streams if a root JSON has been parsed.
1842 if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
1843 break;
1844
1845 SkipWhitespaceAndComments<parseFlags>(is);
1846 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
1847 }
1848
1849 // Handle the end of file.
1850 if (state != IterativeParsingFinishState)
1851 HandleError(state, is);
1852
1853 return parseResult_;
1854 }
1855
1856 static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string.
1857 internal::Stack<StackAllocator> stack_; //!< Scratch stack: stores decoded strings temporarily during non-destructive parsing; Transit() also pushes the saved state and member/element count of each open object/array onto it during iterative parsing.
1858 ParseResult parseResult_; //!< Parse result/error record; IterativeParse() clears it on entry and returns it.
1859 }; // class GenericReader
1860
1861 //! Reader with UTF8 encoding and default allocator: both encoding template arguments are UTF8<>, and any remaining GenericReader template parameters keep their defaults.
1862 typedef GenericReader<UTF8<>, UTF8<> > Reader;
1863
1864 RAPIDJSON_NAMESPACE_END
1865
1866 #ifdef __clang__
1867 RAPIDJSON_DIAG_POP
1868 #endif
1869
1870
1871 #ifdef __GNUC__
1872 RAPIDJSON_DIAG_POP
1873 #endif
1874
1875 #ifdef _MSC_VER
1876 RAPIDJSON_DIAG_POP
1877 #endif
1878
1879 #endif // RAPIDJSON_READER_H_
1880