1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14
15 #ifndef RAPIDJSON_READER_H_
16 #define RAPIDJSON_READER_H_
17
18 /*! \file reader.h */
19
20 #include "allocators.h"
21 #include "stream.h"
22 #include "encodedstream.h"
23 #include "internal/meta.h"
24 #include "internal/stack.h"
25 #include "internal/strtod.h"
26 #include <limits>
27
28 #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
29 #include <intrin.h>
30 #pragma intrinsic(_BitScanForward)
31 #endif
32 #ifdef RAPIDJSON_SSE42
33 #include <nmmintrin.h>
34 #elif defined(RAPIDJSON_SSE2)
35 #include <emmintrin.h>
36 #elif defined(RAPIDJSON_NEON)
37 #include <arm_neon.h>
38 #endif
39
40 #ifdef __clang__
41 RAPIDJSON_DIAG_PUSH
42 RAPIDJSON_DIAG_OFF(old-style-cast)
43 RAPIDJSON_DIAG_OFF(padded)
44 RAPIDJSON_DIAG_OFF(switch-enum)
45 RAPIDJSON_DIAG_OFF(conversion)
46 #elif defined(_MSC_VER)
47 RAPIDJSON_DIAG_PUSH
48 RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
49 RAPIDJSON_DIAG_OFF(4702) // unreachable code
50 #endif
51
52 #ifdef __GNUC__
53 RAPIDJSON_DIAG_PUSH
54 RAPIDJSON_DIAG_OFF(effc++)
55 #endif
56
//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN

// Expands to no tokens at all; passing it as `value` below yields a bare `return;`.
#define RAPIDJSON_NOTHING /* deliberately empty */

// Leaves the enclosing function, returning `value`, once HasParseError() reports an error.
// Only usable where HasParseError() is callable. A definition supplied before this
// header is included takes precedence.
#ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN
#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    if (RAPIDJSON_UNLIKELY(HasParseError())) { \
        return value; \
    } \
    RAPIDJSON_MULTILINEMACRO_END
#endif

// Same check for functions that return void.
#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
    RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING)

//!@endcond
68
/*! \def RAPIDJSON_PARSE_ERROR_NORETURN
    \ingroup RAPIDJSON_ERRORS
    \brief Macro to indicate a parse error.
    \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
    \param offset  position of the error in JSON input (\c size_t)

    This macro can be used as a customization point for the internal
    error handling mechanism of RapidJSON.

    A common usage model is to throw an exception instead of requiring the
    caller to explicitly check the \ref rapidjson::GenericReader::Parse's
    return value:

    \code
    #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \
       throw ParseException(parseErrorCode, #parseErrorCode, offset)

    #include <stdexcept>               // std::runtime_error
    #include "rapidjson/error/error.h" // rapidjson::ParseResult

    struct ParseException : std::runtime_error, rapidjson::ParseResult {
      ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset)
        : std::runtime_error(msg), ParseResult(code, offset) {}
    };

    #include "rapidjson/reader.h"
    \endcode

    The default definition below asserts that no error has been recorded yet
    and then stores the code and offset through SetParseError().

    \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse
 */
#ifndef RAPIDJSON_PARSE_ERROR_NORETURN
#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    /* Error can only be assigned once */ \
    RAPIDJSON_ASSERT(!HasParseError()); \
    SetParseError(parseErrorCode, offset); \
    RAPIDJSON_MULTILINEMACRO_END
#endif
106
/*! \def RAPIDJSON_PARSE_ERROR
    \ingroup RAPIDJSON_ERRORS
    \brief (Internal) macro to indicate and handle a parse error.
    \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
    \param offset  position of the error in JSON input (\c size_t)

    Expands to RAPIDJSON_PARSE_ERROR_NORETURN followed by
    RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID, i.e. it records the error and
    then leaves the enclosing (void) function.

    \see RAPIDJSON_PARSE_ERROR_NORETURN
    \hideinitializer
 */
#ifndef RAPIDJSON_PARSE_ERROR
#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    /* record the error ... */ \
    RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \
    /* ... then return from the caller */ \
    RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \
    RAPIDJSON_MULTILINEMACRO_END
#endif
125
126 #include "error/error.h" // ParseErrorCode, ParseResult
127
128 RAPIDJSON_NAMESPACE_BEGIN
129
130 ///////////////////////////////////////////////////////////////////////////////
131 // ParseFlag
132
/*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS
    \ingroup RAPIDJSON_CONFIG
    \brief User-defined kParseDefaultFlags definition.

    Users can define this as any combination of \c ParseFlag values before
    including this header. When it is left undefined it falls back to
    \c kParseNoFlags.
*/
#ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS
#define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags
#endif
142
143 //! Combination of parseFlags
144 /*! \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream
145 */
146 enum ParseFlag {
147 kParseNoFlags = 0, //!< No flags are set.
148 kParseInsituFlag = 1, //!< In-situ(destructive) parsing.
149 kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings.
150 kParseIterativeFlag = 4, //!< Iterative(constant complexity in terms of function call stack size) parsing.
151 kParseStopWhenDoneFlag = 8, //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error.
152 kParseFullPrecisionFlag = 16, //!< Parse number in full precision (but slower).
153 kParseCommentsFlag = 32, //!< Allow one-line (//) and multi-line (/**/) comments.
154 kParseNumbersAsStringsFlag = 64, //!< Parse all numbers (ints/doubles) as strings.
155 kParseTrailingCommasFlag = 128, //!< Allow trailing commas at the end of objects and arrays.
156 kParseNanAndInfFlag = 256, //!< Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles.
157 kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS
158 };
159
160 ///////////////////////////////////////////////////////////////////////////////
161 // Handler
162
163 /*! \class rapidjson::Handler
164 \brief Concept for receiving events from GenericReader upon parsing.
165 The functions return true if no error occurs. If they return false,
166 the event publisher should terminate the process.
167 \code
168 concept Handler {
169 typename Ch;
170
171 bool Null();
172 bool Bool(bool b);
173 bool Int(int i);
174 bool Uint(unsigned i);
175 bool Int64(int64_t i);
176 bool Uint64(uint64_t i);
177 bool Double(double d);
178 /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
179 bool RawNumber(const Ch* str, SizeType length, bool copy);
180 bool String(const Ch* str, SizeType length, bool copy);
181 bool StartObject();
182 bool Key(const Ch* str, SizeType length, bool copy);
183 bool EndObject(SizeType memberCount);
184 bool StartArray();
185 bool EndArray(SizeType elementCount);
186 };
187 \endcode
188 */
189 ///////////////////////////////////////////////////////////////////////////////
190 // BaseReaderHandler
191
192 //! Default implementation of Handler.
193 /*! This can be used as base class of any reader handler.
194 \note implements Handler concept
195 */
196 template<typename Encoding = UTF8<>, typename Derived = void>
197 struct BaseReaderHandler {
198 typedef typename Encoding::Ch Ch;
199
200 typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override;
201
DefaultBaseReaderHandler202 bool Default() { return true; }
NullBaseReaderHandler203 bool Null() { return static_cast<Override&>(*this).Default(); }
BoolBaseReaderHandler204 bool Bool(bool) { return static_cast<Override&>(*this).Default(); }
IntBaseReaderHandler205 bool Int(int) { return static_cast<Override&>(*this).Default(); }
UintBaseReaderHandler206 bool Uint(unsigned) { return static_cast<Override&>(*this).Default(); }
Int64BaseReaderHandler207 bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); }
Uint64BaseReaderHandler208 bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); }
DoubleBaseReaderHandler209 bool Double(double) { return static_cast<Override&>(*this).Default(); }
210 /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
RawNumberBaseReaderHandler211 bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
StringBaseReaderHandler212 bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); }
StartObjectBaseReaderHandler213 bool StartObject() { return static_cast<Override&>(*this).Default(); }
KeyBaseReaderHandler214 bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
EndObjectBaseReaderHandler215 bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); }
StartArrayBaseReaderHandler216 bool StartArray() { return static_cast<Override&>(*this).Default(); }
EndArrayBaseReaderHandler217 bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); }
218 };
219
220 ///////////////////////////////////////////////////////////////////////////////
221 // StreamLocalCopy
222
223 namespace internal {
224
225 template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
226 class StreamLocalCopy;
227
228 //! Do copy optimization.
229 template<typename Stream>
230 class StreamLocalCopy<Stream, 1> {
231 public:
StreamLocalCopy(Stream & original)232 StreamLocalCopy(Stream& original) : s(original), original_(original) {}
~StreamLocalCopy()233 ~StreamLocalCopy() { original_ = s; }
234
235 Stream s;
236
237 private:
238 StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
239
240 Stream& original_;
241 };
242
243 //! Keep reference.
244 template<typename Stream>
245 class StreamLocalCopy<Stream, 0> {
246 public:
StreamLocalCopy(Stream & original)247 StreamLocalCopy(Stream& original) : s(original) {}
248
249 Stream& s;
250
251 private:
252 StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
253 };
254
255 } // namespace internal
256
257 ///////////////////////////////////////////////////////////////////////////////
258 // SkipWhitespace
259
260 //! Skip the JSON white spaces in a stream.
261 /*! \param is A input stream for skipping white spaces.
262 \note This function has SSE2/SSE4.2 specialization.
263 */
264 template<typename InputStream>
SkipWhitespace(InputStream & is)265 void SkipWhitespace(InputStream& is) {
266 internal::StreamLocalCopy<InputStream> copy(is);
267 InputStream& s(copy.s);
268
269 typename InputStream::Ch c;
270 while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t')
271 s.Take();
272 }
273
//! Skip JSON white spaces in the character range [p, end).
/*! \param p   First character to examine.
    \param end One past the last character that may be examined.
    \return Pointer to the first character that is not ' ', '\n', '\r' or '\t',
            or \c end when the whole range is whitespace.
*/
inline const char* SkipWhitespace(const char* p, const char* end) {
    for (; p != end; ++p) {
        const char ch = *p;
        if (ch != ' ' && ch != '\n' && ch != '\r' && ch != '\t')
            break;
    }
    return p;
}
279
280 #ifdef RAPIDJSON_SSE42
281 //! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once.
SkipWhitespace_SIMD(const char * p)282 inline const char *SkipWhitespace_SIMD(const char* p) {
283 // Fast return for single non-whitespace
284 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
285 ++p;
286 else
287 return p;
288
289 // 16-byte align to the next boundary
290 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
291 while (p != nextAligned)
292 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
293 ++p;
294 else
295 return p;
296
297 // The rest of string using SIMD
298 static const char whitespace[16] = " \n\r\t";
299 const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
300
301 for (;; p += 16) {
302 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
303 const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
304 if (r != 16) // some of characters is non-whitespace
305 return p + r;
306 }
307 }
308
SkipWhitespace_SIMD(const char * p,const char * end)309 inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
310 // Fast return for single non-whitespace
311 if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
312 ++p;
313 else
314 return p;
315
316 // The middle of string using SIMD
317 static const char whitespace[16] = " \n\r\t";
318 const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
319
320 for (; p <= end - 16; p += 16) {
321 const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
322 const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
323 if (r != 16) // some of characters is non-whitespace
324 return p + r;
325 }
326
327 return SkipWhitespace(p, end);
328 }
329
330 #elif defined(RAPIDJSON_SSE2)
331
//! Skip whitespace with SSE2 instructions, testing 16 8-bit characters at once.
/*! \param p Start of the text. No end bound is taken: the scan only stops at
             the first character that is not ' ', '\n', '\r' or '\t'.
    \return Pointer to that first non-whitespace character.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Fast return for single non-whitespace
    if (!(*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
        return p;
    ++p;

    // Scalar scan up to the next 16-byte boundary, so the loop below can use aligned loads.
    const char* const boundary = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    for (; p != boundary; ++p) {
        if (!(*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
            return p;
    }

    // One vector per whitespace character, each holding that character 16 times.
    const __m128i spaces   = _mm_set1_epi8(' ');
    const __m128i newlines = _mm_set1_epi8('\n');
    const __m128i returns  = _mm_set1_epi8('\r');
    const __m128i tabs     = _mm_set1_epi8('\t');

    for (;;) {
        const __m128i chunk = _mm_load_si128(reinterpret_cast<const __m128i *>(p));

        // Byte-wise "is whitespace" mask for the chunk.
        __m128i isSpace = _mm_cmpeq_epi8(chunk, spaces);
        isSpace = _mm_or_si128(isSpace, _mm_cmpeq_epi8(chunk, newlines));
        isSpace = _mm_or_si128(isSpace, _mm_cmpeq_epi8(chunk, returns));
        isSpace = _mm_or_si128(isSpace, _mm_cmpeq_epi8(chunk, tabs));

        // One bit per byte, set where the byte is NOT whitespace.
        const unsigned short others = static_cast<unsigned short>(~_mm_movemask_epi8(isSpace));
        if (others != 0) {
#ifdef _MSC_VER         // Find the index of first non-whitespace
            unsigned long offset;
            _BitScanForward(&offset, others);
            return p + offset;
#else
            return p + __builtin_ffs(others) - 1;
#endif
        }

        p += 16;
    }
}
376
SkipWhitespace_SIMD(const char * p,const char * end)377 inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
378 // Fast return for single non-whitespace
379 if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
380 ++p;
381 else
382 return p;
383
384 // The rest of string
385 #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
386 static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
387 #undef C16
388
389 const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
390 const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
391 const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
392 const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
393
394 for (; p <= end - 16; p += 16) {
395 const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
396 __m128i x = _mm_cmpeq_epi8(s, w0);
397 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
398 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
399 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
400 unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
401 if (r != 0) { // some of characters may be non-whitespace
402 #ifdef _MSC_VER // Find the index of first non-whitespace
403 unsigned long offset;
404 _BitScanForward(&offset, r);
405 return p + offset;
406 #else
407 return p + __builtin_ffs(r) - 1;
408 #endif
409 }
410 }
411
412 return SkipWhitespace(p, end);
413 }
414
415 #elif defined(RAPIDJSON_NEON)
416
417 //! Skip whitespace with ARM Neon instructions, testing 16 8-byte characters at once.
SkipWhitespace_SIMD(const char * p)418 inline const char *SkipWhitespace_SIMD(const char* p) {
419 // Fast return for single non-whitespace
420 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
421 ++p;
422 else
423 return p;
424
425 // 16-byte align to the next boundary
426 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
427 while (p != nextAligned)
428 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
429 ++p;
430 else
431 return p;
432
433 const uint8x16_t w0 = vmovq_n_u8(' ');
434 const uint8x16_t w1 = vmovq_n_u8('\n');
435 const uint8x16_t w2 = vmovq_n_u8('\r');
436 const uint8x16_t w3 = vmovq_n_u8('\t');
437
438 for (;; p += 16) {
439 const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
440 uint8x16_t x = vceqq_u8(s, w0);
441 x = vorrq_u8(x, vceqq_u8(s, w1));
442 x = vorrq_u8(x, vceqq_u8(s, w2));
443 x = vorrq_u8(x, vceqq_u8(s, w3));
444
445 x = vmvnq_u8(x); // Negate
446 x = vrev64q_u8(x); // Rev in 64
447 uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract
448 uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract
449
450 if (low == 0) {
451 if (high != 0) {
452 int lz =__builtin_clzll(high);;
453 return p + 8 + (lz >> 3);
454 }
455 } else {
456 int lz = __builtin_clzll(low);;
457 return p + (lz >> 3);
458 }
459 }
460 }
461
SkipWhitespace_SIMD(const char * p,const char * end)462 inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
463 // Fast return for single non-whitespace
464 if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
465 ++p;
466 else
467 return p;
468
469 const uint8x16_t w0 = vmovq_n_u8(' ');
470 const uint8x16_t w1 = vmovq_n_u8('\n');
471 const uint8x16_t w2 = vmovq_n_u8('\r');
472 const uint8x16_t w3 = vmovq_n_u8('\t');
473
474 for (; p <= end - 16; p += 16) {
475 const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
476 uint8x16_t x = vceqq_u8(s, w0);
477 x = vorrq_u8(x, vceqq_u8(s, w1));
478 x = vorrq_u8(x, vceqq_u8(s, w2));
479 x = vorrq_u8(x, vceqq_u8(s, w3));
480
481 x = vmvnq_u8(x); // Negate
482 x = vrev64q_u8(x); // Rev in 64
483 uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract
484 uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract
485
486 if (low == 0) {
487 if (high != 0) {
488 int lz = __builtin_clzll(high);
489 return p + 8 + (lz >> 3);
490 }
491 } else {
492 int lz = __builtin_clzll(low);
493 return p + (lz >> 3);
494 }
495 }
496
497 return SkipWhitespace(p, end);
498 }
499
500 #endif // RAPIDJSON_NEON
501
502 #ifdef RAPIDJSON_SIMD
503 //! Template function specialization for InsituStringStream
SkipWhitespace(InsituStringStream & is)504 template<> inline void SkipWhitespace(InsituStringStream& is) {
505 is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_));
506 }
507
508 //! Template function specialization for StringStream
SkipWhitespace(StringStream & is)509 template<> inline void SkipWhitespace(StringStream& is) {
510 is.src_ = SkipWhitespace_SIMD(is.src_);
511 }
512
SkipWhitespace(EncodedInputStream<UTF8<>,MemoryStream> & is)513 template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) {
514 is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_);
515 }
516 #endif // RAPIDJSON_SIMD
517
518 ///////////////////////////////////////////////////////////////////////////////
519 // GenericReader
520
521 //! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator.
/*! GenericReader parses JSON text from a stream, and sends events synchronously to an
523 object implementing Handler concept.
524
525 It needs to allocate a stack for storing a single decoded string during
526 non-destructive parsing.
527
528 For in-situ parsing, the decoded string is directly written to the source
529 text string, no temporary buffer is required.
530
    A GenericReader object can be reused for parsing multiple JSON texts.
532
533 \tparam SourceEncoding Encoding of the input stream.
534 \tparam TargetEncoding Encoding of the parse output.
535 \tparam StackAllocator Allocator type for stack.
536 */
537 template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator>
538 class GenericReader {
539 public:
540 typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type
541
542 //! Constructor.
543 /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing)
544 \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing)
545 */
546 GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) :
stack_(stackAllocator,stackCapacity)547 stack_(stackAllocator, stackCapacity), parseResult_(), state_(IterativeParsingStartState) {}
548
549 //! Parse JSON text.
550 /*! \tparam parseFlags Combination of \ref ParseFlag.
551 \tparam InputStream Type of input stream, implementing Stream concept.
552 \tparam Handler Type of handler, implementing Handler concept.
553 \param is Input stream to be parsed.
554 \param handler The handler to receive events.
555 \return Whether the parsing is successful.
556 */
557 template <unsigned parseFlags, typename InputStream, typename Handler>
Parse(InputStream & is,Handler & handler)558 ParseResult Parse(InputStream& is, Handler& handler) {
559 if (parseFlags & kParseIterativeFlag)
560 return IterativeParse<parseFlags>(is, handler);
561
562 parseResult_.Clear();
563
564 ClearStackOnExit scope(*this);
565
566 SkipWhitespaceAndComments<parseFlags>(is);
567 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
568
569 if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) {
570 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell());
571 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
572 }
573 else {
574 ParseValue<parseFlags>(is, handler);
575 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
576
577 if (!(parseFlags & kParseStopWhenDoneFlag)) {
578 SkipWhitespaceAndComments<parseFlags>(is);
579 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
580
581 if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) {
582 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell());
583 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
584 }
585 }
586 }
587
588 return parseResult_;
589 }
590
591 //! Parse JSON text (with \ref kParseDefaultFlags)
592 /*! \tparam InputStream Type of input stream, implementing Stream concept
593 \tparam Handler Type of handler, implementing Handler concept.
594 \param is Input stream to be parsed.
595 \param handler The handler to receive events.
596 \return Whether the parsing is successful.
597 */
598 template <typename InputStream, typename Handler>
Parse(InputStream & is,Handler & handler)599 ParseResult Parse(InputStream& is, Handler& handler) {
600 return Parse<kParseDefaultFlags>(is, handler);
601 }
602
603 //! Initialize JSON text token-by-token parsing
604 /*!
605 */
IterativeParseInit()606 void IterativeParseInit() {
607 parseResult_.Clear();
608 state_ = IterativeParsingStartState;
609 }
610
611 //! Parse one token from JSON text
612 /*! \tparam InputStream Type of input stream, implementing Stream concept
613 \tparam Handler Type of handler, implementing Handler concept.
614 \param is Input stream to be parsed.
615 \param handler The handler to receive events.
616 \return Whether the parsing is successful.
617 */
618 template <unsigned parseFlags, typename InputStream, typename Handler>
IterativeParseNext(InputStream & is,Handler & handler)619 bool IterativeParseNext(InputStream& is, Handler& handler) {
620 while (RAPIDJSON_LIKELY(is.Peek() != '\0')) {
621 SkipWhitespaceAndComments<parseFlags>(is);
622
623 Token t = Tokenize(is.Peek());
624 IterativeParsingState n = Predict(state_, t);
625 IterativeParsingState d = Transit<parseFlags>(state_, t, n, is, handler);
626
627 // If we've finished or hit an error...
628 if (RAPIDJSON_UNLIKELY(IsIterativeParsingCompleteState(d))) {
629 // Report errors.
630 if (d == IterativeParsingErrorState) {
631 HandleError(state_, is);
632 return false;
633 }
634
635 // Transition to the finish state.
636 RAPIDJSON_ASSERT(d == IterativeParsingFinishState);
637 state_ = d;
638
639 // If StopWhenDone is not set...
640 if (!(parseFlags & kParseStopWhenDoneFlag)) {
641 // ... and extra non-whitespace data is found...
642 SkipWhitespaceAndComments<parseFlags>(is);
643 if (is.Peek() != '\0') {
644 // ... this is considered an error.
645 HandleError(state_, is);
646 return false;
647 }
648 }
649
650 // Success! We are done!
651 return true;
652 }
653
654 // Transition to the new state.
655 state_ = d;
656
657 // If we parsed anything other than a delimiter, we invoked the handler, so we can return true now.
658 if (!IsIterativeParsingDelimiterState(n))
659 return true;
660 }
661
662 // We reached the end of file.
663 stack_.Clear();
664
665 if (state_ != IterativeParsingFinishState) {
666 HandleError(state_, is);
667 return false;
668 }
669
670 return true;
671 }
672
673 //! Check if token-by-token parsing JSON text is complete
674 /*! \return Whether the JSON has been fully decoded.
675 */
IterativeParseComplete()676 RAPIDJSON_FORCEINLINE bool IterativeParseComplete() const {
677 return IsIterativeParsingCompleteState(state_);
678 }
679
680 //! Whether a parse error has occurred in the last parsing.
HasParseError()681 bool HasParseError() const { return parseResult_.IsError(); }
682
683 //! Get the \ref ParseErrorCode of last parsing.
GetParseErrorCode()684 ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); }
685
686 //! Get the position of last parsing error in input, 0 otherwise.
GetErrorOffset()687 size_t GetErrorOffset() const { return parseResult_.Offset(); }
688
689 protected:
SetParseError(ParseErrorCode code,size_t offset)690 void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); }
691
692 private:
693 // Prohibit copy constructor & assignment operator.
694 GenericReader(const GenericReader&);
695 GenericReader& operator=(const GenericReader&);
696
ClearStack()697 void ClearStack() { stack_.Clear(); }
698
699 // clear stack on any exit from ParseStream, e.g. due to exception
700 struct ClearStackOnExit {
ClearStackOnExitClearStackOnExit701 explicit ClearStackOnExit(GenericReader& r) : r_(r) {}
~ClearStackOnExitClearStackOnExit702 ~ClearStackOnExit() { r_.ClearStack(); }
703 private:
704 GenericReader& r_;
705 ClearStackOnExit(const ClearStackOnExit&);
706 ClearStackOnExit& operator=(const ClearStackOnExit&);
707 };
708
709 template<unsigned parseFlags, typename InputStream>
SkipWhitespaceAndComments(InputStream & is)710 void SkipWhitespaceAndComments(InputStream& is) {
711 SkipWhitespace(is);
712
713 if (parseFlags & kParseCommentsFlag) {
714 while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) {
715 if (Consume(is, '*')) {
716 while (true) {
717 if (RAPIDJSON_UNLIKELY(is.Peek() == '\0'))
718 RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
719 else if (Consume(is, '*')) {
720 if (Consume(is, '/'))
721 break;
722 }
723 else
724 is.Take();
725 }
726 }
727 else if (RAPIDJSON_LIKELY(Consume(is, '/')))
728 while (is.Peek() != '\0' && is.Take() != '\n') {}
729 else
730 RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
731
732 SkipWhitespace(is);
733 }
734 }
735 }
736
737 // Parse object: { string : value, ... }
738 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseObject(InputStream & is,Handler & handler)739 void ParseObject(InputStream& is, Handler& handler) {
740 RAPIDJSON_ASSERT(is.Peek() == '{');
741 is.Take(); // Skip '{'
742
743 if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
744 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
745
746 SkipWhitespaceAndComments<parseFlags>(is);
747 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
748
749 if (Consume(is, '}')) {
750 if (RAPIDJSON_UNLIKELY(!handler.EndObject(0))) // empty object
751 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
752 return;
753 }
754
755 for (SizeType memberCount = 0;;) {
756 if (RAPIDJSON_UNLIKELY(is.Peek() != '"'))
757 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
758
759 ParseString<parseFlags>(is, handler, true);
760 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
761
762 SkipWhitespaceAndComments<parseFlags>(is);
763 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
764
765 if (RAPIDJSON_UNLIKELY(!Consume(is, ':')))
766 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
767
768 SkipWhitespaceAndComments<parseFlags>(is);
769 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
770
771 ParseValue<parseFlags>(is, handler);
772 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
773
774 SkipWhitespaceAndComments<parseFlags>(is);
775 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
776
777 ++memberCount;
778
779 switch (is.Peek()) {
780 case ',':
781 is.Take();
782 SkipWhitespaceAndComments<parseFlags>(is);
783 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
784 break;
785 case '}':
786 is.Take();
787 if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
788 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
789 return;
790 default:
791 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; // This useless break is only for making warning and coverage happy
792 }
793
794 if (parseFlags & kParseTrailingCommasFlag) {
795 if (is.Peek() == '}') {
796 if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
797 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
798 is.Take();
799 return;
800 }
801 }
802 }
803 }
804
805 // Parse array: [ value, ... ]
806 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseArray(InputStream & is,Handler & handler)807 void ParseArray(InputStream& is, Handler& handler) {
808 RAPIDJSON_ASSERT(is.Peek() == '[');
809 is.Take(); // Skip '['
810
811 if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
812 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
813
814 SkipWhitespaceAndComments<parseFlags>(is);
815 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
816
817 if (Consume(is, ']')) {
818 if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array
819 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
820 return;
821 }
822
823 for (SizeType elementCount = 0;;) {
824 ParseValue<parseFlags>(is, handler);
825 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
826
827 ++elementCount;
828 SkipWhitespaceAndComments<parseFlags>(is);
829 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
830
831 if (Consume(is, ',')) {
832 SkipWhitespaceAndComments<parseFlags>(is);
833 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
834 }
835 else if (Consume(is, ']')) {
836 if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
837 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
838 return;
839 }
840 else
841 RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
842
843 if (parseFlags & kParseTrailingCommasFlag) {
844 if (is.Peek() == ']') {
845 if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
846 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
847 is.Take();
848 return;
849 }
850 }
851 }
852 }
853
854 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseNull(InputStream & is,Handler & handler)855 void ParseNull(InputStream& is, Handler& handler) {
856 RAPIDJSON_ASSERT(is.Peek() == 'n');
857 is.Take();
858
859 if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) {
860 if (RAPIDJSON_UNLIKELY(!handler.Null()))
861 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
862 }
863 else
864 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
865 }
866
867 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseTrue(InputStream & is,Handler & handler)868 void ParseTrue(InputStream& is, Handler& handler) {
869 RAPIDJSON_ASSERT(is.Peek() == 't');
870 is.Take();
871
872 if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) {
873 if (RAPIDJSON_UNLIKELY(!handler.Bool(true)))
874 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
875 }
876 else
877 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
878 }
879
880 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseFalse(InputStream & is,Handler & handler)881 void ParseFalse(InputStream& is, Handler& handler) {
882 RAPIDJSON_ASSERT(is.Peek() == 'f');
883 is.Take();
884
885 if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) {
886 if (RAPIDJSON_UNLIKELY(!handler.Bool(false)))
887 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
888 }
889 else
890 RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
891 }
892
893 template<typename InputStream>
Consume(InputStream & is,typename InputStream::Ch expect)894 RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) {
895 if (RAPIDJSON_LIKELY(is.Peek() == expect)) {
896 is.Take();
897 return true;
898 }
899 else
900 return false;
901 }
902
903 // Helper function to parse four hexadecimal digits in \uXXXX in ParseString().
904 template<typename InputStream>
ParseHex4(InputStream & is,size_t escapeOffset)905 unsigned ParseHex4(InputStream& is, size_t escapeOffset) {
906 unsigned codepoint = 0;
907 for (int i = 0; i < 4; i++) {
908 Ch c = is.Peek();
909 codepoint <<= 4;
910 codepoint += static_cast<unsigned>(c);
911 if (c >= '0' && c <= '9')
912 codepoint -= '0';
913 else if (c >= 'A' && c <= 'F')
914 codepoint -= 'A' - 10;
915 else if (c >= 'a' && c <= 'f')
916 codepoint -= 'a' - 10;
917 else {
918 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, escapeOffset);
919 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0);
920 }
921 is.Take();
922 }
923 return codepoint;
924 }
925
926 template <typename CharType>
927 class StackStream {
928 public:
929 typedef CharType Ch;
930
StackStream(internal::Stack<StackAllocator> & stack)931 StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {}
Put(Ch c)932 RAPIDJSON_FORCEINLINE void Put(Ch c) {
933 *stack_.template Push<Ch>() = c;
934 ++length_;
935 }
936
Push(SizeType count)937 RAPIDJSON_FORCEINLINE void* Push(SizeType count) {
938 length_ += count;
939 return stack_.template Push<Ch>(count);
940 }
941
Length()942 size_t Length() const { return length_; }
943
Pop()944 Ch* Pop() {
945 return stack_.template Pop<Ch>(length_);
946 }
947
948 private:
949 StackStream(const StackStream&);
950 StackStream& operator=(const StackStream&);
951
952 internal::Stack<StackAllocator>& stack_;
953 SizeType length_;
954 };
955
956 // Parse string and generate String event. Different code paths for kParseInsituFlag.
957 template<unsigned parseFlags, typename InputStream, typename Handler>
958 void ParseString(InputStream& is, Handler& handler, bool isKey = false) {
959 internal::StreamLocalCopy<InputStream> copy(is);
960 InputStream& s(copy.s);
961
962 RAPIDJSON_ASSERT(s.Peek() == '\"');
963 s.Take(); // Skip '\"'
964
965 bool success = false;
966 if (parseFlags & kParseInsituFlag) {
967 typename InputStream::Ch *head = s.PutBegin();
968 ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s);
969 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
970 size_t length = s.PutEnd(head) - 1;
971 RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
972 const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
973 success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false));
974 }
975 else {
976 StackStream<typename TargetEncoding::Ch> stackStream(stack_);
977 ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream);
978 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
979 SizeType length = static_cast<SizeType>(stackStream.Length()) - 1;
980 const typename TargetEncoding::Ch* const str = stackStream.Pop();
981 success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true));
982 }
983 if (RAPIDJSON_UNLIKELY(!success))
984 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
985 }
986
987 // Parse string to an output is
988 // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation.
989 template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream>
ParseStringToStream(InputStream & is,OutputStream & os)990 RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) {
991 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
992 #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
993 static const char escape[256] = {
994 Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/',
995 Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
996 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
997 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
998 Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16
999 };
1000 #undef Z16
1001 //!@endcond
1002
1003 for (;;) {
1004 // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation.
1005 if (!(parseFlags & kParseValidateEncodingFlag))
1006 ScanCopyUnescapedString(is, os);
1007
1008 Ch c = is.Peek();
1009 if (RAPIDJSON_UNLIKELY(c == '\\')) { // Escape
1010 size_t escapeOffset = is.Tell(); // For invalid escaping, report the initial '\\' as error offset
1011 is.Take();
1012 Ch e = is.Peek();
1013 if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) {
1014 is.Take();
1015 os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)]));
1016 }
1017 else if (RAPIDJSON_LIKELY(e == 'u')) { // Unicode
1018 is.Take();
1019 unsigned codepoint = ParseHex4(is, escapeOffset);
1020 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
1021 if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) {
1022 // Handle UTF-16 surrogate pair
1023 if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
1024 RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
1025 unsigned codepoint2 = ParseHex4(is, escapeOffset);
1026 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
1027 if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
1028 RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
1029 codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
1030 }
1031 TEncoding::Encode(os, codepoint);
1032 }
1033 else
1034 RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset);
1035 }
1036 else if (RAPIDJSON_UNLIKELY(c == '"')) { // Closing double quote
1037 is.Take();
1038 os.Put('\0'); // null-terminate the string
1039 return;
1040 }
1041 else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
1042 if (c == '\0')
1043 RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell());
1044 else
1045 RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell());
1046 }
1047 else {
1048 size_t offset = is.Tell();
1049 if (RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag ?
1050 !Transcoder<SEncoding, TEncoding>::Validate(is, os) :
1051 !Transcoder<SEncoding, TEncoding>::Transcode(is, os))))
1052 RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, offset);
1053 }
1054 }
1055 }
1056
1057 template<typename InputStream, typename OutputStream>
ScanCopyUnescapedString(InputStream &,OutputStream &)1058 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) {
1059 // Do nothing for generic version
1060 }
1061
1062 #if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
// StringStream -> StackStream<char>
// Copies characters from `is` to `os` up to, but not including, the first character
// that is '"', '\\' or below 0x20, and leaves is.src_ pointing at that character.
static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
    const char* p = is.src_;

    // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    while (p != nextAligned)
        if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
            is.src_ = p;
            return;
        }
        else
            os.Put(*p++);

    // The rest of string using SIMD
    // Each constant vector holds 16 copies of one byte to compare against.
    static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
    static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
    static const char space[16]  = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
    const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
    const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
    const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));

    // p is 16-byte aligned from here on, so the aligned load below is valid.
    for (;; p += 16) {
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
        const __m128i t1 = _mm_cmpeq_epi8(s, dq);
        const __m128i t2 = _mm_cmpeq_epi8(s, bs);
        const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
        const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
        // One bit per byte: bit k is set when byte k is a stop character.
        unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
        if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
            SizeType length;
#ifdef _MSC_VER         // Find the index of first escaped
            unsigned long offset;
            _BitScanForward(&offset, r);
            length = offset;
#else
            length = static_cast<SizeType>(__builtin_ffs(r) - 1);
#endif
            // Copy only the characters before the stop character.
            if (length != 0) {
                char* q = reinterpret_cast<char*>(os.Push(length));
                for (size_t i = 0; i < length; i++)
                    q[i] = p[i];

                p += length;
            }
            break;
        }
        // No stop character in this block: copy all 16 bytes to the output.
        _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s);
    }

    is.src_ = p;
}
1115
// InsituStringStream -> InsituStringStream
// In situ variant: moves characters from the read position (src_) down to the write
// position (dst_) of the same stream until the first '"', '\\' or character below 0x20.
// `is` and `os` must be the same object.
static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
    RAPIDJSON_ASSERT(&is == &os);
    (void)os;

    // Nothing to move while read and write positions coincide; just advance.
    if (is.src_ == is.dst_) {
        SkipUnescapedString(is);
        return;
    }

    char* p = is.src_;
    char *q = is.dst_;

    // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    while (p != nextAligned)
        if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
            is.src_ = p;
            is.dst_ = q;
            return;
        }
        else
            *q++ = *p++;

    // The rest of string using SIMD
    // Each constant vector holds 16 copies of one byte to compare against.
    static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
    static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
    static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
    const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
    const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
    const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));

    // p is 16-byte aligned here (aligned load); q is written with an unaligned store.
    for (;; p += 16, q += 16) {
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
        const __m128i t1 = _mm_cmpeq_epi8(s, dq);
        const __m128i t2 = _mm_cmpeq_epi8(s, bs);
        const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
        const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
        // One bit per byte: bit k is set when byte k is a stop character.
        unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
        if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
            size_t length;
#ifdef _MSC_VER         // Find the index of first escaped
            unsigned long offset;
            _BitScanForward(&offset, r);
            length = offset;
#else
            length = static_cast<size_t>(__builtin_ffs(r) - 1);
#endif
            // Move only the characters before the stop character.
            for (const char* pend = p + length; p != pend; )
                *q++ = *p++;
            break;
        }
        // No stop character in this block: move all 16 bytes.
        _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s);
    }

    is.src_ = p;
    is.dst_ = q;
}
1174
// When read/write pointers are the same for insitu stream, just skip unescaped characters
// Advances both src_ and dst_ to the first '"', '\\' or character below 0x20 without
// copying anything.
static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
    RAPIDJSON_ASSERT(is.src_ == is.dst_);
    char* p = is.src_;

    // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    for (; p != nextAligned; p++)
        if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
            is.src_ = is.dst_ = p;
            return;
        }

    // The rest of string using SIMD
    // Each constant vector holds 16 copies of one byte to compare against.
    static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
    static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
    static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
    const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
    const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
    const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));

    // p is 16-byte aligned from here on, so the aligned load below is valid.
    for (;; p += 16) {
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
        const __m128i t1 = _mm_cmpeq_epi8(s, dq);
        const __m128i t2 = _mm_cmpeq_epi8(s, bs);
        const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
        const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
        // One bit per byte: bit k is set when byte k is a stop character.
        unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
        if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
            size_t length;
#ifdef _MSC_VER         // Find the index of first escaped
            unsigned long offset;
            _BitScanForward(&offset, r);
            length = offset;
#else
            length = static_cast<size_t>(__builtin_ffs(r) - 1);
#endif
            p += length;
            break;
        }
    }

    is.src_ = is.dst_ = p;
}
1219 #elif defined(RAPIDJSON_NEON)
1220 // StringStream -> StackStream<char>
ScanCopyUnescapedString(StringStream & is,StackStream<char> & os)1221 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
1222 const char* p = is.src_;
1223
1224 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1225 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1226 while (p != nextAligned)
1227 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1228 is.src_ = p;
1229 return;
1230 }
1231 else
1232 os.Put(*p++);
1233
1234 // The rest of string using SIMD
1235 const uint8x16_t s0 = vmovq_n_u8('"');
1236 const uint8x16_t s1 = vmovq_n_u8('\\');
1237 const uint8x16_t s2 = vmovq_n_u8('\b');
1238 const uint8x16_t s3 = vmovq_n_u8(32);
1239
1240 for (;; p += 16) {
1241 const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
1242 uint8x16_t x = vceqq_u8(s, s0);
1243 x = vorrq_u8(x, vceqq_u8(s, s1));
1244 x = vorrq_u8(x, vceqq_u8(s, s2));
1245 x = vorrq_u8(x, vcltq_u8(s, s3));
1246
1247 x = vrev64q_u8(x); // Rev in 64
1248 uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract
1249 uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract
1250
1251 SizeType length = 0;
1252 bool escaped = false;
1253 if (low == 0) {
1254 if (high != 0) {
1255 unsigned lz = (unsigned)__builtin_clzll(high);;
1256 length = 8 + (lz >> 3);
1257 escaped = true;
1258 }
1259 } else {
1260 unsigned lz = (unsigned)__builtin_clzll(low);;
1261 length = lz >> 3;
1262 escaped = true;
1263 }
1264 if (RAPIDJSON_UNLIKELY(escaped)) { // some of characters is escaped
1265 if (length != 0) {
1266 char* q = reinterpret_cast<char*>(os.Push(length));
1267 for (size_t i = 0; i < length; i++)
1268 q[i] = p[i];
1269
1270 p += length;
1271 }
1272 break;
1273 }
1274 vst1q_u8(reinterpret_cast<uint8_t *>(os.Push(16)), s);
1275 }
1276
1277 is.src_ = p;
1278 }
1279
1280 // InsituStringStream -> InsituStringStream
ScanCopyUnescapedString(InsituStringStream & is,InsituStringStream & os)1281 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
1282 RAPIDJSON_ASSERT(&is == &os);
1283 (void)os;
1284
1285 if (is.src_ == is.dst_) {
1286 SkipUnescapedString(is);
1287 return;
1288 }
1289
1290 char* p = is.src_;
1291 char *q = is.dst_;
1292
1293 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1294 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1295 while (p != nextAligned)
1296 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1297 is.src_ = p;
1298 is.dst_ = q;
1299 return;
1300 }
1301 else
1302 *q++ = *p++;
1303
1304 // The rest of string using SIMD
1305 const uint8x16_t s0 = vmovq_n_u8('"');
1306 const uint8x16_t s1 = vmovq_n_u8('\\');
1307 const uint8x16_t s2 = vmovq_n_u8('\b');
1308 const uint8x16_t s3 = vmovq_n_u8(32);
1309
1310 for (;; p += 16, q += 16) {
1311 const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
1312 uint8x16_t x = vceqq_u8(s, s0);
1313 x = vorrq_u8(x, vceqq_u8(s, s1));
1314 x = vorrq_u8(x, vceqq_u8(s, s2));
1315 x = vorrq_u8(x, vcltq_u8(s, s3));
1316
1317 x = vrev64q_u8(x); // Rev in 64
1318 uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract
1319 uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract
1320
1321 SizeType length = 0;
1322 bool escaped = false;
1323 if (low == 0) {
1324 if (high != 0) {
1325 unsigned lz = (unsigned)__builtin_clzll(high);
1326 length = 8 + (lz >> 3);
1327 escaped = true;
1328 }
1329 } else {
1330 unsigned lz = (unsigned)__builtin_clzll(low);
1331 length = lz >> 3;
1332 escaped = true;
1333 }
1334 if (RAPIDJSON_UNLIKELY(escaped)) { // some of characters is escaped
1335 for (const char* pend = p + length; p != pend; ) {
1336 *q++ = *p++;
1337 }
1338 break;
1339 }
1340 vst1q_u8(reinterpret_cast<uint8_t *>(q), s);
1341 }
1342
1343 is.src_ = p;
1344 is.dst_ = q;
1345 }
1346
1347 // When read/write pointers are the same for insitu stream, just skip unescaped characters
SkipUnescapedString(InsituStringStream & is)1348 static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
1349 RAPIDJSON_ASSERT(is.src_ == is.dst_);
1350 char* p = is.src_;
1351
1352 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1353 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1354 for (; p != nextAligned; p++)
1355 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1356 is.src_ = is.dst_ = p;
1357 return;
1358 }
1359
1360 // The rest of string using SIMD
1361 const uint8x16_t s0 = vmovq_n_u8('"');
1362 const uint8x16_t s1 = vmovq_n_u8('\\');
1363 const uint8x16_t s2 = vmovq_n_u8('\b');
1364 const uint8x16_t s3 = vmovq_n_u8(32);
1365
1366 for (;; p += 16) {
1367 const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
1368 uint8x16_t x = vceqq_u8(s, s0);
1369 x = vorrq_u8(x, vceqq_u8(s, s1));
1370 x = vorrq_u8(x, vceqq_u8(s, s2));
1371 x = vorrq_u8(x, vcltq_u8(s, s3));
1372
1373 x = vrev64q_u8(x); // Rev in 64
1374 uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract
1375 uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract
1376
1377 if (low == 0) {
1378 if (high != 0) {
1379 int lz = __builtin_clzll(high);
1380 p += 8 + (lz >> 3);
1381 break;
1382 }
1383 } else {
1384 int lz = __builtin_clzll(low);
1385 p += lz >> 3;
1386 break;
1387 }
1388 }
1389
1390 is.src_ = is.dst_ = p;
1391 }
1392 #endif // RAPIDJSON_NEON
1393
// Input-stream wrapper used by ParseNumber(); only the specializations below are defined.
//  - backup:     when true, TakePush()/Push() also record characters in a StackStream.
//  - pushOnTake: when true, Take() records the character as well (it forwards to TakePush()).
template<typename InputStream, bool backup, bool pushOnTake>
class NumberStream;
1396
1397 template<typename InputStream>
1398 class NumberStream<InputStream, false, false> {
1399 public:
1400 typedef typename InputStream::Ch Ch;
1401
NumberStream(GenericReader & reader,InputStream & s)1402 NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; }
1403
Peek()1404 RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
TakePush()1405 RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
Take()1406 RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
Push(char)1407 RAPIDJSON_FORCEINLINE void Push(char) {}
1408
Tell()1409 size_t Tell() { return is.Tell(); }
Length()1410 size_t Length() { return 0; }
Pop()1411 const char* Pop() { return 0; }
1412
1413 protected:
1414 NumberStream& operator=(const NumberStream&);
1415
1416 InputStream& is;
1417 };
1418
1419 template<typename InputStream>
1420 class NumberStream<InputStream, true, false> : public NumberStream<InputStream, false, false> {
1421 typedef NumberStream<InputStream, false, false> Base;
1422 public:
NumberStream(GenericReader & reader,InputStream & is)1423 NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is), stackStream(reader.stack_) {}
1424
TakePush()1425 RAPIDJSON_FORCEINLINE Ch TakePush() {
1426 stackStream.Put(static_cast<char>(Base::is.Peek()));
1427 return Base::is.Take();
1428 }
1429
Push(char c)1430 RAPIDJSON_FORCEINLINE void Push(char c) {
1431 stackStream.Put(c);
1432 }
1433
Length()1434 size_t Length() { return stackStream.Length(); }
1435
Pop()1436 const char* Pop() {
1437 stackStream.Put('\0');
1438 return stackStream.Pop();
1439 }
1440
1441 private:
1442 StackStream<char> stackStream;
1443 };
1444
1445 template<typename InputStream>
1446 class NumberStream<InputStream, true, true> : public NumberStream<InputStream, true, false> {
1447 typedef NumberStream<InputStream, true, false> Base;
1448 public:
NumberStream(GenericReader & reader,InputStream & is)1449 NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is) {}
1450
Take()1451 RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); }
1452 };
1453
// Parse a JSON number and report it to the handler.
//
// Grammar handled: [ '-' ] int [ '.' 1*DIGIT ] [ ('e' | 'E') [ '+' | '-' ] 1*DIGIT ].
// With kParseNanAndInfFlag, "NaN", "Inf" and "Infinity" are accepted as well.
//
// The integer part is accumulated in a 32-bit value first, moves to a 64-bit value when
// that would overflow, and finally to a double. The event emitted is:
//  - RawNumber()        when kParseNumbersAsStringsFlag is set;
//  - Double()           when a fraction/exponent was seen, the integer did not fit in
//                       64 bits, or NaN/Inf was parsed;
//  - Int64()/Uint64()   when the 64-bit accumulator was needed;
//  - Int()/Uint()       otherwise.
//
// Errors reported: kParseErrorValueInvalid, kParseErrorNumberMissFraction,
// kParseErrorNumberMissExponent, kParseErrorNumberTooBig, and kParseErrorTermination
// when the handler returns false.
template<unsigned parseFlags, typename InputStream, typename Handler>
void ParseNumber(InputStream& is, Handler& handler) {
    internal::StreamLocalCopy<InputStream> copy(is);
    // Choose the NumberStream specialization:
    //  - backup is enabled for numbers-as-strings on non-insitu input, or otherwise when
    //    full precision is requested;
    //  - pushOnTake is enabled only for numbers-as-strings on non-insitu input.
    NumberStream<InputStream,
        ((parseFlags & kParseNumbersAsStringsFlag) != 0) ?
            ((parseFlags & kParseInsituFlag) == 0) :
            ((parseFlags & kParseFullPrecisionFlag) != 0),
        (parseFlags & kParseNumbersAsStringsFlag) != 0 &&
            (parseFlags & kParseInsituFlag) == 0> s(*this, copy.s);

    size_t startOffset = s.Tell();
    double d = 0.0;
    bool useNanOrInf = false;

    // Parse minus
    bool minus = Consume(s, '-');

    // Parse int: zero / ( digit1-9 *DIGIT )
    unsigned i = 0;
    uint64_t i64 = 0;
    bool use64bit = false;
    int significandDigit = 0;
    if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) {
        i = 0;
        s.TakePush();
    }
    else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) {
        i = static_cast<unsigned>(s.TakePush() - '0');

        // Before appending each digit, check whether i * 10 + digit would exceed the
        // 32-bit limit for this sign; if so, continue in the 64-bit accumulator.
        if (minus)
            while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
                if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648
                    if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) {
                        i64 = i;
                        use64bit = true;
                        break;
                    }
                }
                i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
                significandDigit++;
            }
        else
            while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
                if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295
                    if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) {
                        i64 = i;
                        use64bit = true;
                        break;
                    }
                }
                i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
                significandDigit++;
            }
    }
    // Parse NaN or Infinity here
    else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) {
        if (Consume(s, 'N')) {
            if (Consume(s, 'a') && Consume(s, 'N')) {
                d = std::numeric_limits<double>::quiet_NaN();
                useNanOrInf = true;
            }
        }
        else if (RAPIDJSON_LIKELY(Consume(s, 'I'))) {
            if (Consume(s, 'n') && Consume(s, 'f')) {
                d = (minus ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity());
                useNanOrInf = true;

                // "Inf" may be followed by "inity"; once an 'i' is seen the whole
                // suffix is required.
                if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n')
                                                            && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y')))) {
                    RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
                }
            }
        }

        if (RAPIDJSON_UNLIKELY(!useNanOrInf)) {
            RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
        }
    }
    else
        RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());

    // Parse 64bit int
    bool useDouble = false;
    if (use64bit) {
        // Same overflow pre-check as above, against the 64-bit limits; on overflow
        // continue in the double accumulator.
        if (minus)
            while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
                 if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808
                    if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) {
                        d = static_cast<double>(i64);
                        useDouble = true;
                        break;
                    }
                i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
                significandDigit++;
            }
        else
            while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
                if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615
                    if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) {
                        d = static_cast<double>(i64);
                        useDouble = true;
                        break;
                    }
                i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
                significandDigit++;
            }
    }

    // Force double for big integer
    if (useDouble) {
        while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
            d = d * 10 + (s.TakePush() - '0');
        }
    }

    // Parse frac = decimal-point 1*DIGIT
    int expFrac = 0;            // decremented once per fraction digit folded into the significand
    size_t decimalPosition;
    if (Consume(s, '.')) {
        decimalPosition = s.Length();

        if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9')))
            RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell());

        if (!useDouble) {
#if RAPIDJSON_64BIT
            // Use i64 to store significand in 64-bit architecture
            if (!use64bit)
                i64 = i;

            while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
                if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
                    break;
                else {
                    i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
                    --expFrac;
                    // Leading zeros of the fraction do not count as significant digits.
                    if (i64 != 0)
                        significandDigit++;
                }
            }

            d = static_cast<double>(i64);
#else
            // Use double to store significand in 32-bit architecture
            d = static_cast<double>(use64bit ? i64 : i);
#endif
            useDouble = true;
        }

        // Remaining fraction digits: fold in while fewer than 17 significant digits have
        // been counted; later digits are consumed but do not change d.
        while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
            if (significandDigit < 17) {
                d = d * 10.0 + (s.TakePush() - '0');
                --expFrac;
                if (RAPIDJSON_LIKELY(d > 0.0))
                    significandDigit++;
            }
            else
                s.TakePush();
        }
    }
    else
        decimalPosition = s.Length(); // decimal position at the end of integer.

    // Parse exp = e [ minus / plus ] 1*DIGIT
    int exp = 0;
    if (Consume(s, 'e') || Consume(s, 'E')) {
        if (!useDouble) {
            d = static_cast<double>(use64bit ? i64 : i);
            useDouble = true;
        }

        bool expMinus = false;
        if (Consume(s, '+'))
            ;
        else if (Consume(s, '-'))
            expMinus = true;

        if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
            exp = static_cast<int>(s.Take() - '0');
            if (expMinus) {
                // (exp + expFrac) must not underflow int => we're detecting when -exp gets
                // dangerously close to INT_MIN (a pessimistic next digit 9 would push it into
                // underflow territory):
                //
                //        -(exp * 10 + 9) + expFrac >= INT_MIN
                //   <=>  exp <= (expFrac - INT_MIN - 9) / 10
                RAPIDJSON_ASSERT(expFrac <= 0);
                int maxExp = (expFrac + 2147483639) / 10;

                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
                    exp = exp * 10 + static_cast<int>(s.Take() - '0');
                    if (RAPIDJSON_UNLIKELY(exp > maxExp)) {
                        while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9'))  // Consume the rest of exponent
                            s.Take();
                    }
                }
            }
            else {  // positive exp
                int maxExp = 308 - expFrac;
                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
                    exp = exp * 10 + static_cast<int>(s.Take() - '0');
                    if (RAPIDJSON_UNLIKELY(exp > maxExp))
                        RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
                }
            }
        }
        else
            RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell());

        if (expMinus)
            exp = -exp;
    }

    // Finish parsing, call event according to the type of number.
    bool cont = true;

    if (parseFlags & kParseNumbersAsStringsFlag) {
        if (parseFlags & kParseInsituFlag) {
            s.Pop();  // Pop stack no matter if it will be used or not.
            typename InputStream::Ch* head = is.PutBegin();
            const size_t length = s.Tell() - startOffset;
            RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
            // unable to insert the \0 character here, it will erase the comma after this number
            const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
            cont = handler.RawNumber(str, SizeType(length), false);
        }
        else {
            // Transcode the recorded characters (read as UTF8<>) into TargetEncoding on
            // the stack, then hand the null-terminated result to the handler.
            SizeType numCharsToCopy = static_cast<SizeType>(s.Length());
            StringStream srcStream(s.Pop());
            StackStream<typename TargetEncoding::Ch> dstStream(stack_);
            while (numCharsToCopy--) {
                Transcoder<UTF8<>, TargetEncoding>::Transcode(srcStream, dstStream);
            }
            dstStream.Put('\0');
            const typename TargetEncoding::Ch* str = dstStream.Pop();
            const SizeType length = static_cast<SizeType>(dstStream.Length()) - 1;
            cont = handler.RawNumber(str, SizeType(length), true);
        }
    }
    else {
       size_t length = s.Length();
       const char* decimal = s.Pop();  // Pop stack no matter if it will be used or not.

       if (useDouble) {
           // Decimal exponent to apply to the significand d: explicit exponent plus the
           // shift caused by the fraction digits.
           int p = exp + expFrac;
           if (parseFlags & kParseFullPrecisionFlag)
               d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
           else
               d = internal::StrtodNormalPrecision(d, p);

           // Use > max, instead of == inf, to fix bogus warning -Wfloat-equal
           if (d > (std::numeric_limits<double>::max)()) {
               // Overflow
               // TODO: internal::StrtodX should report overflow (or underflow)
               RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
           }

           cont = handler.Double(minus ? -d : d);
       }
       else if (useNanOrInf) {
           cont = handler.Double(d);
       }
       else {
           if (use64bit) {
               if (minus)
                   cont = handler.Int64(static_cast<int64_t>(~i64 + 1));   // ~x + 1: two's-complement negation
               else
                   cont = handler.Uint64(i64);
           }
           else {
               if (minus)
                   cont = handler.Int(static_cast<int32_t>(~i + 1));       // ~x + 1: two's-complement negation
               else
                   cont = handler.Uint(i);
           }
       }
    }
    if (RAPIDJSON_UNLIKELY(!cont))
        RAPIDJSON_PARSE_ERROR(kParseErrorTermination, startOffset);
}
1734
1735 // Parse any JSON value
1736 template<unsigned parseFlags, typename InputStream, typename Handler>
ParseValue(InputStream & is,Handler & handler)1737 void ParseValue(InputStream& is, Handler& handler) {
1738 switch (is.Peek()) {
1739 case 'n': ParseNull <parseFlags>(is, handler); break;
1740 case 't': ParseTrue <parseFlags>(is, handler); break;
1741 case 'f': ParseFalse <parseFlags>(is, handler); break;
1742 case '"': ParseString<parseFlags>(is, handler); break;
1743 case '{': ParseObject<parseFlags>(is, handler); break;
1744 case '[': ParseArray <parseFlags>(is, handler); break;
1745 default :
1746 ParseNumber<parseFlags>(is, handler);
1747 break;
1748
1749 }
1750 }
1751
1752 // Iterative Parsing
1753
1754 // States
//! States of the iterative (non-recursive) parser.
/*! The numeric values are spelled out because their order is relied upon:
    - IsIterativeParsingCompleteState() tests `s <= IterativeParsingErrorState`,
      so the two sink states must be the lowest values;
    - IsIterativeParsingDelimiterState() tests
      `s >= IterativeParsingElementDelimiterState`, so the delimiter states
      must be the highest values;
    - Predict() uses the value as the row index of its transition table.
*/
enum IterativeParsingState {
    // Sink states (lowest values)
    IterativeParsingFinishState = 0,
    IterativeParsingErrorState = 1,

    IterativeParsingStartState = 2,

    // Object states
    IterativeParsingObjectInitialState = 3,
    IterativeParsingMemberKeyState = 4,
    IterativeParsingMemberValueState = 5,
    IterativeParsingObjectFinishState = 6,

    // Array states
    IterativeParsingArrayInitialState = 7,
    IterativeParsingElementState = 8,
    IterativeParsingArrayFinishState = 9,

    // Single (root) value state
    IterativeParsingValueState = 10,

    // Delimiter states (highest values)
    IterativeParsingElementDelimiterState = 11,
    IterativeParsingMemberDelimiterState = 12,
    IterativeParsingKeyValueDelimiterState = 13,

    // Number of states; first dimension of the table in Predict()
    cIterativeParsingStateCount = 14
};
1781
1782 // Tokens
//! Lookahead token classes produced by Tokenize().
/*! The value is used as the column index of the transition table in
    Predict(), so the numbering is spelled out explicitly.
*/
enum Token {
    // Array brackets
    LeftBracketToken = 0,
    RightBracketToken = 1,

    // Object brackets
    LeftCurlyBracketToken = 2,
    RightCurlyBracketToken = 3,

    // Separators
    CommaToken = 4,
    ColonToken = 5,

    // Value starters
    StringToken = 6,
    FalseToken = 7,
    TrueToken = 8,
    NullToken = 9,
    NumberToken = 10,

    // Number of token classes; second dimension of the table in Predict()
    kTokenCount = 11
};
1801
Tokenize(Ch c)1802 RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) const {
1803
1804 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
1805 #define N NumberToken
1806 #define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N
1807 // Maps from ASCII to Token
1808 static const unsigned char tokenMap[256] = {
1809 N16, // 00~0F
1810 N16, // 10~1F
1811 N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F
1812 N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F
1813 N16, // 40~4F
1814 N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F
1815 N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F
1816 N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F
1817 N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF
1818 };
1819 #undef N
1820 #undef N16
1821 //!@endcond
1822
1823 if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256)
1824 return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]);
1825 else
1826 return NumberToken;
1827 }
1828
Predict(IterativeParsingState state,Token token)1829 RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) const {
1830 // current state x one lookahead token -> new state
1831 static const char G[cIterativeParsingStateCount][kTokenCount] = {
1832 // Finish(sink state)
1833 {
1834 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1835 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1836 IterativeParsingErrorState
1837 },
1838 // Error(sink state)
1839 {
1840 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1841 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1842 IterativeParsingErrorState
1843 },
1844 // Start
1845 {
1846 IterativeParsingArrayInitialState, // Left bracket
1847 IterativeParsingErrorState, // Right bracket
1848 IterativeParsingObjectInitialState, // Left curly bracket
1849 IterativeParsingErrorState, // Right curly bracket
1850 IterativeParsingErrorState, // Comma
1851 IterativeParsingErrorState, // Colon
1852 IterativeParsingValueState, // String
1853 IterativeParsingValueState, // False
1854 IterativeParsingValueState, // True
1855 IterativeParsingValueState, // Null
1856 IterativeParsingValueState // Number
1857 },
1858 // ObjectInitial
1859 {
1860 IterativeParsingErrorState, // Left bracket
1861 IterativeParsingErrorState, // Right bracket
1862 IterativeParsingErrorState, // Left curly bracket
1863 IterativeParsingObjectFinishState, // Right curly bracket
1864 IterativeParsingErrorState, // Comma
1865 IterativeParsingErrorState, // Colon
1866 IterativeParsingMemberKeyState, // String
1867 IterativeParsingErrorState, // False
1868 IterativeParsingErrorState, // True
1869 IterativeParsingErrorState, // Null
1870 IterativeParsingErrorState // Number
1871 },
1872 // MemberKey
1873 {
1874 IterativeParsingErrorState, // Left bracket
1875 IterativeParsingErrorState, // Right bracket
1876 IterativeParsingErrorState, // Left curly bracket
1877 IterativeParsingErrorState, // Right curly bracket
1878 IterativeParsingErrorState, // Comma
1879 IterativeParsingKeyValueDelimiterState, // Colon
1880 IterativeParsingErrorState, // String
1881 IterativeParsingErrorState, // False
1882 IterativeParsingErrorState, // True
1883 IterativeParsingErrorState, // Null
1884 IterativeParsingErrorState // Number
1885 },
1886 // MemberValue
1887 {
1888 IterativeParsingErrorState, // Left bracket
1889 IterativeParsingErrorState, // Right bracket
1890 IterativeParsingErrorState, // Left curly bracket
1891 IterativeParsingObjectFinishState, // Right curly bracket
1892 IterativeParsingMemberDelimiterState, // Comma
1893 IterativeParsingErrorState, // Colon
1894 IterativeParsingErrorState, // String
1895 IterativeParsingErrorState, // False
1896 IterativeParsingErrorState, // True
1897 IterativeParsingErrorState, // Null
1898 IterativeParsingErrorState // Number
1899 },
1900 // ObjectFinish(sink state)
1901 {
1902 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1903 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1904 IterativeParsingErrorState
1905 },
1906 // ArrayInitial
1907 {
1908 IterativeParsingArrayInitialState, // Left bracket(push Element state)
1909 IterativeParsingArrayFinishState, // Right bracket
1910 IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
1911 IterativeParsingErrorState, // Right curly bracket
1912 IterativeParsingErrorState, // Comma
1913 IterativeParsingErrorState, // Colon
1914 IterativeParsingElementState, // String
1915 IterativeParsingElementState, // False
1916 IterativeParsingElementState, // True
1917 IterativeParsingElementState, // Null
1918 IterativeParsingElementState // Number
1919 },
1920 // Element
1921 {
1922 IterativeParsingErrorState, // Left bracket
1923 IterativeParsingArrayFinishState, // Right bracket
1924 IterativeParsingErrorState, // Left curly bracket
1925 IterativeParsingErrorState, // Right curly bracket
1926 IterativeParsingElementDelimiterState, // Comma
1927 IterativeParsingErrorState, // Colon
1928 IterativeParsingErrorState, // String
1929 IterativeParsingErrorState, // False
1930 IterativeParsingErrorState, // True
1931 IterativeParsingErrorState, // Null
1932 IterativeParsingErrorState // Number
1933 },
1934 // ArrayFinish(sink state)
1935 {
1936 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1937 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1938 IterativeParsingErrorState
1939 },
1940 // Single Value (sink state)
1941 {
1942 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1943 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1944 IterativeParsingErrorState
1945 },
1946 // ElementDelimiter
1947 {
1948 IterativeParsingArrayInitialState, // Left bracket(push Element state)
1949 IterativeParsingArrayFinishState, // Right bracket
1950 IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
1951 IterativeParsingErrorState, // Right curly bracket
1952 IterativeParsingErrorState, // Comma
1953 IterativeParsingErrorState, // Colon
1954 IterativeParsingElementState, // String
1955 IterativeParsingElementState, // False
1956 IterativeParsingElementState, // True
1957 IterativeParsingElementState, // Null
1958 IterativeParsingElementState // Number
1959 },
1960 // MemberDelimiter
1961 {
1962 IterativeParsingErrorState, // Left bracket
1963 IterativeParsingErrorState, // Right bracket
1964 IterativeParsingErrorState, // Left curly bracket
1965 IterativeParsingObjectFinishState, // Right curly bracket
1966 IterativeParsingErrorState, // Comma
1967 IterativeParsingErrorState, // Colon
1968 IterativeParsingMemberKeyState, // String
1969 IterativeParsingErrorState, // False
1970 IterativeParsingErrorState, // True
1971 IterativeParsingErrorState, // Null
1972 IterativeParsingErrorState // Number
1973 },
1974 // KeyValueDelimiter
1975 {
1976 IterativeParsingArrayInitialState, // Left bracket(push MemberValue state)
1977 IterativeParsingErrorState, // Right bracket
1978 IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state)
1979 IterativeParsingErrorState, // Right curly bracket
1980 IterativeParsingErrorState, // Comma
1981 IterativeParsingErrorState, // Colon
1982 IterativeParsingMemberValueState, // String
1983 IterativeParsingMemberValueState, // False
1984 IterativeParsingMemberValueState, // True
1985 IterativeParsingMemberValueState, // Null
1986 IterativeParsingMemberValueState // Number
1987 },
1988 }; // End of G
1989
1990 return static_cast<IterativeParsingState>(G[state][token]);
1991 }
1992
1993 // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit().
1994 // May return a new state on state pop.
1995 template <unsigned parseFlags, typename InputStream, typename Handler>
Transit(IterativeParsingState src,Token token,IterativeParsingState dst,InputStream & is,Handler & handler)1996 RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) {
1997 (void)token;
1998
1999 switch (dst) {
2000 case IterativeParsingErrorState:
2001 return dst;
2002
2003 case IterativeParsingObjectInitialState:
2004 case IterativeParsingArrayInitialState:
2005 {
2006 // Push the state(Element or MemeberValue) if we are nested in another array or value of member.
2007 // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
2008 IterativeParsingState n = src;
2009 if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
2010 n = IterativeParsingElementState;
2011 else if (src == IterativeParsingKeyValueDelimiterState)
2012 n = IterativeParsingMemberValueState;
2013 // Push current state.
2014 *stack_.template Push<SizeType>(1) = n;
2015 // Initialize and push the member/element count.
2016 *stack_.template Push<SizeType>(1) = 0;
2017 // Call handler
2018 bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray();
2019 // On handler short circuits the parsing.
2020 if (!hr) {
2021 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
2022 return IterativeParsingErrorState;
2023 }
2024 else {
2025 is.Take();
2026 return dst;
2027 }
2028 }
2029
2030 case IterativeParsingMemberKeyState:
2031 ParseString<parseFlags>(is, handler, true);
2032 if (HasParseError())
2033 return IterativeParsingErrorState;
2034 else
2035 return dst;
2036
2037 case IterativeParsingKeyValueDelimiterState:
2038 RAPIDJSON_ASSERT(token == ColonToken);
2039 is.Take();
2040 return dst;
2041
2042 case IterativeParsingMemberValueState:
2043 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
2044 ParseValue<parseFlags>(is, handler);
2045 if (HasParseError()) {
2046 return IterativeParsingErrorState;
2047 }
2048 return dst;
2049
2050 case IterativeParsingElementState:
2051 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
2052 ParseValue<parseFlags>(is, handler);
2053 if (HasParseError()) {
2054 return IterativeParsingErrorState;
2055 }
2056 return dst;
2057
2058 case IterativeParsingMemberDelimiterState:
2059 case IterativeParsingElementDelimiterState:
2060 is.Take();
2061 // Update member/element count.
2062 *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1;
2063 return dst;
2064
2065 case IterativeParsingObjectFinishState:
2066 {
2067 // Transit from delimiter is only allowed when trailing commas are enabled
2068 if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) {
2069 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell());
2070 return IterativeParsingErrorState;
2071 }
2072 // Get member count.
2073 SizeType c = *stack_.template Pop<SizeType>(1);
2074 // If the object is not empty, count the last member.
2075 if (src == IterativeParsingMemberValueState)
2076 ++c;
2077 // Restore the state.
2078 IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
2079 // Transit to Finish state if this is the topmost scope.
2080 if (n == IterativeParsingStartState)
2081 n = IterativeParsingFinishState;
2082 // Call handler
2083 bool hr = handler.EndObject(c);
2084 // On handler short circuits the parsing.
2085 if (!hr) {
2086 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
2087 return IterativeParsingErrorState;
2088 }
2089 else {
2090 is.Take();
2091 return n;
2092 }
2093 }
2094
2095 case IterativeParsingArrayFinishState:
2096 {
2097 // Transit from delimiter is only allowed when trailing commas are enabled
2098 if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) {
2099 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorValueInvalid, is.Tell());
2100 return IterativeParsingErrorState;
2101 }
2102 // Get element count.
2103 SizeType c = *stack_.template Pop<SizeType>(1);
2104 // If the array is not empty, count the last element.
2105 if (src == IterativeParsingElementState)
2106 ++c;
2107 // Restore the state.
2108 IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
2109 // Transit to Finish state if this is the topmost scope.
2110 if (n == IterativeParsingStartState)
2111 n = IterativeParsingFinishState;
2112 // Call handler
2113 bool hr = handler.EndArray(c);
2114 // On handler short circuits the parsing.
2115 if (!hr) {
2116 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
2117 return IterativeParsingErrorState;
2118 }
2119 else {
2120 is.Take();
2121 return n;
2122 }
2123 }
2124
2125 default:
2126 // This branch is for IterativeParsingValueState actually.
2127 // Use `default:` rather than
2128 // `case IterativeParsingValueState:` is for code coverage.
2129
2130 // The IterativeParsingStartState is not enumerated in this switch-case.
2131 // It is impossible for that case. And it can be caught by following assertion.
2132
2133 // The IterativeParsingFinishState is not enumerated in this switch-case either.
2134 // It is a "derivative" state which cannot triggered from Predict() directly.
2135 // Therefore it cannot happen here. And it can be caught by following assertion.
2136 RAPIDJSON_ASSERT(dst == IterativeParsingValueState);
2137
2138 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
2139 ParseValue<parseFlags>(is, handler);
2140 if (HasParseError()) {
2141 return IterativeParsingErrorState;
2142 }
2143 return IterativeParsingFinishState;
2144 }
2145 }
2146
2147 template <typename InputStream>
HandleError(IterativeParsingState src,InputStream & is)2148 void HandleError(IterativeParsingState src, InputStream& is) {
2149 if (HasParseError()) {
2150 // Error flag has been set.
2151 return;
2152 }
2153
2154 switch (src) {
2155 case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return;
2156 case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return;
2157 case IterativeParsingObjectInitialState:
2158 case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return;
2159 case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return;
2160 case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return;
2161 case IterativeParsingKeyValueDelimiterState:
2162 case IterativeParsingArrayInitialState:
2163 case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return;
2164 default: RAPIDJSON_ASSERT(src == IterativeParsingElementState); RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return;
2165 }
2166 }
2167
IsIterativeParsingDelimiterState(IterativeParsingState s)2168 RAPIDJSON_FORCEINLINE bool IsIterativeParsingDelimiterState(IterativeParsingState s) const {
2169 return s >= IterativeParsingElementDelimiterState;
2170 }
2171
IsIterativeParsingCompleteState(IterativeParsingState s)2172 RAPIDJSON_FORCEINLINE bool IsIterativeParsingCompleteState(IterativeParsingState s) const {
2173 return s <= IterativeParsingErrorState;
2174 }
2175
2176 template <unsigned parseFlags, typename InputStream, typename Handler>
IterativeParse(InputStream & is,Handler & handler)2177 ParseResult IterativeParse(InputStream& is, Handler& handler) {
2178 parseResult_.Clear();
2179 ClearStackOnExit scope(*this);
2180 IterativeParsingState state = IterativeParsingStartState;
2181
2182 SkipWhitespaceAndComments<parseFlags>(is);
2183 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
2184 while (is.Peek() != '\0') {
2185 Token t = Tokenize(is.Peek());
2186 IterativeParsingState n = Predict(state, t);
2187 IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
2188
2189 if (d == IterativeParsingErrorState) {
2190 HandleError(state, is);
2191 break;
2192 }
2193
2194 state = d;
2195
2196 // Do not further consume streams if a root JSON has been parsed.
2197 if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
2198 break;
2199
2200 SkipWhitespaceAndComments<parseFlags>(is);
2201 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
2202 }
2203
2204 // Handle the end of file.
2205 if (state != IterativeParsingFinishState)
2206 HandleError(state, is);
2207
2208 return parseResult_;
2209 }
2210
2211 static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string.
2212 internal::Stack<StackAllocator> stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing.
2213 ParseResult parseResult_;
2214 IterativeParsingState state_;
2215 }; // class GenericReader
2216
2217 //! Reader with UTF8 encoding and default allocator.
2218 typedef GenericReader<UTF8<>, UTF8<> > Reader;
2219
2220 RAPIDJSON_NAMESPACE_END
2221
2222 #if defined(__clang__) || defined(_MSC_VER)
2223 RAPIDJSON_DIAG_POP
2224 #endif
2225
2226
2227 #ifdef __GNUC__
2228 RAPIDJSON_DIAG_POP
2229 #endif
2230
2231 #endif // RAPIDJSON_READER_H_
2232