1 //
2 // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
3 // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
4 //
5 // Distributed under the Boost Software License, Version 1.0. (See accompanying
6 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 //
8 // Official repository: https://github.com/boostorg/json
9 //
10
11 #ifndef BOOST_JSON_BASIC_PARSER_HPP
12 #define BOOST_JSON_BASIC_PARSER_HPP
13
14 #include <boost/json/detail/config.hpp>
15 #include <boost/json/error.hpp>
16 #include <boost/json/kind.hpp>
17 #include <boost/json/parse_options.hpp>
18 #include <boost/json/detail/stack.hpp>
19 #include <boost/json/detail/stream.hpp>
20 #include <boost/json/detail/utf8.hpp>
21
22 /* VFALCO NOTE
23
24 This file contains only the declaration of
25 basic_parser. It is not meant to be included directly by users,
26 who should be including <boost/json/basic_parser_impl.hpp>
27 instead, which provides the member function definitions.
28
29 The source code is arranged this way to keep compile
30 times down.
31 */
32
33 BOOST_JSON_NS_BEGIN
34
35 /** An incremental SAX parser for serialized JSON.
36
37 This implements a SAX-style parser, invoking a
38 caller-supplied handler with each parsing event.
39 To use, first declare a variable of type
40 `basic_parser<T>` where `T` meets the handler
41 requirements specified below. Then call
42 @ref write_some one or more times with the input,
43 setting `more = false` on the final buffer.
44 The parsing events are realized through member
45 function calls on the handler, which exists
46 as a data member of the parser.
47 \n
48 The parser may dynamically allocate intermediate
49 storage as needed to accommodate the nesting level
50 of the input JSON. On subsequent invocations, the
51 parser can cheaply re-use this memory, improving
52 performance. This storage is freed when the
53 parser is destroyed.
54
55 @par Usage
56
57 To get the declaration and function definitions
58 for this class it is necessary to include this
59 file instead:
60 @code
61 #include <boost/json/basic_parser_impl.hpp>
62 @endcode
63
64 Users who wish to parse JSON into the DOM container
65 @ref value will not use this class directly; instead
66 they will create an instance of @ref parser or
67 @ref stream_parser and use that instead. Alternatively,
68 they may call the function @ref parse. This class is
69 designed for users who wish to perform custom actions
70 instead of building a @ref value. For example, to
71 produce a DOM from an external library.
72 \n
73 @note
74
75 By default, only conforming JSON using UTF-8
76 encoding is accepted. However, select non-compliant
77 syntax can be allowed by construction using a
78 @ref parse_options set to desired values.
79
80 @par Handler
81
82 The handler provided must be implemented as an
83 object of class type which defines each of the
84 required event member functions below. The event
85 functions return a `bool` where `true` indicates
86 success, and `false` indicates failure. If the
87 member function returns `false`, it must set
88 the error code to a suitable value. This error
89 code will be returned by the write function to
90 the caller.
91 \n
92 Handlers are required to declare the maximum
93 limits on various elements. If these limits
94 are exceeded during parsing, then parsing
95 fails with an error.
96 \n
97 The following declaration meets the parser's
98 handler requirements:
99
100 @code
101 struct handler
102 {
103 /// The maximum number of elements allowed in an array
104 static constexpr std::size_t max_array_size = -1;
105
106 /// The maximum number of elements allowed in an object
107 static constexpr std::size_t max_object_size = -1;
108
109 /// The maximum number of characters allowed in a string
110 static constexpr std::size_t max_string_size = -1;
111
112 /// The maximum number of characters allowed in a key
113 static constexpr std::size_t max_key_size = -1;
114
115 /// Called once when the JSON parsing begins.
116 ///
117 /// @return `true` on success.
118 /// @param ec Set to the error, if any occurred.
119 ///
120 bool on_document_begin( error_code& ec );
121
122 /// Called when the JSON parsing is done.
123 ///
124 /// @return `true` on success.
125 /// @param ec Set to the error, if any occurred.
126 ///
127 bool on_document_end( error_code& ec );
128
129 /// Called when the beginning of an array is encountered.
130 ///
131 /// @return `true` on success.
132 /// @param ec Set to the error, if any occurred.
133 ///
134 bool on_array_begin( error_code& ec );
135
136 /// Called when the end of the current array is encountered.
137 ///
138 /// @return `true` on success.
139 /// @param n The number of elements in the array.
140 /// @param ec Set to the error, if any occurred.
141 ///
142 bool on_array_end( std::size_t n, error_code& ec );
143
144 /// Called when the beginning of an object is encountered.
145 ///
146 /// @return `true` on success.
147 /// @param ec Set to the error, if any occurred.
148 ///
149 bool on_object_begin( error_code& ec );
150
151 /// Called when the end of the current object is encountered.
152 ///
153 /// @return `true` on success.
154 /// @param n The number of elements in the object.
155 /// @param ec Set to the error, if any occurred.
156 ///
157 bool on_object_end( std::size_t n, error_code& ec );
158
159 /// Called with characters corresponding to part of the current string.
160 ///
161 /// @return `true` on success.
162 /// @param s The partial characters
163 /// @param n The total size of the string thus far
164 /// @param ec Set to the error, if any occurred.
165 ///
166 bool on_string_part( string_view s, std::size_t n, error_code& ec );
167
168 /// Called with the last characters corresponding to the current string.
169 ///
170 /// @return `true` on success.
171 /// @param s The remaining characters
172 /// @param n The total size of the string
173 /// @param ec Set to the error, if any occurred.
174 ///
175 bool on_string( string_view s, std::size_t n, error_code& ec );
176
177 /// Called with characters corresponding to part of the current key.
178 ///
179 /// @return `true` on success.
180 /// @param s The partial characters
181 /// @param n The total size of the key thus far
182 /// @param ec Set to the error, if any occurred.
183 ///
184 bool on_key_part( string_view s, std::size_t n, error_code& ec );
185
186 /// Called with the last characters corresponding to the current key.
187 ///
188 /// @return `true` on success.
189 /// @param s The remaining characters
190 /// @param n The total size of the key
191 /// @param ec Set to the error, if any occurred.
192 ///
193 bool on_key( string_view s, std::size_t n, error_code& ec );
194
195 /// Called with the characters corresponding to part of the current number.
196 ///
197 /// @return `true` on success.
198 /// @param s The partial characters
199 /// @param ec Set to the error, if any occurred.
200 ///
201 bool on_number_part( string_view s, error_code& ec );
202
203 /// Called when a signed integer is parsed.
204 ///
205 /// @return `true` on success.
206 /// @param i The value
207 /// @param s The remaining characters
208 /// @param ec Set to the error, if any occurred.
209 ///
210 bool on_int64( int64_t i, string_view s, error_code& ec );
211
212 /// Called when an unsigned integer is parsed.
213 ///
214 /// @return `true` on success.
215 /// @param u The value
216 /// @param s The remaining characters
217 /// @param ec Set to the error, if any occurred.
218 ///
219 bool on_uint64( uint64_t u, string_view s, error_code& ec );
220
221 /// Called when a double is parsed.
222 ///
223 /// @return `true` on success.
224 /// @param d The value
225 /// @param s The remaining characters
226 /// @param ec Set to the error, if any occurred.
227 ///
228 bool on_double( double d, string_view s, error_code& ec );
229
230 /// Called when a boolean is parsed.
231 ///
232 /// @return `true` on success.
233 /// @param b The value
235 /// @param ec Set to the error, if any occurred.
236 ///
237 bool on_bool( bool b, error_code& ec );
238
239 /// Called when a null is parsed.
240 ///
241 /// @return `true` on success.
242 /// @param ec Set to the error, if any occurred.
243 ///
244 bool on_null( error_code& ec );
245
246 /// Called with characters corresponding to part of the current comment.
247 ///
248 /// @return `true` on success.
249 /// @param s The partial characters.
250 /// @param ec Set to the error, if any occurred.
251 ///
252 bool on_comment_part( string_view s, error_code& ec );
253
254 /// Called with the last characters corresponding to the current comment.
255 ///
256 /// @return `true` on success.
257 /// @param s The remaining characters
258 /// @param ec Set to the error, if any occurred.
259 ///
260 bool on_comment( string_view s, error_code& ec );
261 };
262 @endcode
263
264 @see
265 @ref parse,
266 @ref stream_parser.
267
268 @headerfile <boost/json/basic_parser.hpp>
269 */
270 template<class Handler>
271 class basic_parser
272 {
273 enum class state : char // NOTE(review): these appear to be the resume points handed to suspend()/maybe_suspend() below - confirm in basic_parser_impl.hpp
274 {
275 doc1, doc2, doc3, doc4,
276 com1, com2, com3, com4,
277 nul1, nul2, nul3,
278 tru1, tru2, tru3,
279 fal1, fal2, fal3, fal4,
280 str1, str2, str3, str4,
281 str5, str6, str7, str8,
282 sur1, sur2, sur3,
283 sur4, sur5, sur6,
284 obj1, obj2, obj3, obj4,
285 obj5, obj6, obj7, obj8,
286 obj9, obj10, obj11,
287 arr1, arr2, arr3,
288 arr4, arr5, arr6,
289 num1, num2, num3, num4,
290 num5, num6, num7, num8,
291 exp1, exp2, exp3,
292 val1, val2
293 };
294
295 struct number // number-parsing state; passed by const reference to the suspend()/maybe_suspend() overloads below
296 {
297 uint64_t mant; // presumably the mantissa accumulated so far - TODO confirm
298 int bias; // presumably an exponent adjustment - TODO confirm
299 int exp; // presumably the explicit exponent - TODO confirm
300 bool frac; // presumably set when a fractional part was seen - TODO confirm
301 bool neg; // presumably set for a negative number - TODO confirm
302 };
303
304 // optimization: must come first
305 Handler h_; // the handler instance exposed through handler()
306
307 number num_;
308 error_code ec_; // value returned by last_error()
309 detail::stack st_;
310 detail::utf8_sequence seq_;
311 unsigned u1_;
312 unsigned u2_;
313 bool more_; // false for final buffer
314 bool done_ = false; // true on complete parse
315 bool clean_ = true; // write_some exited cleanly
316 const char* end_;
317 parse_options opt_;
318 // how many levels deeper the parser can go
319 std::size_t depth_ = opt_.max_depth; // reads opt_, so opt_ must stay declared before depth_
320
321 inline void reserve();
322 inline const char* sentinel();
323 inline bool incomplete(
324 const detail::const_stream_wrapper& cs);
325
326 #ifdef __INTEL_COMPILER // NOTE(review): presumably silences a warning triggered by the BOOST_NOINLINE + inline declarations below - confirm
327 #pragma warning push
328 #pragma warning disable 2196
329 #endif
330
331 BOOST_NOINLINE
332 inline
333 const char*
334 suspend_or_fail(state st);
335
336 BOOST_NOINLINE
337 inline
338 const char*
339 suspend_or_fail(
340 state st,
341 std::size_t n);
342
343 BOOST_NOINLINE
344 inline
345 const char*
346 fail(const char* p) noexcept;
347
348 BOOST_NOINLINE
349 inline
350 const char*
351 fail(
352 const char* p,
353 error ev) noexcept;
354
355 BOOST_NOINLINE
356 inline
357 const char*
358 maybe_suspend(
359 const char* p,
360 state st);
361
362 BOOST_NOINLINE
363 inline
364 const char*
365 maybe_suspend(
366 const char* p,
367 state st,
368 std::size_t n);
369
370 BOOST_NOINLINE
371 inline
372 const char*
373 maybe_suspend(
374 const char* p,
375 state st,
376 const number& num);
377
378 BOOST_NOINLINE
379 inline
380 const char*
381 suspend(
382 const char* p,
383 state st);
384
385 BOOST_NOINLINE
386 inline
387 const char*
388 suspend(
389 const char* p,
390 state st,
391 const number& num);
392
393 #ifdef __INTEL_COMPILER
394 #pragma warning pop
395 #endif
396
397 template<bool StackEmpty_/*, bool Terminal_*/>
398 const char* parse_comment(const char* p,
399 std::integral_constant<bool, StackEmpty_> stack_empty,
400 /*std::integral_constant<bool, Terminal_>*/ bool terminal);
401
402 template<bool StackEmpty_>
403 const char* parse_document(const char* p,
404 std::integral_constant<bool, StackEmpty_> stack_empty);
405
406 template<bool StackEmpty_, bool AllowComments_/*,
407 bool AllowTrailing_, bool AllowBadUTF8_*/>
408 const char* parse_value(const char* p,
409 std::integral_constant<bool, StackEmpty_> stack_empty,
410 std::integral_constant<bool, AllowComments_> allow_comments,
411 /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
412 /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
413
414 template<bool StackEmpty_, bool AllowComments_/*,
415 bool AllowTrailing_, bool AllowBadUTF8_*/>
416 const char* resume_value(const char* p,
417 std::integral_constant<bool, StackEmpty_> stack_empty,
418 std::integral_constant<bool, AllowComments_> allow_comments,
419 /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
420 /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
421
422 template<bool StackEmpty_, bool AllowComments_/*,
423 bool AllowTrailing_, bool AllowBadUTF8_*/>
424 const char* parse_object(const char* p,
425 std::integral_constant<bool, StackEmpty_> stack_empty,
426 std::integral_constant<bool, AllowComments_> allow_comments,
427 /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
428 /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
429
430 template<bool StackEmpty_, bool AllowComments_/*,
431 bool AllowTrailing_, bool AllowBadUTF8_*/>
432 const char* parse_array(const char* p,
433 std::integral_constant<bool, StackEmpty_> stack_empty,
434 std::integral_constant<bool, AllowComments_> allow_comments,
435 /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
436 /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
437
438 template<bool StackEmpty_>
439 const char* parse_null(const char* p,
440 std::integral_constant<bool, StackEmpty_> stack_empty);
441
442 template<bool StackEmpty_>
443 const char* parse_true(const char* p,
444 std::integral_constant<bool, StackEmpty_> stack_empty);
445
446 template<bool StackEmpty_>
447 const char* parse_false(const char* p,
448 std::integral_constant<bool, StackEmpty_> stack_empty);
449
450 template<bool StackEmpty_, bool IsKey_/*,
451 bool AllowBadUTF8_*/>
452 const char* parse_string(const char* p,
453 std::integral_constant<bool, StackEmpty_> stack_empty,
454 std::integral_constant<bool, IsKey_> is_key,
455 /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
456
457 template<bool StackEmpty_, char First_>
458 const char* parse_number(const char* p,
459 std::integral_constant<bool, StackEmpty_> stack_empty,
460 std::integral_constant<char, First_> first);
461
462 template<bool StackEmpty_, bool IsKey_/*,
463 bool AllowBadUTF8_*/>
464 const char* parse_unescaped(const char* p,
465 std::integral_constant<bool, StackEmpty_> stack_empty,
466 std::integral_constant<bool, IsKey_> is_key,
467 /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
468
469 template<bool StackEmpty_/*, bool IsKey_,
470 bool AllowBadUTF8_*/>
471 const char* parse_escaped(
472 const char* p,
473 std::size_t total,
474 std::integral_constant<bool, StackEmpty_> stack_empty,
475 /*std::integral_constant<bool, IsKey_>*/ bool is_key,
476 /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
477
478 // intentionally private: returns the configured maximum depth minus the remaining depth_
479 std::size_t
480 depth() const noexcept
481 {
482 return opt_.max_depth - depth_;
483 }
484
485 public:
486 /// Copy constructor (deleted)
487 basic_parser(
488 basic_parser const&) = delete;
489
490 /// Copy assignment (deleted)
491 basic_parser& operator=(
492 basic_parser const&) = delete;
493
494 /** Destructor.
495
496 All dynamically allocated internal memory is freed.
497
498 @par Effects
499 @code
500 this->handler().~Handler()
501 @endcode
502
503 @par Complexity
504 Same as `~Handler()`.
505
506 @par Exception Safety
507 Same as `~Handler()`.
508 */
509 ~basic_parser() = default;
510
511 /** Constructor.
512
513 This function constructs the parser with
514 the specified options, with any additional
515 arguments forwarded to the handler's constructor.
516
517 @par Complexity
518 Same as `Handler( std::forward< Args >( args )... )`.
519
520 @par Exception Safety
521 Same as `Handler( std::forward< Args >( args )... )`.
522
523 @param opt Configuration settings for the parser.
524 If this structure is default constructed, the
525 parser will accept only standard JSON.
526
527 @param args Optional additional arguments
528 forwarded to the handler's constructor.
529
530 @see parse_options
531 */
532 template<class... Args>
533 explicit
534 basic_parser(
535 parse_options const& opt,
536 Args&&... args);
537
538 /** Return a reference to the handler.
539
540 This function provides access to the constructed
541 instance of the handler owned by the parser.
542
543 @par Complexity
544 Constant.
545
546 @par Exception Safety
547 No-throw guarantee.
548 */
549 Handler&
550 handler() noexcept
551 {
552 return h_;
553 }
554
555 /** Return a const reference to the handler.
556
557 This function provides read-only access to the constructed
558 instance of the handler owned by the parser.
559
560 @par Complexity
561 Constant.
562
563 @par Exception Safety
564 No-throw guarantee.
565 */
566 Handler const&
567 handler() const noexcept
568 {
569 return h_;
570 }
571
572 /** Return the last error.
573
574 This returns the last error code which
575 was generated in the most recent call
576 to @ref write_some.
577
578 @par Complexity
579 Constant.
580
581 @par Exception Safety
582 No-throw guarantee.
583 */
584 error_code
585 last_error() const noexcept
586 {
587 return ec_;
588 }
589
590 /** Return true if a complete JSON has been parsed.
591
592 This function returns `true` when all of these
593 conditions are met:
594
595 @li A complete serialized JSON has been
596 presented to the parser, and
597
598 @li No error or exception has occurred since the
599 parser was constructed, or since the last call
600 to @ref reset.
601
602 @par Complexity
603 Constant.
604
605 @par Exception Safety
606 No-throw guarantee.
607 */
608 bool
609 done() const noexcept
610 {
611 return done_;
612 }
613
614 /** Reset the state, to parse a new document.
615
616 This function discards the current parsing
617 state, to prepare for parsing a new document.
618 Dynamically allocated temporary memory used
619 by the implementation is not deallocated.
620
621 @par Complexity
622 Constant.
623
624 @par Exception Safety
625 No-throw guarantee.
626 */
627 void
628 reset() noexcept;
629
630 /** Indicate a parsing failure.
631
632 This changes the state of the parser to indicate
633 that the parse has failed. A parser implementation
634 can use this to fail the parser if needed due to
635 external inputs.
636
637 @note
638
639 If `!ec`, the stored error code is unspecified.
640
641 @par Complexity
642 Constant.
643
644 @par Exception Safety
645 No-throw guarantee.
646
647 @param ec The error code to set. If the code does
648 not indicate failure, an implementation-defined
649 error code that indicates failure will be stored
650 instead.
651 */
652 void
653 fail(error_code ec) noexcept;
654
655 /** Parse some of an input string as JSON, incrementally.
656
657 This function parses the JSON in the specified
658 buffer, calling the handler to emit each SAX
659 parsing event. The parse proceeds from the
660 current state, which is at the beginning of a
661 new JSON or in the middle of the current JSON
662 if any characters were already parsed.
663 \n
664 The characters in the buffer are processed
665 starting from the beginning, until one of the
666 following conditions is met:
667
668 @li All of the characters in the buffer
669 have been parsed, or
670
671 @li Some of the characters in the buffer
672 have been parsed and the JSON is complete, or
673
674 @li A parsing error occurs.
675
676 The supplied buffer does not need to contain the
677 entire JSON. Subsequent calls can provide more
678 serialized data, allowing JSON to be processed
679 incrementally. The end of the serialized JSON
680 can be indicated by passing `more = false`.
681
682 @par Complexity
683 Linear in `size`.
684
685 @par Exception Safety
686 Basic guarantee.
687 Calls to the handler may throw.
688 Upon error or exception, subsequent calls will
689 fail until @ref reset is called to parse a new JSON.
690
691 @return The number of characters successfully
692 parsed, which may be smaller than `size`.
693
694 @param more `true` if there are possibly more
695 buffers in the current JSON, otherwise `false`.
696
697 @param data A pointer to a buffer of `size`
698 characters to parse.
699
700 @param size The number of characters pointed to
701 by `data`.
702
703 @param ec Set to the error, if any occurred.
704 */
705 std::size_t
706 write_some(
707 bool more,
708 char const* data,
709 std::size_t size,
710 error_code& ec);
711 };
712
713 BOOST_JSON_NS_END
714
715 #endif
716