1 //
2 // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
3 // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
4 //
5 // Distributed under the Boost Software License, Version 1.0. (See accompanying
6 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 //
8 // Official repository: https://github.com/boostorg/json
9 //
10 
11 #ifndef BOOST_JSON_BASIC_PARSER_HPP
12 #define BOOST_JSON_BASIC_PARSER_HPP
13 
14 #include <boost/json/detail/config.hpp>
15 #include <boost/json/error.hpp>
16 #include <boost/json/kind.hpp>
17 #include <boost/json/parse_options.hpp>
18 #include <boost/json/detail/stack.hpp>
19 #include <boost/json/detail/stream.hpp>
20 #include <boost/json/detail/utf8.hpp>
21 
22 /*  VFALCO NOTE
23 
24     This file is in the detail namespace because it
25     is not allowed to be included directly by users,
26     who should be including <boost/json/basic_parser.hpp>
27     instead, which provides the member function definitions.
28 
29     The source code is arranged this way to keep compile
30     times down.
31 */
32 
33 BOOST_JSON_NS_BEGIN
34 
35 /** An incremental SAX parser for serialized JSON.
36 
37     This implements a SAX-style parser, invoking a
38     caller-supplied handler with each parsing event.
39     To use, first declare a variable of type
40     `basic_parser<T>` where `T` meets the handler
41     requirements specified below. Then call
42     @ref write_some one or more times with the input,
43     setting `more = false` on the final buffer.
44     The parsing events are realized through member
45     function calls on the handler, which exists
46     as a data member of the parser.
47 \n
48     The parser may dynamically allocate intermediate
49     storage as needed to accommodate the nesting level
50     of the input JSON. On subsequent invocations, the
51     parser can cheaply re-use this memory, improving
52     performance. This storage is freed when the
53     parser is destroyed
54 
55     @par Usage
56 
57     To get the declaration and function definitions
58     for this class it is necessary to include this
59     file instead:
60     @code
61     #include <boost/json/basic_parser_impl.hpp>
62     @endcode
63 
64     Users who wish to parse JSON into the DOM container
65     @ref value will not use this class directly; instead
66     they will create an instance of @ref parser or
67     @ref stream_parser and use that instead. Alternatively,
68     they may call the function @ref parse. This class is
69     designed for users who wish to perform custom actions
70     instead of building a @ref value. For example, to
71     produce a DOM from an external library.
72 \n
73     @note
74 
75     By default, only conforming JSON using UTF-8
76     encoding is accepted. However, select non-compliant
77     syntax can be allowed by construction using a
78     @ref parse_options set to desired values.
79 
80     @par Handler
81 
82     The handler provided must be implemented as an
83     object of class type which defines each of the
84     required event member functions below. The event
85     functions return a `bool` where `true` indicates
86     success, and `false` indicates failure. If the
87     member function returns `false`, it must set
88     the error code to a suitable value. This error
89     code will be returned by the write function to
90     the caller.
91 \n
92     Handlers are required to declare the maximum
93     limits on various elements. If these limits
94     are exceeded during parsing, then parsing
95     fails with an error.
96 \n
97     The following declaration meets the parser's
98     handler requirements:
99 
100     @code
101     struct handler
102     {
103         /// The maximum number of elements allowed in an array
104         static constexpr std::size_t max_array_size = -1;
105 
106         /// The maximum number of elements allowed in an object
107         static constexpr std::size_t max_object_size = -1;
108 
109         /// The maximum number of characters allowed in a string
110         static constexpr std::size_t max_string_size = -1;
111 
112         /// The maximum number of characters allowed in a key
113         static constexpr std::size_t max_key_size = -1;
114 
115         /// Called once when the JSON parsing begins.
116         ///
117         /// @return `true` on success.
118         /// @param ec Set to the error, if any occurred.
119         ///
120         bool on_document_begin( error_code& ec );
121 
122         /// Called when the JSON parsing is done.
123         ///
124         /// @return `true` on success.
125         /// @param ec Set to the error, if any occurred.
126         ///
127         bool on_document_end( error_code& ec );
128 
129         /// Called when the beginning of an array is encountered.
130         ///
131         /// @return `true` on success.
132         /// @param ec Set to the error, if any occurred.
133         ///
134         bool on_array_begin( error_code& ec );
135 
136         /// Called when the end of the current array is encountered.
137         ///
138         /// @return `true` on success.
139         /// @param n The number of elements in the array.
140         /// @param ec Set to the error, if any occurred.
141         ///
142         bool on_array_end( std::size_t n, error_code& ec );
143 
144         /// Called when the beginning of an object is encountered.
145         ///
146         /// @return `true` on success.
147         /// @param ec Set to the error, if any occurred.
148         ///
149         bool on_object_begin( error_code& ec );
150 
151         /// Called when the end of the current object is encountered.
152         ///
153         /// @return `true` on success.
154         /// @param n The number of elements in the object.
155         /// @param ec Set to the error, if any occurred.
156         ///
157         bool on_object_end( std::size_t n, error_code& ec );
158 
159         /// Called with characters corresponding to part of the current string.
160         ///
161         /// @return `true` on success.
162         /// @param s The partial characters
163         /// @param n The total size of the string thus far
164         /// @param ec Set to the error, if any occurred.
165         ///
166         bool on_string_part( string_view s, std::size_t n, error_code& ec );
167 
168         /// Called with the last characters corresponding to the current string.
169         ///
170         /// @return `true` on success.
171         /// @param s The remaining characters
172         /// @param n The total size of the string
173         /// @param ec Set to the error, if any occurred.
174         ///
175         bool on_string( string_view s, std::size_t n, error_code& ec );
176 
177         /// Called with characters corresponding to part of the current key.
178         ///
179         /// @return `true` on success.
180         /// @param s The partial characters
181         /// @param n The total size of the key thus far
182         /// @param ec Set to the error, if any occurred.
183         ///
184         bool on_key_part( string_view s, std::size_t n, error_code& ec );
185 
186         /// Called with the last characters corresponding to the current key.
187         ///
188         /// @return `true` on success.
189         /// @param s The remaining characters
190         /// @param n The total size of the key
191         /// @param ec Set to the error, if any occurred.
192         ///
193         bool on_key( string_view s, std::size_t n, error_code& ec );
194 
195         /// Called with the characters corresponding to part of the current number.
196         ///
197         /// @return `true` on success.
198         /// @param s The partial characters
199         /// @param ec Set to the error, if any occurred.
200         ///
201         bool on_number_part( string_view s, error_code& ec );
202 
203         /// Called when a signed integer is parsed.
204         ///
205         /// @return `true` on success.
206         /// @param i The value
207         /// @param s The remaining characters
208         /// @param ec Set to the error, if any occurred.
209         ///
210         bool on_int64( int64_t i, string_view s, error_code& ec );
211 
212         /// Called when an unsigend integer is parsed.
213         ///
214         /// @return `true` on success.
215         /// @param u The value
216         /// @param s The remaining characters
217         /// @param ec Set to the error, if any occurred.
218         ///
219         bool on_uint64( uint64_t u, string_view s, error_code& ec );
220 
221         /// Called when a double is parsed.
222         ///
223         /// @return `true` on success.
224         /// @param d The value
225         /// @param s The remaining characters
226         /// @param ec Set to the error, if any occurred.
227         ///
228         bool on_double( double d, string_view s, error_code& ec );
229 
230         /// Called when a boolean is parsed.
231         ///
232         /// @return `true` on success.
233         /// @param b The value
234         /// @param s The remaining characters
235         /// @param ec Set to the error, if any occurred.
236         ///
237         bool on_bool( bool b, error_code& ec );
238 
239         /// Called when a null is parsed.
240         ///
241         /// @return `true` on success.
242         /// @param ec Set to the error, if any occurred.
243         ///
244         bool on_null( error_code& ec );
245 
246         /// Called with characters corresponding to part of the current comment.
247         ///
248         /// @return `true` on success.
249         /// @param s The partial characters.
250         /// @param ec Set to the error, if any occurred.
251         ///
252         bool on_comment_part( string_view s, error_code& ec );
253 
254         /// Called with the last characters corresponding to the current comment.
255         ///
256         /// @return `true` on success.
257         /// @param s The remaining characters
258         /// @param ec Set to the error, if any occurred.
259         ///
260         bool on_comment( string_view s, error_code& ec );
261     };
262     @endcode
263 
264     @see
265         @ref parse,
266         @ref stream_parser.
267 
268     @headerfile <boost/json/basic_parser.hpp>
269 */
270 template<class Handler>
271 class basic_parser
272 {
273     enum class state : char
274     {
275         doc1,  doc2,  doc3, doc4,
276         com1,  com2,  com3, com4,
277         nul1,  nul2,  nul3,
278         tru1,  tru2,  tru3,
279         fal1,  fal2,  fal3,  fal4,
280         str1,  str2,  str3,  str4,
281         str5,  str6,  str7,  str8,
282         sur1,  sur2,  sur3,
283         sur4,  sur5,  sur6,
284         obj1,  obj2,  obj3,  obj4,
285         obj5,  obj6,  obj7,  obj8,
286         obj9,  obj10, obj11,
287         arr1,  arr2,  arr3,
288         arr4,  arr5,  arr6,
289         num1,  num2,  num3,  num4,
290         num5,  num6,  num7,  num8,
291         exp1,  exp2,  exp3,
292         val1,  val2
293     };
294 
295     struct number
296     {
297         uint64_t mant;
298         int bias;
299         int exp;
300         bool frac;
301         bool neg;
302     };
303 
304     // optimization: must come first
305     Handler h_;
306 
307     number num_;
308     error_code ec_;
309     detail::stack st_;
310     detail::utf8_sequence seq_;
311     unsigned u1_;
312     unsigned u2_;
313     bool more_; // false for final buffer
314     bool done_ = false; // true on complete parse
315     bool clean_ = true; // write_some exited cleanly
316     const char* end_;
317     parse_options opt_;
318     // how many levels deeper the parser can go
319     std::size_t depth_ = opt_.max_depth;
320 
321     inline void reserve();
322     inline const char* sentinel();
323     inline bool incomplete(
324         const detail::const_stream_wrapper& cs);
325 
326 #ifdef __INTEL_COMPILER
327 #pragma warning push
328 #pragma warning disable 2196
329 #endif
330 
331     BOOST_NOINLINE
332     inline
333     const char*
334     suspend_or_fail(state st);
335 
336     BOOST_NOINLINE
337     inline
338     const char*
339     suspend_or_fail(
340         state st,
341         std::size_t n);
342 
343     BOOST_NOINLINE
344     inline
345     const char*
346     fail(const char* p) noexcept;
347 
348     BOOST_NOINLINE
349     inline
350     const char*
351     fail(
352         const char* p,
353         error ev) noexcept;
354 
355     BOOST_NOINLINE
356     inline
357     const char*
358     maybe_suspend(
359         const char* p,
360         state st);
361 
362     BOOST_NOINLINE
363     inline
364     const char*
365     maybe_suspend(
366         const char* p,
367         state st,
368         std::size_t n);
369 
370     BOOST_NOINLINE
371     inline
372     const char*
373     maybe_suspend(
374         const char* p,
375         state st,
376         const number& num);
377 
378     BOOST_NOINLINE
379     inline
380     const char*
381     suspend(
382         const char* p,
383         state st);
384 
385     BOOST_NOINLINE
386     inline
387     const char*
388     suspend(
389         const char* p,
390         state st,
391         const number& num);
392 
393 #ifdef __INTEL_COMPILER
394 #pragma warning pop
395 #endif
396 
397     template<bool StackEmpty_/*, bool Terminal_*/>
398     const char* parse_comment(const char* p,
399         std::integral_constant<bool, StackEmpty_> stack_empty,
400         /*std::integral_constant<bool, Terminal_>*/ bool terminal);
401 
402     template<bool StackEmpty_>
403     const char* parse_document(const char* p,
404         std::integral_constant<bool, StackEmpty_> stack_empty);
405 
406     template<bool StackEmpty_, bool AllowComments_/*,
407         bool AllowTrailing_, bool AllowBadUTF8_*/>
408     const char* parse_value(const char* p,
409         std::integral_constant<bool, StackEmpty_> stack_empty,
410         std::integral_constant<bool, AllowComments_> allow_comments,
411         /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
412         /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
413 
414     template<bool StackEmpty_, bool AllowComments_/*,
415         bool AllowTrailing_, bool AllowBadUTF8_*/>
416     const char* resume_value(const char* p,
417         std::integral_constant<bool, StackEmpty_> stack_empty,
418         std::integral_constant<bool, AllowComments_> allow_comments,
419         /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
420         /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
421 
422     template<bool StackEmpty_, bool AllowComments_/*,
423         bool AllowTrailing_, bool AllowBadUTF8_*/>
424     const char* parse_object(const char* p,
425         std::integral_constant<bool, StackEmpty_> stack_empty,
426         std::integral_constant<bool, AllowComments_> allow_comments,
427         /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
428         /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
429 
430     template<bool StackEmpty_, bool AllowComments_/*,
431         bool AllowTrailing_, bool AllowBadUTF8_*/>
432     const char* parse_array(const char* p,
433         std::integral_constant<bool, StackEmpty_> stack_empty,
434         std::integral_constant<bool, AllowComments_> allow_comments,
435         /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
436         /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
437 
438     template<bool StackEmpty_>
439     const char* parse_null(const char* p,
440         std::integral_constant<bool, StackEmpty_> stack_empty);
441 
442     template<bool StackEmpty_>
443     const char* parse_true(const char* p,
444         std::integral_constant<bool, StackEmpty_> stack_empty);
445 
446     template<bool StackEmpty_>
447     const char* parse_false(const char* p,
448         std::integral_constant<bool, StackEmpty_> stack_empty);
449 
450     template<bool StackEmpty_, bool IsKey_/*,
451         bool AllowBadUTF8_*/>
452     const char* parse_string(const char* p,
453         std::integral_constant<bool, StackEmpty_> stack_empty,
454         std::integral_constant<bool, IsKey_> is_key,
455         /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
456 
457     template<bool StackEmpty_, char First_>
458     const char* parse_number(const char* p,
459         std::integral_constant<bool, StackEmpty_> stack_empty,
460         std::integral_constant<char, First_> first);
461 
462     template<bool StackEmpty_, bool IsKey_/*,
463         bool AllowBadUTF8_*/>
464     const char* parse_unescaped(const char* p,
465         std::integral_constant<bool, StackEmpty_> stack_empty,
466         std::integral_constant<bool, IsKey_> is_key,
467         /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
468 
469     template<bool StackEmpty_/*, bool IsKey_,
470         bool AllowBadUTF8_*/>
471     const char* parse_escaped(
472         const char* p,
473         std::size_t total,
474         std::integral_constant<bool, StackEmpty_> stack_empty,
475         /*std::integral_constant<bool, IsKey_>*/ bool is_key,
476         /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
477 
478     // intentionally private
479     std::size_t
depth() const480     depth() const noexcept
481     {
482         return opt_.max_depth - depth_;
483     }
484 
485 public:
486     /// Copy constructor (deleted)
487     basic_parser(
488         basic_parser const&) = delete;
489 
490     /// Copy assignment (deleted)
491     basic_parser& operator=(
492         basic_parser const&) = delete;
493 
494     /** Destructor.
495 
496         All dynamically allocated internal memory is freed.
497 
498         @par Effects
499         @code
500         this->handler().~Handler()
501         @endcode
502 
503         @par Complexity
504         Same as `~Handler()`.
505 
506         @par Exception Safety
507         Same as `~Handler()`.
508     */
509     ~basic_parser() = default;
510 
511     /** Constructor.
512 
513         This function constructs the parser with
514         the specified options, with any additional
515         arguments forwarded to the handler's constructor.
516 
517         @par Complexity
518         Same as `Handler( std::forward< Args >( args )... )`.
519 
520         @par Exception Safety
521         Same as `Handler( std::forward< Args >( args )... )`.
522 
523         @param opt Configuration settings for the parser.
524         If this structure is default constructed, the
525         parser will accept only standard JSON.
526 
527         @param args Optional additional arguments
528         forwarded to the handler's constructor.
529 
530         @see parse_options
531     */
532     template<class... Args>
533     explicit
534     basic_parser(
535         parse_options const& opt,
536         Args&&... args);
537 
538     /** Return a reference to the handler.
539 
540         This function provides access to the constructed
541         instance of the handler owned by the parser.
542 
543         @par Complexity
544         Constant.
545 
546         @par Exception Safety
547         No-throw guarantee.
548     */
549     Handler&
handler()550     handler() noexcept
551     {
552         return h_;
553     }
554 
555     /** Return a reference to the handler.
556 
557         This function provides access to the constructed
558         instance of the handler owned by the parser.
559 
560         @par Complexity
561         Constant.
562 
563         @par Exception Safety
564         No-throw guarantee.
565     */
566     Handler const&
handler() const567     handler() const noexcept
568     {
569         return h_;
570     }
571 
572     /** Return the last error.
573 
574         This returns the last error code which
575         was generated in the most recent call
576         to @ref write_some.
577 
578         @par Complexity
579         Constant.
580 
581         @par Exception Safety
582         No-throw guarantee.
583     */
584     error_code
last_error() const585     last_error() const noexcept
586     {
587         return ec_;
588     }
589 
590     /** Return true if a complete JSON has been parsed.
591 
592         This function returns `true` when all of these
593         conditions are met:
594 
595         @li A complete serialized JSON has been
596             presented to the parser, and
597 
598         @li No error or exception has occurred since the
599             parser was constructed, or since the last call
600             to @ref reset,
601 
602         @par Complexity
603         Constant.
604 
605         @par Exception Safety
606         No-throw guarantee.
607     */
608     bool
done() const609     done() const noexcept
610     {
611         return done_;
612     }
613 
614     /** Reset the state, to parse a new document.
615 
616         This function discards the current parsing
617         state, to prepare for parsing a new document.
618         Dynamically allocated temporary memory used
619         by the implementation is not deallocated.
620 
621         @par Complexity
622         Constant.
623 
624         @par Exception Safety
625         No-throw guarantee.
626     */
627     void
628     reset() noexcept;
629 
630     /** Indicate a parsing failure.
631 
632         This changes the state of the parser to indicate
633         that the parse has failed. A parser implementation
634         can use this to fail the parser if needed due to
635         external inputs.
636 
637         @note
638 
639         If `!ec`, the stored error code is unspecified.
640 
641         @par Complexity
642         Constant.
643 
644         @par Exception Safety
645         No-throw guarantee.
646 
647         @param ec The error code to set. If the code does
648         not indicate failure, an implementation-defined
649         error code that indicates failure will be stored
650         instead.
651     */
652     void
653     fail(error_code ec) noexcept;
654 
655     /** Parse some of an input string as JSON, incrementally.
656 
657         This function parses the JSON in the specified
658         buffer, calling the handler to emit each SAX
659         parsing event. The parse proceeds from the
660         current state, which is at the beginning of a
661         new JSON or in the middle of the current JSON
662         if any characters were already parsed.
663     \n
664         The characters in the buffer are processed
665         starting from the beginning, until one of the
666         following conditions is met:
667 
668         @li All of the characters in the buffer
669         have been parsed, or
670 
671         @li Some of the characters in the buffer
672         have been parsed and the JSON is complete, or
673 
674         @li A parsing error occurs.
675 
676         The supplied buffer does not need to contain the
677         entire JSON. Subsequent calls can provide more
678         serialized data, allowing JSON to be processed
679         incrementally. The end of the serialized JSON
680         can be indicated by passing `more = false`.
681 
682         @par Complexity
683         Linear in `size`.
684 
685         @par Exception Safety
686         Basic guarantee.
687         Calls to the handler may throw.
688         Upon error or exception, subsequent calls will
689         fail until @ref reset is called to parse a new JSON.
690 
691         @return The number of characters successfully
692         parsed, which may be smaller than `size`.
693 
694         @param more `true` if there are possibly more
695         buffers in the current JSON, otherwise `false`.
696 
697         @param data A pointer to a buffer of `size`
698         characters to parse.
699 
700         @param size The number of characters pointed to
701         by `data`.
702 
703         @param ec Set to the error, if any occurred.
704     */
705     std::size_t
706     write_some(
707         bool more,
708         char const* data,
709         std::size_t size,
710         error_code& ec);
711 };
712 
713 BOOST_JSON_NS_END
714 
715 #endif
716