1 // 2 // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com) 3 // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com) 4 // 5 // Distributed under the Boost Software License, Version 1.0. (See accompanying 6 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 7 // 8 // Official repository: https://github.com/boostorg/json 9 // 10 11 #ifndef BOOST_JSON_BASIC_PARSER_HPP 12 #define BOOST_JSON_BASIC_PARSER_HPP 13 14 #include <boost/json/detail/config.hpp> 15 #include <boost/json/error.hpp> 16 #include <boost/json/kind.hpp> 17 #include <boost/json/parse_options.hpp> 18 #include <boost/json/detail/stack.hpp> 19 #include <boost/json/detail/stream.hpp> 20 #include <boost/json/detail/utf8.hpp> 21 22 /* VFALCO NOTE 23 24 This file is in the detail namespace because it 25 is not allowed to be included directly by users, 26 who should be including <boost/json/basic_parser.hpp> 27 instead, which provides the member function definitions. 28 29 The source code is arranged this way to keep compile 30 times down. 31 */ 32 33 BOOST_JSON_NS_BEGIN 34 35 /** An incremental SAX parser for serialized JSON. 36 37 This implements a SAX-style parser, invoking a 38 caller-supplied handler with each parsing event. 39 To use, first declare a variable of type 40 `basic_parser<T>` where `T` meets the handler 41 requirements specified below. Then call 42 @ref write_some one or more times with the input, 43 setting `more = false` on the final buffer. 44 The parsing events are realized through member 45 function calls on the handler, which exists 46 as a data member of the parser. 47 \n 48 The parser may dynamically allocate intermediate 49 storage as needed to accommodate the nesting level 50 of the input JSON. On subsequent invocations, the 51 parser can cheaply re-use this memory, improving 52 performance. This storage is freed when the 53 parser is destroyed 54 55 @par Usage 56 57 To get the declaration and function definitions 58 for this class it is necessary to include this 59 file instead: 60 @code 61 #include <boost/json/basic_parser_impl.hpp> 62 @endcode 63 64 Users who wish to parse JSON into the DOM container 65 @ref value will not use this class directly; instead 66 they will create an instance of @ref parser or 67 @ref stream_parser and use that instead. Alternatively, 68 they may call the function @ref parse. This class is 69 designed for users who wish to perform custom actions 70 instead of building a @ref value. For example, to 71 produce a DOM from an external library. 72 \n 73 @note 74 75 By default, only conforming JSON using UTF-8 76 encoding is accepted. However, select non-compliant 77 syntax can be allowed by construction using a 78 @ref parse_options set to desired values. 79 80 @par Handler 81 82 The handler provided must be implemented as an 83 object of class type which defines each of the 84 required event member functions below. The event 85 functions return a `bool` where `true` indicates 86 success, and `false` indicates failure. If the 87 member function returns `false`, it must set 88 the error code to a suitable value. This error 89 code will be returned by the write function to 90 the caller. 91 \n 92 Handlers are required to declare the maximum 93 limits on various elements. If these limits 94 are exceeded during parsing, then parsing 95 fails with an error. 96 \n 97 The following declaration meets the parser's 98 handler requirements: 99 100 @code 101 struct handler 102 { 103 /// The maximum number of elements allowed in an array 104 static constexpr std::size_t max_array_size = -1; 105 106 /// The maximum number of elements allowed in an object 107 static constexpr std::size_t max_object_size = -1; 108 109 /// The maximum number of characters allowed in a string 110 static constexpr std::size_t max_string_size = -1; 111 112 /// The maximum number of characters allowed in a key 113 static constexpr std::size_t max_key_size = -1; 114 115 /// Called once when the JSON parsing begins. 116 /// 117 /// @return `true` on success. 118 /// @param ec Set to the error, if any occurred. 119 /// 120 bool on_document_begin( error_code& ec ); 121 122 /// Called when the JSON parsing is done. 123 /// 124 /// @return `true` on success. 125 /// @param ec Set to the error, if any occurred. 126 /// 127 bool on_document_end( error_code& ec ); 128 129 /// Called when the beginning of an array is encountered. 130 /// 131 /// @return `true` on success. 132 /// @param ec Set to the error, if any occurred. 133 /// 134 bool on_array_begin( error_code& ec ); 135 136 /// Called when the end of the current array is encountered. 137 /// 138 /// @return `true` on success. 139 /// @param n The number of elements in the array. 140 /// @param ec Set to the error, if any occurred. 141 /// 142 bool on_array_end( std::size_t n, error_code& ec ); 143 144 /// Called when the beginning of an object is encountered. 145 /// 146 /// @return `true` on success. 147 /// @param ec Set to the error, if any occurred. 148 /// 149 bool on_object_begin( error_code& ec ); 150 151 /// Called when the end of the current object is encountered. 152 /// 153 /// @return `true` on success. 154 /// @param n The number of elements in the object. 155 /// @param ec Set to the error, if any occurred. 156 /// 157 bool on_object_end( std::size_t n, error_code& ec ); 158 159 /// Called with characters corresponding to part of the current string. 160 /// 161 /// @return `true` on success. 162 /// @param s The partial characters 163 /// @param n The total size of the string thus far 164 /// @param ec Set to the error, if any occurred. 165 /// 166 bool on_string_part( string_view s, std::size_t n, error_code& ec ); 167 168 /// Called with the last characters corresponding to the current string. 169 /// 170 /// @return `true` on success. 171 /// @param s The remaining characters 172 /// @param n The total size of the string 173 /// @param ec Set to the error, if any occurred. 174 /// 175 bool on_string( string_view s, std::size_t n, error_code& ec ); 176 177 /// Called with characters corresponding to part of the current key. 178 /// 179 /// @return `true` on success. 180 /// @param s The partial characters 181 /// @param n The total size of the key thus far 182 /// @param ec Set to the error, if any occurred. 183 /// 184 bool on_key_part( string_view s, std::size_t n, error_code& ec ); 185 186 /// Called with the last characters corresponding to the current key. 187 /// 188 /// @return `true` on success. 189 /// @param s The remaining characters 190 /// @param n The total size of the key 191 /// @param ec Set to the error, if any occurred. 192 /// 193 bool on_key( string_view s, std::size_t n, error_code& ec ); 194 195 /// Called with the characters corresponding to part of the current number. 196 /// 197 /// @return `true` on success. 198 /// @param s The partial characters 199 /// @param ec Set to the error, if any occurred. 200 /// 201 bool on_number_part( string_view s, error_code& ec ); 202 203 /// Called when a signed integer is parsed. 204 /// 205 /// @return `true` on success. 206 /// @param i The value 207 /// @param s The remaining characters 208 /// @param ec Set to the error, if any occurred. 209 /// 210 bool on_int64( int64_t i, string_view s, error_code& ec ); 211 212 /// Called when an unsigend integer is parsed. 213 /// 214 /// @return `true` on success. 215 /// @param u The value 216 /// @param s The remaining characters 217 /// @param ec Set to the error, if any occurred. 218 /// 219 bool on_uint64( uint64_t u, string_view s, error_code& ec ); 220 221 /// Called when a double is parsed. 222 /// 223 /// @return `true` on success. 224 /// @param d The value 225 /// @param s The remaining characters 226 /// @param ec Set to the error, if any occurred. 227 /// 228 bool on_double( double d, string_view s, error_code& ec ); 229 230 /// Called when a boolean is parsed. 231 /// 232 /// @return `true` on success. 233 /// @param b The value 234 /// @param s The remaining characters 235 /// @param ec Set to the error, if any occurred. 236 /// 237 bool on_bool( bool b, error_code& ec ); 238 239 /// Called when a null is parsed. 240 /// 241 /// @return `true` on success. 242 /// @param ec Set to the error, if any occurred. 243 /// 244 bool on_null( error_code& ec ); 245 246 /// Called with characters corresponding to part of the current comment. 247 /// 248 /// @return `true` on success. 249 /// @param s The partial characters. 250 /// @param ec Set to the error, if any occurred. 251 /// 252 bool on_comment_part( string_view s, error_code& ec ); 253 254 /// Called with the last characters corresponding to the current comment. 255 /// 256 /// @return `true` on success. 257 /// @param s The remaining characters 258 /// @param ec Set to the error, if any occurred. 259 /// 260 bool on_comment( string_view s, error_code& ec ); 261 }; 262 @endcode 263 264 @see 265 @ref parse, 266 @ref stream_parser. 267 268 @headerfile <boost/json/basic_parser.hpp> 269 */ 270 template<class Handler> 271 class basic_parser 272 { 273 enum class state : char 274 { 275 doc1, doc2, doc3, doc4, 276 com1, com2, com3, com4, 277 nul1, nul2, nul3, 278 tru1, tru2, tru3, 279 fal1, fal2, fal3, fal4, 280 str1, str2, str3, str4, 281 str5, str6, str7, str8, 282 sur1, sur2, sur3, 283 sur4, sur5, sur6, 284 obj1, obj2, obj3, obj4, 285 obj5, obj6, obj7, obj8, 286 obj9, obj10, obj11, 287 arr1, arr2, arr3, 288 arr4, arr5, arr6, 289 num1, num2, num3, num4, 290 num5, num6, num7, num8, 291 exp1, exp2, exp3, 292 val1, val2 293 }; 294 295 struct number 296 { 297 uint64_t mant; 298 int bias; 299 int exp; 300 bool frac; 301 bool neg; 302 }; 303 304 // optimization: must come first 305 Handler h_; 306 307 number num_; 308 error_code ec_; 309 detail::stack st_; 310 detail::utf8_sequence seq_; 311 unsigned u1_; 312 unsigned u2_; 313 bool more_; // false for final buffer 314 bool done_ = false; // true on complete parse 315 bool clean_ = true; // write_some exited cleanly 316 const char* end_; 317 parse_options opt_; 318 // how many levels deeper the parser can go 319 std::size_t depth_ = opt_.max_depth; 320 321 inline void reserve(); 322 inline const char* sentinel(); 323 inline bool incomplete( 324 const detail::const_stream_wrapper& cs); 325 326 #ifdef __INTEL_COMPILER 327 #pragma warning push 328 #pragma warning disable 2196 329 #endif 330 331 BOOST_NOINLINE 332 inline 333 const char* 334 suspend_or_fail(state st); 335 336 BOOST_NOINLINE 337 inline 338 const char* 339 suspend_or_fail( 340 state st, 341 std::size_t n); 342 343 BOOST_NOINLINE 344 inline 345 const char* 346 fail(const char* p) noexcept; 347 348 BOOST_NOINLINE 349 inline 350 const char* 351 fail( 352 const char* p, 353 error ev) noexcept; 354 355 BOOST_NOINLINE 356 inline 357 const char* 358 maybe_suspend( 359 const char* p, 360 state st); 361 362 BOOST_NOINLINE 363 inline 364 const char* 365 maybe_suspend( 366 const char* p, 367 state st, 368 std::size_t n); 369 370 BOOST_NOINLINE 371 inline 372 const char* 373 maybe_suspend( 374 const char* p, 375 state st, 376 const number& num); 377 378 BOOST_NOINLINE 379 inline 380 const char* 381 suspend( 382 const char* p, 383 state st); 384 385 BOOST_NOINLINE 386 inline 387 const char* 388 suspend( 389 const char* p, 390 state st, 391 const number& num); 392 393 #ifdef __INTEL_COMPILER 394 #pragma warning pop 395 #endif 396 397 template<bool StackEmpty_/*, bool Terminal_*/> 398 const char* parse_comment(const char* p, 399 std::integral_constant<bool, StackEmpty_> stack_empty, 400 /*std::integral_constant<bool, Terminal_>*/ bool terminal); 401 402 template<bool StackEmpty_> 403 const char* parse_document(const char* p, 404 std::integral_constant<bool, StackEmpty_> stack_empty); 405 406 template<bool StackEmpty_, bool AllowComments_/*, 407 bool AllowTrailing_, bool AllowBadUTF8_*/> 408 const char* parse_value(const char* p, 409 std::integral_constant<bool, StackEmpty_> stack_empty, 410 std::integral_constant<bool, AllowComments_> allow_comments, 411 /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing, 412 /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8); 413 414 template<bool StackEmpty_, bool AllowComments_/*, 415 bool AllowTrailing_, bool AllowBadUTF8_*/> 416 const char* resume_value(const char* p, 417 std::integral_constant<bool, StackEmpty_> stack_empty, 418 std::integral_constant<bool, AllowComments_> allow_comments, 419 /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing, 420 /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8); 421 422 template<bool StackEmpty_, bool AllowComments_/*, 423 bool AllowTrailing_, bool AllowBadUTF8_*/> 424 const char* parse_object(const char* p, 425 std::integral_constant<bool, StackEmpty_> stack_empty, 426 std::integral_constant<bool, AllowComments_> allow_comments, 427 /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing, 428 /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8); 429 430 template<bool StackEmpty_, bool AllowComments_/*, 431 bool AllowTrailing_, bool AllowBadUTF8_*/> 432 const char* parse_array(const char* p, 433 std::integral_constant<bool, StackEmpty_> stack_empty, 434 std::integral_constant<bool, AllowComments_> allow_comments, 435 /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing, 436 /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8); 437 438 template<bool StackEmpty_> 439 const char* parse_null(const char* p, 440 std::integral_constant<bool, StackEmpty_> stack_empty); 441 442 template<bool StackEmpty_> 443 const char* parse_true(const char* p, 444 std::integral_constant<bool, StackEmpty_> stack_empty); 445 446 template<bool StackEmpty_> 447 const char* parse_false(const char* p, 448 std::integral_constant<bool, StackEmpty_> stack_empty); 449 450 template<bool StackEmpty_, bool IsKey_/*, 451 bool AllowBadUTF8_*/> 452 const char* parse_string(const char* p, 453 std::integral_constant<bool, StackEmpty_> stack_empty, 454 std::integral_constant<bool, IsKey_> is_key, 455 /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8); 456 457 template<bool StackEmpty_, char First_> 458 const char* parse_number(const char* p, 459 std::integral_constant<bool, StackEmpty_> stack_empty, 460 std::integral_constant<char, First_> first); 461 462 template<bool StackEmpty_, bool IsKey_/*, 463 bool AllowBadUTF8_*/> 464 const char* parse_unescaped(const char* p, 465 std::integral_constant<bool, StackEmpty_> stack_empty, 466 std::integral_constant<bool, IsKey_> is_key, 467 /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8); 468 469 template<bool StackEmpty_/*, bool IsKey_, 470 bool AllowBadUTF8_*/> 471 const char* parse_escaped( 472 const char* p, 473 std::size_t total, 474 std::integral_constant<bool, StackEmpty_> stack_empty, 475 /*std::integral_constant<bool, IsKey_>*/ bool is_key, 476 /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8); 477 478 // intentionally private 479 std::size_t depth() const480 depth() const noexcept 481 { 482 return opt_.max_depth - depth_; 483 } 484 485 public: 486 /// Copy constructor (deleted) 487 basic_parser( 488 basic_parser const&) = delete; 489 490 /// Copy assignment (deleted) 491 basic_parser& operator=( 492 basic_parser const&) = delete; 493 494 /** Destructor. 495 496 All dynamically allocated internal memory is freed. 497 498 @par Effects 499 @code 500 this->handler().~Handler() 501 @endcode 502 503 @par Complexity 504 Same as `~Handler()`. 505 506 @par Exception Safety 507 Same as `~Handler()`. 508 */ 509 ~basic_parser() = default; 510 511 /** Constructor. 512 513 This function constructs the parser with 514 the specified options, with any additional 515 arguments forwarded to the handler's constructor. 516 517 @par Complexity 518 Same as `Handler( std::forward< Args >( args )... )`. 519 520 @par Exception Safety 521 Same as `Handler( std::forward< Args >( args )... )`. 522 523 @param opt Configuration settings for the parser. 524 If this structure is default constructed, the 525 parser will accept only standard JSON. 526 527 @param args Optional additional arguments 528 forwarded to the handler's constructor. 529 530 @see parse_options 531 */ 532 template<class... Args> 533 explicit 534 basic_parser( 535 parse_options const& opt, 536 Args&&... args); 537 538 /** Return a reference to the handler. 539 540 This function provides access to the constructed 541 instance of the handler owned by the parser. 542 543 @par Complexity 544 Constant. 545 546 @par Exception Safety 547 No-throw guarantee. 548 */ 549 Handler& handler()550 handler() noexcept 551 { 552 return h_; 553 } 554 555 /** Return a reference to the handler. 556 557 This function provides access to the constructed 558 instance of the handler owned by the parser. 559 560 @par Complexity 561 Constant. 562 563 @par Exception Safety 564 No-throw guarantee. 565 */ 566 Handler const& handler() const567 handler() const noexcept 568 { 569 return h_; 570 } 571 572 /** Return the last error. 573 574 This returns the last error code which 575 was generated in the most recent call 576 to @ref write_some. 577 578 @par Complexity 579 Constant. 580 581 @par Exception Safety 582 No-throw guarantee. 583 */ 584 error_code last_error() const585 last_error() const noexcept 586 { 587 return ec_; 588 } 589 590 /** Return true if a complete JSON has been parsed. 591 592 This function returns `true` when all of these 593 conditions are met: 594 595 @li A complete serialized JSON has been 596 presented to the parser, and 597 598 @li No error or exception has occurred since the 599 parser was constructed, or since the last call 600 to @ref reset, 601 602 @par Complexity 603 Constant. 604 605 @par Exception Safety 606 No-throw guarantee. 607 */ 608 bool done() const609 done() const noexcept 610 { 611 return done_; 612 } 613 614 /** Reset the state, to parse a new document. 615 616 This function discards the current parsing 617 state, to prepare for parsing a new document. 618 Dynamically allocated temporary memory used 619 by the implementation is not deallocated. 620 621 @par Complexity 622 Constant. 623 624 @par Exception Safety 625 No-throw guarantee. 626 */ 627 void 628 reset() noexcept; 629 630 /** Indicate a parsing failure. 631 632 This changes the state of the parser to indicate 633 that the parse has failed. A parser implementation 634 can use this to fail the parser if needed due to 635 external inputs. 636 637 @note 638 639 If `!ec`, the stored error code is unspecified. 640 641 @par Complexity 642 Constant. 643 644 @par Exception Safety 645 No-throw guarantee. 646 647 @param ec The error code to set. If the code does 648 not indicate failure, an implementation-defined 649 error code that indicates failure will be stored 650 instead. 651 */ 652 void 653 fail(error_code ec) noexcept; 654 655 /** Parse some of an input string as JSON, incrementally. 656 657 This function parses the JSON in the specified 658 buffer, calling the handler to emit each SAX 659 parsing event. The parse proceeds from the 660 current state, which is at the beginning of a 661 new JSON or in the middle of the current JSON 662 if any characters were already parsed. 663 \n 664 The characters in the buffer are processed 665 starting from the beginning, until one of the 666 following conditions is met: 667 668 @li All of the characters in the buffer 669 have been parsed, or 670 671 @li Some of the characters in the buffer 672 have been parsed and the JSON is complete, or 673 674 @li A parsing error occurs. 675 676 The supplied buffer does not need to contain the 677 entire JSON. Subsequent calls can provide more 678 serialized data, allowing JSON to be processed 679 incrementally. The end of the serialized JSON 680 can be indicated by passing `more = false`. 681 682 @par Complexity 683 Linear in `size`. 684 685 @par Exception Safety 686 Basic guarantee. 687 Calls to the handler may throw. 688 Upon error or exception, subsequent calls will 689 fail until @ref reset is called to parse a new JSON. 690 691 @return The number of characters successfully 692 parsed, which may be smaller than `size`. 693 694 @param more `true` if there are possibly more 695 buffers in the current JSON, otherwise `false`. 696 697 @param data A pointer to a buffer of `size` 698 characters to parse. 699 700 @param size The number of characters pointed to 701 by `data`. 702 703 @param ec Set to the error, if any occurred. 704 */ 705 std::size_t 706 write_some( 707 bool more, 708 char const* data, 709 std::size_t size, 710 error_code& ec); 711 }; 712 713 BOOST_JSON_NS_END 714 715 #endif 716