1 #pragma once 2 3 #include <algorithm> // generate_n 4 #include <array> // array 5 #include <cassert> // assert 6 #include <cmath> // ldexp 7 #include <cstddef> // size_t 8 #include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t 9 #include <cstdio> // snprintf 10 #include <cstring> // memcpy 11 #include <iterator> // back_inserter 12 #include <limits> // numeric_limits 13 #include <string> // char_traits, string 14 #include <utility> // make_pair, move 15 16 #include <nlohmann/detail/exceptions.hpp> 17 #include <nlohmann/detail/input/input_adapters.hpp> 18 #include <nlohmann/detail/input/json_sax.hpp> 19 #include <nlohmann/detail/macro_scope.hpp> 20 #include <nlohmann/detail/meta/is_sax.hpp> 21 #include <nlohmann/detail/value_t.hpp> 22 23 namespace nlohmann 24 { 25 namespace detail 26 { 27 /////////////////// 28 // binary reader // 29 /////////////////// 30 31 /*! 32 @brief deserialization of CBOR, MessagePack, and UBJSON values 33 */ 34 template<typename BasicJsonType, typename SAX = json_sax_dom_parser<BasicJsonType>> 35 class binary_reader 36 { 37 using number_integer_t = typename BasicJsonType::number_integer_t; 38 using number_unsigned_t = typename BasicJsonType::number_unsigned_t; 39 using number_float_t = typename BasicJsonType::number_float_t; 40 using string_t = typename BasicJsonType::string_t; 41 using json_sax_t = SAX; 42 43 public: 44 /*! 45 @brief create a binary reader 46 47 @param[in] adapter input adapter to read from 48 */ binary_reader(input_adapter_t adapter)49 explicit binary_reader(input_adapter_t adapter) : ia(std::move(adapter)) 50 { 51 (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {}; 52 assert(ia); 53 } 54 55 // make class move-only 56 binary_reader(const binary_reader&) = delete; 57 binary_reader(binary_reader&&) = default; 58 binary_reader& operator=(const binary_reader&) = delete; 59 binary_reader& operator=(binary_reader&&) = default; 60 ~binary_reader() = default; 61 62 /*! 63 @param[in] format the binary format to parse 64 @param[in] sax_ a SAX event processor 65 @param[in] strict whether to expect the input to be consumed completed 66 67 @return 68 */ sax_parse(const input_format_t format,json_sax_t * sax_,const bool strict=true)69 bool sax_parse(const input_format_t format, 70 json_sax_t* sax_, 71 const bool strict = true) 72 { 73 sax = sax_; 74 bool result = false; 75 76 switch (format) 77 { 78 case input_format_t::bson: 79 result = parse_bson_internal(); 80 break; 81 82 case input_format_t::cbor: 83 result = parse_cbor_internal(); 84 break; 85 86 case input_format_t::msgpack: 87 result = parse_msgpack_internal(); 88 break; 89 90 case input_format_t::ubjson: 91 result = parse_ubjson_internal(); 92 break; 93 94 default: // LCOV_EXCL_LINE 95 assert(false); // LCOV_EXCL_LINE 96 } 97 98 // strict mode: next byte must be EOF 99 if (result and strict) 100 { 101 if (format == input_format_t::ubjson) 102 { 103 get_ignore_noop(); 104 } 105 else 106 { 107 get(); 108 } 109 110 if (JSON_UNLIKELY(current != std::char_traits<char>::eof())) 111 { 112 return sax->parse_error(chars_read, get_token_string(), 113 parse_error::create(110, chars_read, exception_message(format, "expected end of input; last byte: 0x" + get_token_string(), "value"))); 114 } 115 } 116 117 return result; 118 } 119 120 /*! 121 @brief determine system byte order 122 123 @return true if and only if system's byte order is little endian 124 125 @note from http://stackoverflow.com/a/1001328/266378 126 */ little_endianess(int num=1)127 static constexpr bool little_endianess(int num = 1) noexcept 128 { 129 return *reinterpret_cast<char*>(&num) == 1; 130 } 131 132 private: 133 ////////// 134 // BSON // 135 ////////// 136 137 /*! 138 @brief Reads in a BSON-object and passes it to the SAX-parser. 139 @return whether a valid BSON-value was passed to the SAX parser 140 */ parse_bson_internal()141 bool parse_bson_internal() 142 { 143 std::int32_t document_size; 144 get_number<std::int32_t, true>(input_format_t::bson, document_size); 145 146 if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1)))) 147 { 148 return false; 149 } 150 151 if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false))) 152 { 153 return false; 154 } 155 156 return sax->end_object(); 157 } 158 159 /*! 160 @brief Parses a C-style string from the BSON input. 161 @param[in, out] result A reference to the string variable where the read 162 string is to be stored. 163 @return `true` if the \x00-byte indicating the end of the string was 164 encountered before the EOF; false` indicates an unexpected EOF. 165 */ get_bson_cstr(string_t & result)166 bool get_bson_cstr(string_t& result) 167 { 168 auto out = std::back_inserter(result); 169 while (true) 170 { 171 get(); 172 if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "cstring"))) 173 { 174 return false; 175 } 176 if (current == 0x00) 177 { 178 return true; 179 } 180 *out++ = static_cast<char>(current); 181 } 182 183 return true; 184 } 185 186 /*! 187 @brief Parses a zero-terminated string of length @a len from the BSON 188 input. 189 @param[in] len The length (including the zero-byte at the end) of the 190 string to be read. 191 @param[in, out] result A reference to the string variable where the read 192 string is to be stored. 193 @tparam NumberType The type of the length @a len 194 @pre len >= 1 195 @return `true` if the string was successfully parsed 196 */ 197 template<typename NumberType> get_bson_string(const NumberType len,string_t & result)198 bool get_bson_string(const NumberType len, string_t& result) 199 { 200 if (JSON_UNLIKELY(len < 1)) 201 { 202 auto last_token = get_token_string(); 203 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "string length must be at least 1, is " + std::to_string(len), "string"))); 204 } 205 206 return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) and get() != std::char_traits<char>::eof(); 207 } 208 209 /*! 210 @brief Read a BSON document element of the given @a element_type. 211 @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html 212 @param[in] element_type_parse_position The position in the input stream, 213 where the `element_type` was read. 214 @warning Not all BSON element types are supported yet. An unsupported 215 @a element_type will give rise to a parse_error.114: 216 Unsupported BSON record type 0x... 217 @return whether a valid BSON-object/array was passed to the SAX parser 218 */ parse_bson_element_internal(const int element_type,const std::size_t element_type_parse_position)219 bool parse_bson_element_internal(const int element_type, 220 const std::size_t element_type_parse_position) 221 { 222 switch (element_type) 223 { 224 case 0x01: // double 225 { 226 double number; 227 return get_number<double, true>(input_format_t::bson, number) and sax->number_float(static_cast<number_float_t>(number), ""); 228 } 229 230 case 0x02: // string 231 { 232 std::int32_t len; 233 string_t value; 234 return get_number<std::int32_t, true>(input_format_t::bson, len) and get_bson_string(len, value) and sax->string(value); 235 } 236 237 case 0x03: // object 238 { 239 return parse_bson_internal(); 240 } 241 242 case 0x04: // array 243 { 244 return parse_bson_array(); 245 } 246 247 case 0x08: // boolean 248 { 249 return sax->boolean(get() != 0); 250 } 251 252 case 0x0A: // null 253 { 254 return sax->null(); 255 } 256 257 case 0x10: // int32 258 { 259 std::int32_t value; 260 return get_number<std::int32_t, true>(input_format_t::bson, value) and sax->number_integer(value); 261 } 262 263 case 0x12: // int64 264 { 265 std::int64_t value; 266 return get_number<std::int64_t, true>(input_format_t::bson, value) and sax->number_integer(value); 267 } 268 269 default: // anything else not supported (yet) 270 { 271 std::array<char, 3> cr{{}}; 272 (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(element_type)); 273 return sax->parse_error(element_type_parse_position, std::string(cr.data()), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr.data()))); 274 } 275 } 276 } 277 278 /*! 279 @brief Read a BSON element list (as specified in the BSON-spec) 280 281 The same binary layout is used for objects and arrays, hence it must be 282 indicated with the argument @a is_array which one is expected 283 (true --> array, false --> object). 284 285 @param[in] is_array Determines if the element list being read is to be 286 treated as an object (@a is_array == false), or as an 287 array (@a is_array == true). 288 @return whether a valid BSON-object/array was passed to the SAX parser 289 */ parse_bson_element_list(const bool is_array)290 bool parse_bson_element_list(const bool is_array) 291 { 292 string_t key; 293 while (int element_type = get()) 294 { 295 if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list"))) 296 { 297 return false; 298 } 299 300 const std::size_t element_type_parse_position = chars_read; 301 if (JSON_UNLIKELY(not get_bson_cstr(key))) 302 { 303 return false; 304 } 305 306 if (not is_array and not sax->key(key)) 307 { 308 return false; 309 } 310 311 if (JSON_UNLIKELY(not parse_bson_element_internal(element_type, element_type_parse_position))) 312 { 313 return false; 314 } 315 316 // get_bson_cstr only appends 317 key.clear(); 318 } 319 320 return true; 321 } 322 323 /*! 324 @brief Reads an array from the BSON input and passes it to the SAX-parser. 325 @return whether a valid BSON-array was passed to the SAX parser 326 */ parse_bson_array()327 bool parse_bson_array() 328 { 329 std::int32_t document_size; 330 get_number<std::int32_t, true>(input_format_t::bson, document_size); 331 332 if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1)))) 333 { 334 return false; 335 } 336 337 if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/true))) 338 { 339 return false; 340 } 341 342 return sax->end_array(); 343 } 344 345 ////////// 346 // CBOR // 347 ////////// 348 349 /*! 350 @param[in] get_char whether a new character should be retrieved from the 351 input (true, default) or whether the last read 352 character should be considered instead 353 354 @return whether a valid CBOR value was passed to the SAX parser 355 */ parse_cbor_internal(const bool get_char=true)356 bool parse_cbor_internal(const bool get_char = true) 357 { 358 switch (get_char ? get() : current) 359 { 360 // EOF 361 case std::char_traits<char>::eof(): 362 return unexpect_eof(input_format_t::cbor, "value"); 363 364 // Integer 0x00..0x17 (0..23) 365 case 0x00: 366 case 0x01: 367 case 0x02: 368 case 0x03: 369 case 0x04: 370 case 0x05: 371 case 0x06: 372 case 0x07: 373 case 0x08: 374 case 0x09: 375 case 0x0A: 376 case 0x0B: 377 case 0x0C: 378 case 0x0D: 379 case 0x0E: 380 case 0x0F: 381 case 0x10: 382 case 0x11: 383 case 0x12: 384 case 0x13: 385 case 0x14: 386 case 0x15: 387 case 0x16: 388 case 0x17: 389 return sax->number_unsigned(static_cast<number_unsigned_t>(current)); 390 391 case 0x18: // Unsigned integer (one-byte uint8_t follows) 392 { 393 std::uint8_t number; 394 return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); 395 } 396 397 case 0x19: // Unsigned integer (two-byte uint16_t follows) 398 { 399 std::uint16_t number; 400 return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); 401 } 402 403 case 0x1A: // Unsigned integer (four-byte uint32_t follows) 404 { 405 std::uint32_t number; 406 return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); 407 } 408 409 case 0x1B: // Unsigned integer (eight-byte uint64_t follows) 410 { 411 std::uint64_t number; 412 return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); 413 } 414 415 // Negative integer -1-0x00..-1-0x17 (-1..-24) 416 case 0x20: 417 case 0x21: 418 case 0x22: 419 case 0x23: 420 case 0x24: 421 case 0x25: 422 case 0x26: 423 case 0x27: 424 case 0x28: 425 case 0x29: 426 case 0x2A: 427 case 0x2B: 428 case 0x2C: 429 case 0x2D: 430 case 0x2E: 431 case 0x2F: 432 case 0x30: 433 case 0x31: 434 case 0x32: 435 case 0x33: 436 case 0x34: 437 case 0x35: 438 case 0x36: 439 case 0x37: 440 return sax->number_integer(static_cast<std::int8_t>(0x20 - 1 - current)); 441 442 case 0x38: // Negative integer (one-byte uint8_t follows) 443 { 444 std::uint8_t number; 445 return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); 446 } 447 448 case 0x39: // Negative integer -1-n (two-byte uint16_t follows) 449 { 450 std::uint16_t number; 451 return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); 452 } 453 454 case 0x3A: // Negative integer -1-n (four-byte uint32_t follows) 455 { 456 std::uint32_t number; 457 return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); 458 } 459 460 case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows) 461 { 462 std::uint64_t number; 463 return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) 464 - static_cast<number_integer_t>(number)); 465 } 466 467 // UTF-8 string (0x00..0x17 bytes follow) 468 case 0x60: 469 case 0x61: 470 case 0x62: 471 case 0x63: 472 case 0x64: 473 case 0x65: 474 case 0x66: 475 case 0x67: 476 case 0x68: 477 case 0x69: 478 case 0x6A: 479 case 0x6B: 480 case 0x6C: 481 case 0x6D: 482 case 0x6E: 483 case 0x6F: 484 case 0x70: 485 case 0x71: 486 case 0x72: 487 case 0x73: 488 case 0x74: 489 case 0x75: 490 case 0x76: 491 case 0x77: 492 case 0x78: // UTF-8 string (one-byte uint8_t for n follows) 493 case 0x79: // UTF-8 string (two-byte uint16_t for n follow) 494 case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) 495 case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) 496 case 0x7F: // UTF-8 string (indefinite length) 497 { 498 string_t s; 499 return get_cbor_string(s) and sax->string(s); 500 } 501 502 // array (0x00..0x17 data items follow) 503 case 0x80: 504 case 0x81: 505 case 0x82: 506 case 0x83: 507 case 0x84: 508 case 0x85: 509 case 0x86: 510 case 0x87: 511 case 0x88: 512 case 0x89: 513 case 0x8A: 514 case 0x8B: 515 case 0x8C: 516 case 0x8D: 517 case 0x8E: 518 case 0x8F: 519 case 0x90: 520 case 0x91: 521 case 0x92: 522 case 0x93: 523 case 0x94: 524 case 0x95: 525 case 0x96: 526 case 0x97: 527 return get_cbor_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu)); 528 529 case 0x98: // array (one-byte uint8_t for n follows) 530 { 531 std::uint8_t len; 532 return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); 533 } 534 535 case 0x99: // array (two-byte uint16_t for n follow) 536 { 537 std::uint16_t len; 538 return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); 539 } 540 541 case 0x9A: // array (four-byte uint32_t for n follow) 542 { 543 std::uint32_t len; 544 return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); 545 } 546 547 case 0x9B: // array (eight-byte uint64_t for n follow) 548 { 549 std::uint64_t len; 550 return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); 551 } 552 553 case 0x9F: // array (indefinite length) 554 return get_cbor_array(std::size_t(-1)); 555 556 // map (0x00..0x17 pairs of data items follow) 557 case 0xA0: 558 case 0xA1: 559 case 0xA2: 560 case 0xA3: 561 case 0xA4: 562 case 0xA5: 563 case 0xA6: 564 case 0xA7: 565 case 0xA8: 566 case 0xA9: 567 case 0xAA: 568 case 0xAB: 569 case 0xAC: 570 case 0xAD: 571 case 0xAE: 572 case 0xAF: 573 case 0xB0: 574 case 0xB1: 575 case 0xB2: 576 case 0xB3: 577 case 0xB4: 578 case 0xB5: 579 case 0xB6: 580 case 0xB7: 581 return get_cbor_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu)); 582 583 case 0xB8: // map (one-byte uint8_t for n follows) 584 { 585 std::uint8_t len; 586 return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); 587 } 588 589 case 0xB9: // map (two-byte uint16_t for n follow) 590 { 591 std::uint16_t len; 592 return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); 593 } 594 595 case 0xBA: // map (four-byte uint32_t for n follow) 596 { 597 std::uint32_t len; 598 return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); 599 } 600 601 case 0xBB: // map (eight-byte uint64_t for n follow) 602 { 603 std::uint64_t len; 604 return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); 605 } 606 607 case 0xBF: // map (indefinite length) 608 return get_cbor_object(std::size_t(-1)); 609 610 case 0xF4: // false 611 return sax->boolean(false); 612 613 case 0xF5: // true 614 return sax->boolean(true); 615 616 case 0xF6: // null 617 return sax->null(); 618 619 case 0xF9: // Half-Precision Float (two-byte IEEE 754) 620 { 621 const int byte1_raw = get(); 622 if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "number"))) 623 { 624 return false; 625 } 626 const int byte2_raw = get(); 627 if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "number"))) 628 { 629 return false; 630 } 631 632 const auto byte1 = static_cast<unsigned char>(byte1_raw); 633 const auto byte2 = static_cast<unsigned char>(byte2_raw); 634 635 // code from RFC 7049, Appendix D, Figure 3: 636 // As half-precision floating-point numbers were only added 637 // to IEEE 754 in 2008, today's programming platforms often 638 // still only have limited support for them. It is very 639 // easy to include at least decoding support for them even 640 // without such support. An example of a small decoder for 641 // half-precision floating-point numbers in the C language 642 // is shown in Fig. 3. 643 const auto half = static_cast<unsigned int>((byte1 << 8u) + byte2); 644 const double val = [&half] 645 { 646 const int exp = (half >> 10u) & 0x1Fu; 647 const unsigned int mant = half & 0x3FFu; 648 assert(0 <= exp and exp <= 32); 649 assert(0 <= mant and mant <= 1024); 650 switch (exp) 651 { 652 case 0: 653 return std::ldexp(mant, -24); 654 case 31: 655 return (mant == 0) 656 ? std::numeric_limits<double>::infinity() 657 : std::numeric_limits<double>::quiet_NaN(); 658 default: 659 return std::ldexp(mant + 1024, exp - 25); 660 } 661 }(); 662 return sax->number_float((half & 0x8000u) != 0 663 ? static_cast<number_float_t>(-val) 664 : static_cast<number_float_t>(val), ""); 665 } 666 667 case 0xFA: // Single-Precision Float (four-byte IEEE 754) 668 { 669 float number; 670 return get_number(input_format_t::cbor, number) and sax->number_float(static_cast<number_float_t>(number), ""); 671 } 672 673 case 0xFB: // Double-Precision Float (eight-byte IEEE 754) 674 { 675 double number; 676 return get_number(input_format_t::cbor, number) and sax->number_float(static_cast<number_float_t>(number), ""); 677 } 678 679 default: // anything else (0xFF is handled inside the other types) 680 { 681 auto last_token = get_token_string(); 682 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value"))); 683 } 684 } 685 } 686 687 /*! 688 @brief reads a CBOR string 689 690 This function first reads starting bytes to determine the expected 691 string length and then copies this number of bytes into a string. 692 Additionally, CBOR's strings with indefinite lengths are supported. 693 694 @param[out] result created string 695 696 @return whether string creation completed 697 */ get_cbor_string(string_t & result)698 bool get_cbor_string(string_t& result) 699 { 700 if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string"))) 701 { 702 return false; 703 } 704 705 switch (current) 706 { 707 // UTF-8 string (0x00..0x17 bytes follow) 708 case 0x60: 709 case 0x61: 710 case 0x62: 711 case 0x63: 712 case 0x64: 713 case 0x65: 714 case 0x66: 715 case 0x67: 716 case 0x68: 717 case 0x69: 718 case 0x6A: 719 case 0x6B: 720 case 0x6C: 721 case 0x6D: 722 case 0x6E: 723 case 0x6F: 724 case 0x70: 725 case 0x71: 726 case 0x72: 727 case 0x73: 728 case 0x74: 729 case 0x75: 730 case 0x76: 731 case 0x77: 732 { 733 return get_string(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result); 734 } 735 736 case 0x78: // UTF-8 string (one-byte uint8_t for n follows) 737 { 738 std::uint8_t len; 739 return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); 740 } 741 742 case 0x79: // UTF-8 string (two-byte uint16_t for n follow) 743 { 744 std::uint16_t len; 745 return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); 746 } 747 748 case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) 749 { 750 std::uint32_t len; 751 return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); 752 } 753 754 case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) 755 { 756 std::uint64_t len; 757 return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); 758 } 759 760 case 0x7F: // UTF-8 string (indefinite length) 761 { 762 while (get() != 0xFF) 763 { 764 string_t chunk; 765 if (not get_cbor_string(chunk)) 766 { 767 return false; 768 } 769 result.append(chunk); 770 } 771 return true; 772 } 773 774 default: 775 { 776 auto last_token = get_token_string(); 777 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"))); 778 } 779 } 780 } 781 782 /*! 783 @param[in] len the length of the array or std::size_t(-1) for an 784 array of indefinite size 785 @return whether array creation completed 786 */ get_cbor_array(const std::size_t len)787 bool get_cbor_array(const std::size_t len) 788 { 789 if (JSON_UNLIKELY(not sax->start_array(len))) 790 { 791 return false; 792 } 793 794 if (len != std::size_t(-1)) 795 { 796 for (std::size_t i = 0; i < len; ++i) 797 { 798 if (JSON_UNLIKELY(not parse_cbor_internal())) 799 { 800 return false; 801 } 802 } 803 } 804 else 805 { 806 while (get() != 0xFF) 807 { 808 if (JSON_UNLIKELY(not parse_cbor_internal(false))) 809 { 810 return false; 811 } 812 } 813 } 814 815 return sax->end_array(); 816 } 817 818 /*! 819 @param[in] len the length of the object or std::size_t(-1) for an 820 object of indefinite size 821 @return whether object creation completed 822 */ get_cbor_object(const std::size_t len)823 bool get_cbor_object(const std::size_t len) 824 { 825 if (JSON_UNLIKELY(not sax->start_object(len))) 826 { 827 return false; 828 } 829 830 string_t key; 831 if (len != std::size_t(-1)) 832 { 833 for (std::size_t i = 0; i < len; ++i) 834 { 835 get(); 836 if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) 837 { 838 return false; 839 } 840 841 if (JSON_UNLIKELY(not parse_cbor_internal())) 842 { 843 return false; 844 } 845 key.clear(); 846 } 847 } 848 else 849 { 850 while (get() != 0xFF) 851 { 852 if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) 853 { 854 return false; 855 } 856 857 if (JSON_UNLIKELY(not parse_cbor_internal())) 858 { 859 return false; 860 } 861 key.clear(); 862 } 863 } 864 865 return sax->end_object(); 866 } 867 868 ///////////// 869 // MsgPack // 870 ///////////// 871 872 /*! 873 @return whether a valid MessagePack value was passed to the SAX parser 874 */ parse_msgpack_internal()875 bool parse_msgpack_internal() 876 { 877 switch (get()) 878 { 879 // EOF 880 case std::char_traits<char>::eof(): 881 return unexpect_eof(input_format_t::msgpack, "value"); 882 883 // positive fixint 884 case 0x00: 885 case 0x01: 886 case 0x02: 887 case 0x03: 888 case 0x04: 889 case 0x05: 890 case 0x06: 891 case 0x07: 892 case 0x08: 893 case 0x09: 894 case 0x0A: 895 case 0x0B: 896 case 0x0C: 897 case 0x0D: 898 case 0x0E: 899 case 0x0F: 900 case 0x10: 901 case 0x11: 902 case 0x12: 903 case 0x13: 904 case 0x14: 905 case 0x15: 906 case 0x16: 907 case 0x17: 908 case 0x18: 909 case 0x19: 910 case 0x1A: 911 case 0x1B: 912 case 0x1C: 913 case 0x1D: 914 case 0x1E: 915 case 0x1F: 916 case 0x20: 917 case 0x21: 918 case 0x22: 919 case 0x23: 920 case 0x24: 921 case 0x25: 922 case 0x26: 923 case 0x27: 924 case 0x28: 925 case 0x29: 926 case 0x2A: 927 case 0x2B: 928 case 0x2C: 929 case 0x2D: 930 case 0x2E: 931 case 0x2F: 932 case 0x30: 933 case 0x31: 934 case 0x32: 935 case 0x33: 936 case 0x34: 937 case 0x35: 938 case 0x36: 939 case 0x37: 940 case 0x38: 941 case 0x39: 942 case 0x3A: 943 case 0x3B: 944 case 0x3C: 945 case 0x3D: 946 case 0x3E: 947 case 0x3F: 948 case 0x40: 949 case 0x41: 950 case 0x42: 951 case 0x43: 952 case 0x44: 953 case 0x45: 954 case 0x46: 955 case 0x47: 956 case 0x48: 957 case 0x49: 958 case 0x4A: 959 case 0x4B: 960 case 0x4C: 961 case 0x4D: 962 case 0x4E: 963 case 0x4F: 964 case 0x50: 965 case 0x51: 966 case 0x52: 967 case 0x53: 968 case 0x54: 969 case 0x55: 970 case 0x56: 971 case 0x57: 972 case 0x58: 973 case 0x59: 974 case 0x5A: 975 case 0x5B: 976 case 0x5C: 977 case 0x5D: 978 case 0x5E: 979 case 0x5F: 980 case 0x60: 981 case 0x61: 982 case 0x62: 983 case 0x63: 984 case 0x64: 985 case 0x65: 986 case 0x66: 987 case 0x67: 988 case 0x68: 989 case 0x69: 990 case 0x6A: 991 case 0x6B: 992 case 0x6C: 993 case 0x6D: 994 case 0x6E: 995 case 0x6F: 996 case 0x70: 997 case 0x71: 998 case 0x72: 999 case 0x73: 1000 case 0x74: 1001 case 0x75: 1002 case 0x76: 1003 case 0x77: 1004 case 0x78: 1005 case 0x79: 1006 case 0x7A: 1007 case 0x7B: 1008 case 0x7C: 1009 case 0x7D: 1010 case 0x7E: 1011 case 0x7F: 1012 return sax->number_unsigned(static_cast<number_unsigned_t>(current)); 1013 1014 // fixmap 1015 case 0x80: 1016 case 0x81: 1017 case 0x82: 1018 case 0x83: 1019 case 0x84: 1020 case 0x85: 1021 case 0x86: 1022 case 0x87: 1023 case 0x88: 1024 case 0x89: 1025 case 0x8A: 1026 case 0x8B: 1027 case 0x8C: 1028 case 0x8D: 1029 case 0x8E: 1030 case 0x8F: 1031 return get_msgpack_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu)); 1032 1033 // fixarray 1034 case 0x90: 1035 case 0x91: 1036 case 0x92: 1037 case 0x93: 1038 case 0x94: 1039 case 0x95: 1040 case 0x96: 1041 case 0x97: 1042 case 0x98: 1043 case 0x99: 1044 case 0x9A: 1045 case 0x9B: 1046 case 0x9C: 1047 case 0x9D: 1048 case 0x9E: 1049 case 0x9F: 1050 return get_msgpack_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu)); 1051 1052 // fixstr 1053 case 0xA0: 1054 case 0xA1: 1055 case 0xA2: 1056 case 0xA3: 1057 case 0xA4: 1058 case 0xA5: 1059 case 0xA6: 1060 case 0xA7: 1061 case 0xA8: 1062 case 0xA9: 1063 case 0xAA: 1064 case 0xAB: 1065 case 0xAC: 1066 case 0xAD: 1067 case 0xAE: 1068 case 0xAF: 1069 case 0xB0: 1070 case 0xB1: 1071 case 0xB2: 1072 case 0xB3: 1073 case 0xB4: 1074 case 0xB5: 1075 case 0xB6: 1076 case 0xB7: 1077 case 0xB8: 1078 case 0xB9: 1079 case 0xBA: 1080 case 0xBB: 1081 case 0xBC: 1082 case 0xBD: 1083 case 0xBE: 1084 case 0xBF: 1085 { 1086 string_t s; 1087 return get_msgpack_string(s) and sax->string(s); 1088 } 1089 1090 case 0xC0: // nil 1091 return sax->null(); 1092 1093 case 0xC2: // false 1094 return sax->boolean(false); 1095 1096 case 0xC3: // true 1097 return sax->boolean(true); 1098 1099 case 0xCA: // float 32 1100 { 1101 float number; 1102 return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), ""); 1103 } 1104 1105 case 0xCB: // float 64 1106 { 1107 double number; 1108 return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), ""); 1109 } 1110 1111 case 0xCC: // uint 8 1112 { 1113 std::uint8_t number; 1114 return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); 1115 } 1116 1117 case 0xCD: // uint 16 1118 { 1119 std::uint16_t number; 1120 return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); 1121 } 1122 1123 case 0xCE: // uint 32 1124 { 1125 std::uint32_t number; 1126 return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); 1127 } 1128 1129 case 0xCF: // uint 64 1130 { 1131 std::uint64_t number; 1132 return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); 1133 } 1134 1135 case 0xD0: // int 8 1136 { 1137 std::int8_t number; 1138 return get_number(input_format_t::msgpack, number) and sax->number_integer(number); 1139 } 1140 1141 case 0xD1: // int 16 1142 { 1143 std::int16_t number; 1144 return get_number(input_format_t::msgpack, number) and sax->number_integer(number); 1145 } 1146 1147 case 0xD2: // int 32 1148 { 1149 std::int32_t number; 1150 return get_number(input_format_t::msgpack, number) and sax->number_integer(number); 1151 } 1152 1153 case 0xD3: // int 64 1154 { 1155 std::int64_t number; 1156 return get_number(input_format_t::msgpack, number) and sax->number_integer(number); 1157 } 1158 1159 case 0xD9: // str 8 1160 case 0xDA: // str 16 1161 case 0xDB: // str 32 1162 { 1163 string_t s; 1164 return get_msgpack_string(s) and sax->string(s); 1165 } 1166 1167 case 0xDC: // array 16 1168 { 1169 std::uint16_t len; 1170 return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len)); 1171 } 1172 1173 case 0xDD: // array 32 1174 { 1175 std::uint32_t len; 1176 return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len)); 1177 } 1178 1179 case 0xDE: // map 16 1180 { 1181 std::uint16_t len; 1182 return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len)); 1183 } 1184 1185 case 0xDF: // map 32 1186 { 1187 std::uint32_t len; 1188 return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len)); 1189 } 1190 1191 // negative fixint 1192 case 0xE0: 1193 case 0xE1: 1194 case 0xE2: 1195 case 0xE3: 1196 case 0xE4: 1197 case 0xE5: 1198 case 0xE6: 1199 case 0xE7: 1200 case 0xE8: 1201 case 0xE9: 1202 case 0xEA: 1203 case 0xEB: 1204 case 0xEC: 1205 case 0xED: 1206 case 0xEE: 1207 case 0xEF: 1208 case 0xF0: 1209 case 0xF1: 1210 case 0xF2: 1211 case 0xF3: 1212 case 0xF4: 1213 case 0xF5: 1214 case 0xF6: 1215 case 0xF7: 1216 case 0xF8: 1217 case 0xF9: 1218 case 0xFA: 1219 case 0xFB: 1220 case 0xFC: 1221 case 0xFD: 1222 case 0xFE: 1223 case 0xFF: 1224 return sax->number_integer(static_cast<std::int8_t>(current)); 1225 1226 default: // anything else 1227 { 1228 auto last_token = get_token_string(); 1229 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value"))); 1230 } 1231 } 1232 } 1233 1234 /*! 1235 @brief reads a MessagePack string 1236 1237 This function first reads starting bytes to determine the expected 1238 string length and then copies this number of bytes into a string. 1239 1240 @param[out] result created string 1241 1242 @return whether string creation completed 1243 */ get_msgpack_string(string_t & result)1244 bool get_msgpack_string(string_t& result) 1245 { 1246 if (JSON_UNLIKELY(not unexpect_eof(input_format_t::msgpack, "string"))) 1247 { 1248 return false; 1249 } 1250 1251 switch (current) 1252 { 1253 // fixstr 1254 case 0xA0: 1255 case 0xA1: 1256 case 0xA2: 1257 case 0xA3: 1258 case 0xA4: 1259 case 0xA5: 1260 case 0xA6: 1261 case 0xA7: 1262 case 0xA8: 1263 case 0xA9: 1264 case 0xAA: 1265 case 0xAB: 1266 case 0xAC: 1267 case 0xAD: 1268 case 0xAE: 1269 case 0xAF: 1270 case 0xB0: 1271 case 0xB1: 1272 case 0xB2: 1273 case 0xB3: 1274 case 0xB4: 1275 case 0xB5: 1276 case 0xB6: 1277 case 0xB7: 1278 case 0xB8: 1279 case 0xB9: 1280 case 0xBA: 1281 case 0xBB: 1282 case 0xBC: 1283 case 0xBD: 1284 case 0xBE: 1285 case 0xBF: 1286 { 1287 return get_string(input_format_t::msgpack, static_cast<unsigned int>(current) & 0x1Fu, result); 1288 } 1289 1290 case 0xD9: // str 8 1291 { 1292 std::uint8_t len; 1293 return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result); 1294 } 1295 1296 case 0xDA: // str 16 1297 { 1298 std::uint16_t len; 1299 return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result); 1300 } 1301 1302 case 0xDB: // str 32 1303 { 1304 std::uint32_t len; 1305 return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result); 1306 } 1307 1308 default: 1309 { 1310 auto last_token = get_token_string(); 1311 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::msgpack, "expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x" + last_token, "string"))); 1312 } 1313 } 1314 } 1315 1316 /*! 1317 @param[in] len the length of the array 1318 @return whether array creation completed 1319 */ get_msgpack_array(const std::size_t len)1320 bool get_msgpack_array(const std::size_t len) 1321 { 1322 if (JSON_UNLIKELY(not sax->start_array(len))) 1323 { 1324 return false; 1325 } 1326 1327 for (std::size_t i = 0; i < len; ++i) 1328 { 1329 if (JSON_UNLIKELY(not parse_msgpack_internal())) 1330 { 1331 return false; 1332 } 1333 } 1334 1335 return sax->end_array(); 1336 } 1337 1338 /*! 1339 @param[in] len the length of the object 1340 @return whether object creation completed 1341 */ get_msgpack_object(const std::size_t len)1342 bool get_msgpack_object(const std::size_t len) 1343 { 1344 if (JSON_UNLIKELY(not sax->start_object(len))) 1345 { 1346 return false; 1347 } 1348 1349 string_t key; 1350 for (std::size_t i = 0; i < len; ++i) 1351 { 1352 get(); 1353 if (JSON_UNLIKELY(not get_msgpack_string(key) or not sax->key(key))) 1354 { 1355 return false; 1356 } 1357 1358 if (JSON_UNLIKELY(not parse_msgpack_internal())) 1359 { 1360 return false; 1361 } 1362 key.clear(); 1363 } 1364 1365 return sax->end_object(); 1366 } 1367 1368 //////////// 1369 // UBJSON // 1370 //////////// 1371 1372 /*! 1373 @param[in] get_char whether a new character should be retrieved from the 1374 input (true, default) or whether the last read 1375 character should be considered instead 1376 1377 @return whether a valid UBJSON value was passed to the SAX parser 1378 */ parse_ubjson_internal(const bool get_char=true)1379 bool parse_ubjson_internal(const bool get_char = true) 1380 { 1381 return get_ubjson_value(get_char ? get_ignore_noop() : current); 1382 } 1383 1384 /*! 1385 @brief reads a UBJSON string 1386 1387 This function is either called after reading the 'S' byte explicitly 1388 indicating a string, or in case of an object key where the 'S' byte can be 1389 left out. 1390 1391 @param[out] result created string 1392 @param[in] get_char whether a new character should be retrieved from the 1393 input (true, default) or whether the last read 1394 character should be considered instead 1395 1396 @return whether string creation completed 1397 */ get_ubjson_string(string_t & result,const bool get_char=true)1398 bool get_ubjson_string(string_t& result, const bool get_char = true) 1399 { 1400 if (get_char) 1401 { 1402 get(); // TODO(niels): may we ignore N here? 1403 } 1404 1405 if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "value"))) 1406 { 1407 return false; 1408 } 1409 1410 switch (current) 1411 { 1412 case 'U': 1413 { 1414 std::uint8_t len; 1415 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); 1416 } 1417 1418 case 'i': 1419 { 1420 std::int8_t len; 1421 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); 1422 } 1423 1424 case 'I': 1425 { 1426 std::int16_t len; 1427 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); 1428 } 1429 1430 case 'l': 1431 { 1432 std::int32_t len; 1433 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); 1434 } 1435 1436 case 'L': 1437 { 1438 std::int64_t len; 1439 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); 1440 } 1441 1442 default: 1443 auto last_token = get_token_string(); 1444 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L); last byte: 0x" + last_token, "string"))); 1445 } 1446 } 1447 1448 /*! 1449 @param[out] result determined size 1450 @return whether size determination completed 1451 */ get_ubjson_size_value(std::size_t & result)1452 bool get_ubjson_size_value(std::size_t& result) 1453 { 1454 switch (get_ignore_noop()) 1455 { 1456 case 'U': 1457 { 1458 std::uint8_t number; 1459 if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) 1460 { 1461 return false; 1462 } 1463 result = static_cast<std::size_t>(number); 1464 return true; 1465 } 1466 1467 case 'i': 1468 { 1469 std::int8_t number; 1470 if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) 1471 { 1472 return false; 1473 } 1474 result = static_cast<std::size_t>(number); 1475 return true; 1476 } 1477 1478 case 'I': 1479 { 1480 std::int16_t number; 1481 if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) 1482 { 1483 return false; 1484 } 1485 result = static_cast<std::size_t>(number); 1486 return true; 1487 } 1488 1489 case 'l': 1490 { 1491 std::int32_t number; 1492 if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) 1493 { 1494 return false; 1495 } 1496 result = static_cast<std::size_t>(number); 1497 return true; 1498 } 1499 1500 case 'L': 1501 { 1502 std::int64_t number; 1503 if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) 1504 { 1505 return false; 1506 } 1507 result = static_cast<std::size_t>(number); 1508 return true; 1509 } 1510 1511 default: 1512 { 1513 auto last_token = get_token_string(); 1514 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size"))); 1515 } 1516 } 1517 } 1518 1519 /*! 1520 @brief determine the type and size for a container 1521 1522 In the optimized UBJSON format, a type and a size can be provided to allow 1523 for a more compact representation. 1524 1525 @param[out] result pair of the size and the type 1526 1527 @return whether pair creation completed 1528 */ get_ubjson_size_type(std::pair<std::size_t,int> & result)1529 bool get_ubjson_size_type(std::pair<std::size_t, int>& result) 1530 { 1531 result.first = string_t::npos; // size 1532 result.second = 0; // type 1533 1534 get_ignore_noop(); 1535 1536 if (current == '$') 1537 { 1538 result.second = get(); // must not ignore 'N', because 'N' maybe the type 1539 if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "type"))) 1540 { 1541 return false; 1542 } 1543 1544 get_ignore_noop(); 1545 if (JSON_UNLIKELY(current != '#')) 1546 { 1547 if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "value"))) 1548 { 1549 return false; 1550 } 1551 auto last_token = get_token_string(); 1552 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "expected '#' after type information; last byte: 0x" + last_token, "size"))); 1553 } 1554 1555 return get_ubjson_size_value(result.first); 1556 } 1557 1558 if (current == '#') 1559 { 1560 return get_ubjson_size_value(result.first); 1561 } 1562 1563 return true; 1564 } 1565 1566 /*! 1567 @param prefix the previously read or set type prefix 1568 @return whether value creation completed 1569 */ get_ubjson_value(const int prefix)1570 bool get_ubjson_value(const int prefix) 1571 { 1572 switch (prefix) 1573 { 1574 case std::char_traits<char>::eof(): // EOF 1575 return unexpect_eof(input_format_t::ubjson, "value"); 1576 1577 case 'T': // true 1578 return sax->boolean(true); 1579 case 'F': // false 1580 return sax->boolean(false); 1581 1582 case 'Z': // null 1583 return sax->null(); 1584 1585 case 'U': 1586 { 1587 std::uint8_t number; 1588 return get_number(input_format_t::ubjson, number) and sax->number_unsigned(number); 1589 } 1590 1591 case 'i': 1592 { 1593 std::int8_t number; 1594 return get_number(input_format_t::ubjson, number) and sax->number_integer(number); 1595 } 1596 1597 case 'I': 1598 { 1599 std::int16_t number; 1600 return get_number(input_format_t::ubjson, number) and sax->number_integer(number); 1601 } 1602 1603 case 'l': 1604 { 1605 std::int32_t number; 1606 return get_number(input_format_t::ubjson, number) and sax->number_integer(number); 1607 } 1608 1609 case 'L': 1610 { 1611 std::int64_t number; 1612 return get_number(input_format_t::ubjson, number) and sax->number_integer(number); 1613 } 1614 1615 case 'd': 1616 { 1617 float number; 1618 return get_number(input_format_t::ubjson, number) and sax->number_float(static_cast<number_float_t>(number), ""); 1619 } 1620 1621 case 'D': 1622 { 1623 double number; 1624 return get_number(input_format_t::ubjson, number) and sax->number_float(static_cast<number_float_t>(number), ""); 1625 } 1626 1627 case 'C': // char 1628 { 1629 get(); 1630 if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "char"))) 1631 { 1632 return false; 1633 } 1634 if (JSON_UNLIKELY(current > 127)) 1635 { 1636 auto last_token = get_token_string(); 1637 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token, "char"))); 1638 } 1639 string_t s(1, static_cast<char>(current)); 1640 return sax->string(s); 1641 } 1642 1643 case 'S': // string 1644 { 1645 string_t s; 1646 return get_ubjson_string(s) and sax->string(s); 1647 } 1648 1649 case '[': // array 1650 return get_ubjson_array(); 1651 1652 case '{': // object 1653 return get_ubjson_object(); 1654 1655 default: // anything else 1656 { 1657 auto last_token = get_token_string(); 1658 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "invalid byte: 0x" + last_token, "value"))); 1659 } 1660 } 1661 } 1662 1663 /*! 1664 @return whether array creation completed 1665 */ get_ubjson_array()1666 bool get_ubjson_array() 1667 { 1668 std::pair<std::size_t, int> size_and_type; 1669 if (JSON_UNLIKELY(not get_ubjson_size_type(size_and_type))) 1670 { 1671 return false; 1672 } 1673 1674 if (size_and_type.first != string_t::npos) 1675 { 1676 if (JSON_UNLIKELY(not sax->start_array(size_and_type.first))) 1677 { 1678 return false; 1679 } 1680 1681 if (size_and_type.second != 0) 1682 { 1683 if (size_and_type.second != 'N') 1684 { 1685 for (std::size_t i = 0; i < size_and_type.first; ++i) 1686 { 1687 if (JSON_UNLIKELY(not get_ubjson_value(size_and_type.second))) 1688 { 1689 return false; 1690 } 1691 } 1692 } 1693 } 1694 else 1695 { 1696 for (std::size_t i = 0; i < size_and_type.first; ++i) 1697 { 1698 if (JSON_UNLIKELY(not parse_ubjson_internal())) 1699 { 1700 return false; 1701 } 1702 } 1703 } 1704 } 1705 else 1706 { 1707 if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1)))) 1708 { 1709 return false; 1710 } 1711 1712 while (current != ']') 1713 { 1714 if (JSON_UNLIKELY(not parse_ubjson_internal(false))) 1715 { 1716 return false; 1717 } 1718 get_ignore_noop(); 1719 } 1720 } 1721 1722 return sax->end_array(); 1723 } 1724 1725 /*! 1726 @return whether object creation completed 1727 */ get_ubjson_object()1728 bool get_ubjson_object() 1729 { 1730 std::pair<std::size_t, int> size_and_type; 1731 if (JSON_UNLIKELY(not get_ubjson_size_type(size_and_type))) 1732 { 1733 return false; 1734 } 1735 1736 string_t key; 1737 if (size_and_type.first != string_t::npos) 1738 { 1739 if (JSON_UNLIKELY(not sax->start_object(size_and_type.first))) 1740 { 1741 return false; 1742 } 1743 1744 if (size_and_type.second != 0) 1745 { 1746 for (std::size_t i = 0; i < size_and_type.first; ++i) 1747 { 1748 if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(key))) 1749 { 1750 return false; 1751 } 1752 if (JSON_UNLIKELY(not get_ubjson_value(size_and_type.second))) 1753 { 1754 return false; 1755 } 1756 key.clear(); 1757 } 1758 } 1759 else 1760 { 1761 for (std::size_t i = 0; i < size_and_type.first; ++i) 1762 { 1763 if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(key))) 1764 { 1765 return false; 1766 } 1767 if (JSON_UNLIKELY(not parse_ubjson_internal())) 1768 { 1769 return false; 1770 } 1771 key.clear(); 1772 } 1773 } 1774 } 1775 else 1776 { 1777 if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1)))) 1778 { 1779 return false; 1780 } 1781 1782 while (current != '}') 1783 { 1784 if (JSON_UNLIKELY(not get_ubjson_string(key, false) or not sax->key(key))) 1785 { 1786 return false; 1787 } 1788 if (JSON_UNLIKELY(not parse_ubjson_internal())) 1789 { 1790 return false; 1791 } 1792 get_ignore_noop(); 1793 key.clear(); 1794 } 1795 } 1796 1797 return sax->end_object(); 1798 } 1799 1800 /////////////////////// 1801 // Utility functions // 1802 /////////////////////// 1803 1804 /*! 1805 @brief get next character from the input 1806 1807 This function provides the interface to the used input adapter. It does 1808 not throw in case the input reached EOF, but returns a -'ve valued 1809 `std::char_traits<char>::eof()` in that case. 1810 1811 @return character read from the input 1812 */ get()1813 int get() 1814 { 1815 ++chars_read; 1816 return current = ia->get_character(); 1817 } 1818 1819 /*! 1820 @return character read from the input after ignoring all 'N' entries 1821 */ get_ignore_noop()1822 int get_ignore_noop() 1823 { 1824 do 1825 { 1826 get(); 1827 } 1828 while (current == 'N'); 1829 1830 return current; 1831 } 1832 1833 /* 1834 @brief read a number from the input 1835 1836 @tparam NumberType the type of the number 1837 @param[in] format the current format (for diagnostics) 1838 @param[out] result number of type @a NumberType 1839 1840 @return whether conversion completed 1841 1842 @note This function needs to respect the system's endianess, because 1843 bytes in CBOR, MessagePack, and UBJSON are stored in network order 1844 (big endian) and therefore need reordering on little endian systems. 1845 */ 1846 template<typename NumberType, bool InputIsLittleEndian = false> get_number(const input_format_t format,NumberType & result)1847 bool get_number(const input_format_t format, NumberType& result) 1848 { 1849 // step 1: read input into array with system's byte order 1850 std::array<std::uint8_t, sizeof(NumberType)> vec; 1851 for (std::size_t i = 0; i < sizeof(NumberType); ++i) 1852 { 1853 get(); 1854 if (JSON_UNLIKELY(not unexpect_eof(format, "number"))) 1855 { 1856 return false; 1857 } 1858 1859 // reverse byte order prior to conversion if necessary 1860 if (is_little_endian != InputIsLittleEndian) 1861 { 1862 vec[sizeof(NumberType) - i - 1] = static_cast<std::uint8_t>(current); 1863 } 1864 else 1865 { 1866 vec[i] = static_cast<std::uint8_t>(current); // LCOV_EXCL_LINE 1867 } 1868 } 1869 1870 // step 2: convert array into number of type T and return 1871 std::memcpy(&result, vec.data(), sizeof(NumberType)); 1872 return true; 1873 } 1874 1875 /*! 1876 @brief create a string by reading characters from the input 1877 1878 @tparam NumberType the type of the number 1879 @param[in] format the current format (for diagnostics) 1880 @param[in] len number of characters to read 1881 @param[out] result string created by reading @a len bytes 1882 1883 @return whether string creation completed 1884 1885 @note We can not reserve @a len bytes for the result, because @a len 1886 may be too large. Usually, @ref unexpect_eof() detects the end of 1887 the input before we run out of string memory. 1888 */ 1889 template<typename NumberType> get_string(const input_format_t format,const NumberType len,string_t & result)1890 bool get_string(const input_format_t format, 1891 const NumberType len, 1892 string_t& result) 1893 { 1894 bool success = true; 1895 std::generate_n(std::back_inserter(result), len, [this, &success, &format]() 1896 { 1897 get(); 1898 if (JSON_UNLIKELY(not unexpect_eof(format, "string"))) 1899 { 1900 success = false; 1901 } 1902 return static_cast<char>(current); 1903 }); 1904 return success; 1905 } 1906 1907 /*! 1908 @param[in] format the current format (for diagnostics) 1909 @param[in] context further context information (for diagnostics) 1910 @return whether the last read character is not EOF 1911 */ unexpect_eof(const input_format_t format,const char * context) const1912 bool unexpect_eof(const input_format_t format, const char* context) const 1913 { 1914 if (JSON_UNLIKELY(current == std::char_traits<char>::eof())) 1915 { 1916 return sax->parse_error(chars_read, "<end of file>", 1917 parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context))); 1918 } 1919 return true; 1920 } 1921 1922 /*! 1923 @return a string representation of the last read byte 1924 */ get_token_string() const1925 std::string get_token_string() const 1926 { 1927 std::array<char, 3> cr{{}}; 1928 (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(current)); 1929 return std::string{cr.data()}; 1930 } 1931 1932 /*! 1933 @param[in] format the current format 1934 @param[in] detail a detailed error message 1935 @param[in] context further contect information 1936 @return a message string to use in the parse_error exceptions 1937 */ exception_message(const input_format_t format,const std::string & detail,const std::string & context) const1938 std::string exception_message(const input_format_t format, 1939 const std::string& detail, 1940 const std::string& context) const 1941 { 1942 std::string error_msg = "syntax error while parsing "; 1943 1944 switch (format) 1945 { 1946 case input_format_t::cbor: 1947 error_msg += "CBOR"; 1948 break; 1949 1950 case input_format_t::msgpack: 1951 error_msg += "MessagePack"; 1952 break; 1953 1954 case input_format_t::ubjson: 1955 error_msg += "UBJSON"; 1956 break; 1957 1958 case input_format_t::bson: 1959 error_msg += "BSON"; 1960 break; 1961 1962 default: // LCOV_EXCL_LINE 1963 assert(false); // LCOV_EXCL_LINE 1964 } 1965 1966 return error_msg + " " + context + ": " + detail; 1967 } 1968 1969 private: 1970 /// input adapter 1971 input_adapter_t ia = nullptr; 1972 1973 /// the current character 1974 int current = std::char_traits<char>::eof(); 1975 1976 /// the number of characters read 1977 std::size_t chars_read = 0; 1978 1979 /// whether we can assume little endianess 1980 const bool is_little_endian = little_endianess(); 1981 1982 /// the SAX parser 1983 json_sax_t* sax = nullptr; 1984 }; 1985 } // namespace detail 1986 } // namespace nlohmann 1987