1 #pragma once
2
3 #include <algorithm> // generate_n
4 #include <array> // array
5 #include <cmath> // ldexp
6 #include <cstddef> // size_t
7 #include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t
8 #include <cstdio> // snprintf
9 #include <cstring> // memcpy
10 #include <iterator> // back_inserter
11 #include <limits> // numeric_limits
12 #include <string> // char_traits, string
13 #include <utility> // make_pair, move
14 #include <vector> // vector
15
16 #include <nlohmann/detail/exceptions.hpp>
17 #include <nlohmann/detail/input/input_adapters.hpp>
18 #include <nlohmann/detail/input/json_sax.hpp>
19 #include <nlohmann/detail/input/lexer.hpp>
20 #include <nlohmann/detail/macro_scope.hpp>
21 #include <nlohmann/detail/meta/is_sax.hpp>
22 #include <nlohmann/detail/value_t.hpp>
23
24 namespace nlohmann
25 {
26 namespace detail
27 {
28
29 /// how to treat CBOR tags
enum class cbor_tag_handler_t
{
    error = 0,  ///< a tagged item is reported as a parse_error
    ignore = 1  ///< the tag is skipped and the tagged value is parsed
};
35
36 /*!
37 @brief determine system byte order
38
39 @return true if and only if system's byte order is little endian
40
41 @note from https://stackoverflow.com/a/1001328/266378
42 */
static inline bool little_endianess(int num = 1) noexcept
{
    // Copy out the byte stored at the lowest address of `num`. With the
    // default argument that byte equals 1 only if the least significant
    // byte is stored first.
    char lowest_address_byte = 0;
    std::memcpy(&lowest_address_byte, &num, sizeof(lowest_address_byte));
    return lowest_address_byte == 1;
}
47
48
49 ///////////////////
50 // binary reader //
51 ///////////////////
52
53 /*!
54 @brief deserialization of CBOR, MessagePack, and UBJSON values
55 */
56 template<typename BasicJsonType, typename InputAdapterType, typename SAX = json_sax_dom_parser<BasicJsonType>>
57 class binary_reader
58 {
59 using number_integer_t = typename BasicJsonType::number_integer_t;
60 using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
61 using number_float_t = typename BasicJsonType::number_float_t;
62 using string_t = typename BasicJsonType::string_t;
63 using binary_t = typename BasicJsonType::binary_t;
64 using json_sax_t = SAX;
65 using char_type = typename InputAdapterType::char_type;
66 using char_int_type = typename std::char_traits<char_type>::int_type;
67
68 public:
69 /*!
70 @brief create a binary reader
71
72 @param[in] adapter input adapter to read from
73 */
binary_reader(InputAdapterType && adapter)74 explicit binary_reader(InputAdapterType&& adapter) noexcept : ia(std::move(adapter))
75 {
76 (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
77 }
78
79 // make class move-only
// copy construction is disabled
binary_reader(const binary_reader&) = delete;
// move construction uses the compiler-generated member-wise move
binary_reader(binary_reader&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor)
// copy assignment is disabled
binary_reader& operator=(const binary_reader&) = delete;
// move assignment uses the compiler-generated member-wise move
binary_reader& operator=(binary_reader&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor)
// the destructor is the compiler-generated one
~binary_reader() = default;
85
86 /*!
87 @param[in] format the binary format to parse
88 @param[in] sax_ a SAX event processor
@param[in] strict whether to expect the input to be consumed completely
90 @param[in] tag_handler how to treat CBOR tags
91
92 @return whether parsing was successful
93 */
94 JSON_HEDLEY_NON_NULL(3)
sax_parse(const input_format_t format,json_sax_t * sax_,const bool strict=true,const cbor_tag_handler_t tag_handler=cbor_tag_handler_t::error)95 bool sax_parse(const input_format_t format,
96 json_sax_t* sax_,
97 const bool strict = true,
98 const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error)
99 {
100 sax = sax_;
101 bool result = false;
102
103 switch (format)
104 {
105 case input_format_t::bson:
106 result = parse_bson_internal();
107 break;
108
109 case input_format_t::cbor:
110 result = parse_cbor_internal(true, tag_handler);
111 break;
112
113 case input_format_t::msgpack:
114 result = parse_msgpack_internal();
115 break;
116
117 case input_format_t::ubjson:
118 result = parse_ubjson_internal();
119 break;
120
121 default: // LCOV_EXCL_LINE
122 JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
123 }
124
125 // strict mode: next byte must be EOF
126 if (result && strict)
127 {
128 if (format == input_format_t::ubjson)
129 {
130 get_ignore_noop();
131 }
132 else
133 {
134 get();
135 }
136
137 if (JSON_HEDLEY_UNLIKELY(current != std::char_traits<char_type>::eof()))
138 {
139 return sax->parse_error(chars_read, get_token_string(),
140 parse_error::create(110, chars_read, exception_message(format, "expected end of input; last byte: 0x" + get_token_string(), "value"), BasicJsonType()));
141 }
142 }
143
144 return result;
145 }
146
147 private:
148 //////////
149 // BSON //
150 //////////
151
152 /*!
153 @brief Reads in a BSON-object and passes it to the SAX-parser.
154 @return whether a valid BSON-value was passed to the SAX parser
155 */
parse_bson_internal()156 bool parse_bson_internal()
157 {
158 std::int32_t document_size{};
159 get_number<std::int32_t, true>(input_format_t::bson, document_size);
160
161 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1))))
162 {
163 return false;
164 }
165
166 if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/false)))
167 {
168 return false;
169 }
170
171 return sax->end_object();
172 }
173
174 /*!
175 @brief Parses a C-style string from the BSON input.
176 @param[in,out] result A reference to the string variable where the read
177 string is to be stored.
178 @return `true` if the \x00-byte indicating the end of the string was
encountered before the EOF; `false` indicates an unexpected EOF.
180 */
get_bson_cstr(string_t & result)181 bool get_bson_cstr(string_t& result)
182 {
183 auto out = std::back_inserter(result);
184 while (true)
185 {
186 get();
187 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "cstring")))
188 {
189 return false;
190 }
191 if (current == 0x00)
192 {
193 return true;
194 }
195 *out++ = static_cast<typename string_t::value_type>(current);
196 }
197 }
198
199 /*!
200 @brief Parses a zero-terminated string of length @a len from the BSON
201 input.
202 @param[in] len The length (including the zero-byte at the end) of the
203 string to be read.
204 @param[in,out] result A reference to the string variable where the read
205 string is to be stored.
206 @tparam NumberType The type of the length @a len
207 @pre len >= 1
208 @return `true` if the string was successfully parsed
209 */
210 template<typename NumberType>
get_bson_string(const NumberType len,string_t & result)211 bool get_bson_string(const NumberType len, string_t& result)
212 {
213 if (JSON_HEDLEY_UNLIKELY(len < 1))
214 {
215 auto last_token = get_token_string();
216 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "string length must be at least 1, is " + std::to_string(len), "string"), BasicJsonType()));
217 }
218
219 return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) && get() != std::char_traits<char_type>::eof();
220 }
221
222 /*!
223 @brief Parses a byte array input of length @a len from the BSON input.
224 @param[in] len The length of the byte array to be read.
225 @param[in,out] result A reference to the binary variable where the read
226 array is to be stored.
227 @tparam NumberType The type of the length @a len
228 @pre len >= 0
229 @return `true` if the byte array was successfully parsed
230 */
231 template<typename NumberType>
get_bson_binary(const NumberType len,binary_t & result)232 bool get_bson_binary(const NumberType len, binary_t& result)
233 {
234 if (JSON_HEDLEY_UNLIKELY(len < 0))
235 {
236 auto last_token = get_token_string();
237 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "byte array length cannot be negative, is " + std::to_string(len), "binary"), BasicJsonType()));
238 }
239
240 // All BSON binary values have a subtype
241 std::uint8_t subtype{};
242 get_number<std::uint8_t>(input_format_t::bson, subtype);
243 result.set_subtype(subtype);
244
245 return get_binary(input_format_t::bson, len, result);
246 }
247
248 /*!
249 @brief Read a BSON document element of the given @a element_type.
250 @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html
251 @param[in] element_type_parse_position The position in the input stream,
252 where the `element_type` was read.
253 @warning Not all BSON element types are supported yet. An unsupported
254 @a element_type will give rise to a parse_error.114:
255 Unsupported BSON record type 0x...
256 @return whether a valid BSON-object/array was passed to the SAX parser
257 */
parse_bson_element_internal(const char_int_type element_type,const std::size_t element_type_parse_position)258 bool parse_bson_element_internal(const char_int_type element_type,
259 const std::size_t element_type_parse_position)
260 {
261 switch (element_type)
262 {
263 case 0x01: // double
264 {
265 double number{};
266 return get_number<double, true>(input_format_t::bson, number) && sax->number_float(static_cast<number_float_t>(number), "");
267 }
268
269 case 0x02: // string
270 {
271 std::int32_t len{};
272 string_t value;
273 return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_string(len, value) && sax->string(value);
274 }
275
276 case 0x03: // object
277 {
278 return parse_bson_internal();
279 }
280
281 case 0x04: // array
282 {
283 return parse_bson_array();
284 }
285
286 case 0x05: // binary
287 {
288 std::int32_t len{};
289 binary_t value;
290 return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_binary(len, value) && sax->binary(value);
291 }
292
293 case 0x08: // boolean
294 {
295 return sax->boolean(get() != 0);
296 }
297
298 case 0x0A: // null
299 {
300 return sax->null();
301 }
302
303 case 0x10: // int32
304 {
305 std::int32_t value{};
306 return get_number<std::int32_t, true>(input_format_t::bson, value) && sax->number_integer(value);
307 }
308
309 case 0x12: // int64
310 {
311 std::int64_t value{};
312 return get_number<std::int64_t, true>(input_format_t::bson, value) && sax->number_integer(value);
313 }
314
315 default: // anything else not supported (yet)
316 {
317 std::array<char, 3> cr{{}};
318 (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(element_type)); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
319 return sax->parse_error(element_type_parse_position, std::string(cr.data()), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr.data()), BasicJsonType()));
320 }
321 }
322 }
323
324 /*!
325 @brief Read a BSON element list (as specified in the BSON-spec)
326
327 The same binary layout is used for objects and arrays, hence it must be
328 indicated with the argument @a is_array which one is expected
329 (true --> array, false --> object).
330
331 @param[in] is_array Determines if the element list being read is to be
332 treated as an object (@a is_array == false), or as an
333 array (@a is_array == true).
334 @return whether a valid BSON-object/array was passed to the SAX parser
335 */
parse_bson_element_list(const bool is_array)336 bool parse_bson_element_list(const bool is_array)
337 {
338 string_t key;
339
340 while (auto element_type = get())
341 {
342 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "element list")))
343 {
344 return false;
345 }
346
347 const std::size_t element_type_parse_position = chars_read;
348 if (JSON_HEDLEY_UNLIKELY(!get_bson_cstr(key)))
349 {
350 return false;
351 }
352
353 if (!is_array && !sax->key(key))
354 {
355 return false;
356 }
357
358 if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_internal(element_type, element_type_parse_position)))
359 {
360 return false;
361 }
362
363 // get_bson_cstr only appends
364 key.clear();
365 }
366
367 return true;
368 }
369
370 /*!
371 @brief Reads an array from the BSON input and passes it to the SAX-parser.
372 @return whether a valid BSON-array was passed to the SAX parser
373 */
parse_bson_array()374 bool parse_bson_array()
375 {
376 std::int32_t document_size{};
377 get_number<std::int32_t, true>(input_format_t::bson, document_size);
378
379 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1))))
380 {
381 return false;
382 }
383
384 if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/true)))
385 {
386 return false;
387 }
388
389 return sax->end_array();
390 }
391
392 //////////
393 // CBOR //
394 //////////
395
396 /*!
397 @param[in] get_char whether a new character should be retrieved from the
398 input (true) or whether the last read character should
399 be considered instead (false)
400 @param[in] tag_handler how CBOR tags should be treated
401
402 @return whether a valid CBOR value was passed to the SAX parser
403 */
parse_cbor_internal(const bool get_char,const cbor_tag_handler_t tag_handler)404 bool parse_cbor_internal(const bool get_char,
405 const cbor_tag_handler_t tag_handler)
406 {
407 switch (get_char ? get() : current)
408 {
409 // EOF
410 case std::char_traits<char_type>::eof():
411 return unexpect_eof(input_format_t::cbor, "value");
412
413 // Integer 0x00..0x17 (0..23)
414 case 0x00:
415 case 0x01:
416 case 0x02:
417 case 0x03:
418 case 0x04:
419 case 0x05:
420 case 0x06:
421 case 0x07:
422 case 0x08:
423 case 0x09:
424 case 0x0A:
425 case 0x0B:
426 case 0x0C:
427 case 0x0D:
428 case 0x0E:
429 case 0x0F:
430 case 0x10:
431 case 0x11:
432 case 0x12:
433 case 0x13:
434 case 0x14:
435 case 0x15:
436 case 0x16:
437 case 0x17:
438 return sax->number_unsigned(static_cast<number_unsigned_t>(current));
439
440 case 0x18: // Unsigned integer (one-byte uint8_t follows)
441 {
442 std::uint8_t number{};
443 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
444 }
445
446 case 0x19: // Unsigned integer (two-byte uint16_t follows)
447 {
448 std::uint16_t number{};
449 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
450 }
451
452 case 0x1A: // Unsigned integer (four-byte uint32_t follows)
453 {
454 std::uint32_t number{};
455 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
456 }
457
458 case 0x1B: // Unsigned integer (eight-byte uint64_t follows)
459 {
460 std::uint64_t number{};
461 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
462 }
463
464 // Negative integer -1-0x00..-1-0x17 (-1..-24)
465 case 0x20:
466 case 0x21:
467 case 0x22:
468 case 0x23:
469 case 0x24:
470 case 0x25:
471 case 0x26:
472 case 0x27:
473 case 0x28:
474 case 0x29:
475 case 0x2A:
476 case 0x2B:
477 case 0x2C:
478 case 0x2D:
479 case 0x2E:
480 case 0x2F:
481 case 0x30:
482 case 0x31:
483 case 0x32:
484 case 0x33:
485 case 0x34:
486 case 0x35:
487 case 0x36:
488 case 0x37:
489 return sax->number_integer(static_cast<std::int8_t>(0x20 - 1 - current));
490
491 case 0x38: // Negative integer (one-byte uint8_t follows)
492 {
493 std::uint8_t number{};
494 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
495 }
496
497 case 0x39: // Negative integer -1-n (two-byte uint16_t follows)
498 {
499 std::uint16_t number{};
500 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
501 }
502
503 case 0x3A: // Negative integer -1-n (four-byte uint32_t follows)
504 {
505 std::uint32_t number{};
506 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
507 }
508
509 case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows)
510 {
511 std::uint64_t number{};
512 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1)
513 - static_cast<number_integer_t>(number));
514 }
515
516 // Binary data (0x00..0x17 bytes follow)
517 case 0x40:
518 case 0x41:
519 case 0x42:
520 case 0x43:
521 case 0x44:
522 case 0x45:
523 case 0x46:
524 case 0x47:
525 case 0x48:
526 case 0x49:
527 case 0x4A:
528 case 0x4B:
529 case 0x4C:
530 case 0x4D:
531 case 0x4E:
532 case 0x4F:
533 case 0x50:
534 case 0x51:
535 case 0x52:
536 case 0x53:
537 case 0x54:
538 case 0x55:
539 case 0x56:
540 case 0x57:
541 case 0x58: // Binary data (one-byte uint8_t for n follows)
542 case 0x59: // Binary data (two-byte uint16_t for n follow)
543 case 0x5A: // Binary data (four-byte uint32_t for n follow)
544 case 0x5B: // Binary data (eight-byte uint64_t for n follow)
545 case 0x5F: // Binary data (indefinite length)
546 {
547 binary_t b;
548 return get_cbor_binary(b) && sax->binary(b);
549 }
550
551 // UTF-8 string (0x00..0x17 bytes follow)
552 case 0x60:
553 case 0x61:
554 case 0x62:
555 case 0x63:
556 case 0x64:
557 case 0x65:
558 case 0x66:
559 case 0x67:
560 case 0x68:
561 case 0x69:
562 case 0x6A:
563 case 0x6B:
564 case 0x6C:
565 case 0x6D:
566 case 0x6E:
567 case 0x6F:
568 case 0x70:
569 case 0x71:
570 case 0x72:
571 case 0x73:
572 case 0x74:
573 case 0x75:
574 case 0x76:
575 case 0x77:
576 case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
577 case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
578 case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
579 case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
580 case 0x7F: // UTF-8 string (indefinite length)
581 {
582 string_t s;
583 return get_cbor_string(s) && sax->string(s);
584 }
585
586 // array (0x00..0x17 data items follow)
587 case 0x80:
588 case 0x81:
589 case 0x82:
590 case 0x83:
591 case 0x84:
592 case 0x85:
593 case 0x86:
594 case 0x87:
595 case 0x88:
596 case 0x89:
597 case 0x8A:
598 case 0x8B:
599 case 0x8C:
600 case 0x8D:
601 case 0x8E:
602 case 0x8F:
603 case 0x90:
604 case 0x91:
605 case 0x92:
606 case 0x93:
607 case 0x94:
608 case 0x95:
609 case 0x96:
610 case 0x97:
611 return get_cbor_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
612
613 case 0x98: // array (one-byte uint8_t for n follows)
614 {
615 std::uint8_t len{};
616 return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
617 }
618
619 case 0x99: // array (two-byte uint16_t for n follow)
620 {
621 std::uint16_t len{};
622 return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
623 }
624
625 case 0x9A: // array (four-byte uint32_t for n follow)
626 {
627 std::uint32_t len{};
628 return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
629 }
630
631 case 0x9B: // array (eight-byte uint64_t for n follow)
632 {
633 std::uint64_t len{};
634 return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
635 }
636
637 case 0x9F: // array (indefinite length)
638 return get_cbor_array(std::size_t(-1), tag_handler);
639
640 // map (0x00..0x17 pairs of data items follow)
641 case 0xA0:
642 case 0xA1:
643 case 0xA2:
644 case 0xA3:
645 case 0xA4:
646 case 0xA5:
647 case 0xA6:
648 case 0xA7:
649 case 0xA8:
650 case 0xA9:
651 case 0xAA:
652 case 0xAB:
653 case 0xAC:
654 case 0xAD:
655 case 0xAE:
656 case 0xAF:
657 case 0xB0:
658 case 0xB1:
659 case 0xB2:
660 case 0xB3:
661 case 0xB4:
662 case 0xB5:
663 case 0xB6:
664 case 0xB7:
665 return get_cbor_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
666
667 case 0xB8: // map (one-byte uint8_t for n follows)
668 {
669 std::uint8_t len{};
670 return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
671 }
672
673 case 0xB9: // map (two-byte uint16_t for n follow)
674 {
675 std::uint16_t len{};
676 return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
677 }
678
679 case 0xBA: // map (four-byte uint32_t for n follow)
680 {
681 std::uint32_t len{};
682 return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
683 }
684
685 case 0xBB: // map (eight-byte uint64_t for n follow)
686 {
687 std::uint64_t len{};
688 return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
689 }
690
691 case 0xBF: // map (indefinite length)
692 return get_cbor_object(std::size_t(-1), tag_handler);
693
694 case 0xC6: // tagged item
695 case 0xC7:
696 case 0xC8:
697 case 0xC9:
698 case 0xCA:
699 case 0xCB:
700 case 0xCC:
701 case 0xCD:
702 case 0xCE:
703 case 0xCF:
704 case 0xD0:
705 case 0xD1:
706 case 0xD2:
707 case 0xD3:
708 case 0xD4:
709 case 0xD8: // tagged item (1 bytes follow)
710 case 0xD9: // tagged item (2 bytes follow)
711 case 0xDA: // tagged item (4 bytes follow)
712 case 0xDB: // tagged item (8 bytes follow)
713 {
714 switch (tag_handler)
715 {
716 case cbor_tag_handler_t::error:
717 {
718 auto last_token = get_token_string();
719 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value"), BasicJsonType()));
720 }
721
722 case cbor_tag_handler_t::ignore:
723 {
724 switch (current)
725 {
726 case 0xD8:
727 {
728 std::uint8_t len{};
729 get_number(input_format_t::cbor, len);
730 break;
731 }
732 case 0xD9:
733 {
734 std::uint16_t len{};
735 get_number(input_format_t::cbor, len);
736 break;
737 }
738 case 0xDA:
739 {
740 std::uint32_t len{};
741 get_number(input_format_t::cbor, len);
742 break;
743 }
744 case 0xDB:
745 {
746 std::uint64_t len{};
747 get_number(input_format_t::cbor, len);
748 break;
749 }
750 default:
751 break;
752 }
753 return parse_cbor_internal(true, tag_handler);
754 }
755
756 default: // LCOV_EXCL_LINE
757 JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
758 return false; // LCOV_EXCL_LINE
759 }
760 }
761
762 case 0xF4: // false
763 return sax->boolean(false);
764
765 case 0xF5: // true
766 return sax->boolean(true);
767
768 case 0xF6: // null
769 return sax->null();
770
771 case 0xF9: // Half-Precision Float (two-byte IEEE 754)
772 {
773 const auto byte1_raw = get();
774 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number")))
775 {
776 return false;
777 }
778 const auto byte2_raw = get();
779 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number")))
780 {
781 return false;
782 }
783
784 const auto byte1 = static_cast<unsigned char>(byte1_raw);
785 const auto byte2 = static_cast<unsigned char>(byte2_raw);
786
787 // code from RFC 7049, Appendix D, Figure 3:
788 // As half-precision floating-point numbers were only added
789 // to IEEE 754 in 2008, today's programming platforms often
790 // still only have limited support for them. It is very
791 // easy to include at least decoding support for them even
792 // without such support. An example of a small decoder for
793 // half-precision floating-point numbers in the C language
794 // is shown in Fig. 3.
795 const auto half = static_cast<unsigned int>((byte1 << 8u) + byte2);
796 const double val = [&half]
797 {
798 const int exp = (half >> 10u) & 0x1Fu;
799 const unsigned int mant = half & 0x3FFu;
800 JSON_ASSERT(0 <= exp&& exp <= 32);
801 JSON_ASSERT(mant <= 1024);
802 switch (exp)
803 {
804 case 0:
805 return std::ldexp(mant, -24);
806 case 31:
807 return (mant == 0)
808 ? std::numeric_limits<double>::infinity()
809 : std::numeric_limits<double>::quiet_NaN();
810 default:
811 return std::ldexp(mant + 1024, exp - 25);
812 }
813 }();
814 return sax->number_float((half & 0x8000u) != 0
815 ? static_cast<number_float_t>(-val)
816 : static_cast<number_float_t>(val), "");
817 }
818
819 case 0xFA: // Single-Precision Float (four-byte IEEE 754)
820 {
821 float number{};
822 return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
823 }
824
825 case 0xFB: // Double-Precision Float (eight-byte IEEE 754)
826 {
827 double number{};
828 return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
829 }
830
831 default: // anything else (0xFF is handled inside the other types)
832 {
833 auto last_token = get_token_string();
834 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value"), BasicJsonType()));
835 }
836 }
837 }
838
839 /*!
840 @brief reads a CBOR string
841
842 This function first reads starting bytes to determine the expected
843 string length and then copies this number of bytes into a string.
844 Additionally, CBOR's strings with indefinite lengths are supported.
845
846 @param[out] result created string
847
848 @return whether string creation completed
849 */
get_cbor_string(string_t & result)850 bool get_cbor_string(string_t& result)
851 {
852 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "string")))
853 {
854 return false;
855 }
856
857 switch (current)
858 {
859 // UTF-8 string (0x00..0x17 bytes follow)
860 case 0x60:
861 case 0x61:
862 case 0x62:
863 case 0x63:
864 case 0x64:
865 case 0x65:
866 case 0x66:
867 case 0x67:
868 case 0x68:
869 case 0x69:
870 case 0x6A:
871 case 0x6B:
872 case 0x6C:
873 case 0x6D:
874 case 0x6E:
875 case 0x6F:
876 case 0x70:
877 case 0x71:
878 case 0x72:
879 case 0x73:
880 case 0x74:
881 case 0x75:
882 case 0x76:
883 case 0x77:
884 {
885 return get_string(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
886 }
887
888 case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
889 {
890 std::uint8_t len{};
891 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
892 }
893
894 case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
895 {
896 std::uint16_t len{};
897 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
898 }
899
900 case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
901 {
902 std::uint32_t len{};
903 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
904 }
905
906 case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
907 {
908 std::uint64_t len{};
909 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
910 }
911
912 case 0x7F: // UTF-8 string (indefinite length)
913 {
914 while (get() != 0xFF)
915 {
916 string_t chunk;
917 if (!get_cbor_string(chunk))
918 {
919 return false;
920 }
921 result.append(chunk);
922 }
923 return true;
924 }
925
926 default:
927 {
928 auto last_token = get_token_string();
929 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"), BasicJsonType()));
930 }
931 }
932 }
933
934 /*!
935 @brief reads a CBOR byte array
936
937 This function first reads starting bytes to determine the expected
938 byte array length and then copies this number of bytes into the byte array.
939 Additionally, CBOR's byte arrays with indefinite lengths are supported.
940
941 @param[out] result created byte array
942
943 @return whether byte array creation completed
944 */
get_cbor_binary(binary_t & result)945 bool get_cbor_binary(binary_t& result)
946 {
947 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "binary")))
948 {
949 return false;
950 }
951
952 switch (current)
953 {
954 // Binary data (0x00..0x17 bytes follow)
955 case 0x40:
956 case 0x41:
957 case 0x42:
958 case 0x43:
959 case 0x44:
960 case 0x45:
961 case 0x46:
962 case 0x47:
963 case 0x48:
964 case 0x49:
965 case 0x4A:
966 case 0x4B:
967 case 0x4C:
968 case 0x4D:
969 case 0x4E:
970 case 0x4F:
971 case 0x50:
972 case 0x51:
973 case 0x52:
974 case 0x53:
975 case 0x54:
976 case 0x55:
977 case 0x56:
978 case 0x57:
979 {
980 return get_binary(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
981 }
982
983 case 0x58: // Binary data (one-byte uint8_t for n follows)
984 {
985 std::uint8_t len{};
986 return get_number(input_format_t::cbor, len) &&
987 get_binary(input_format_t::cbor, len, result);
988 }
989
990 case 0x59: // Binary data (two-byte uint16_t for n follow)
991 {
992 std::uint16_t len{};
993 return get_number(input_format_t::cbor, len) &&
994 get_binary(input_format_t::cbor, len, result);
995 }
996
997 case 0x5A: // Binary data (four-byte uint32_t for n follow)
998 {
999 std::uint32_t len{};
1000 return get_number(input_format_t::cbor, len) &&
1001 get_binary(input_format_t::cbor, len, result);
1002 }
1003
1004 case 0x5B: // Binary data (eight-byte uint64_t for n follow)
1005 {
1006 std::uint64_t len{};
1007 return get_number(input_format_t::cbor, len) &&
1008 get_binary(input_format_t::cbor, len, result);
1009 }
1010
1011 case 0x5F: // Binary data (indefinite length)
1012 {
1013 while (get() != 0xFF)
1014 {
1015 binary_t chunk;
1016 if (!get_cbor_binary(chunk))
1017 {
1018 return false;
1019 }
1020 result.insert(result.end(), chunk.begin(), chunk.end());
1021 }
1022 return true;
1023 }
1024
1025 default:
1026 {
1027 auto last_token = get_token_string();
1028 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x40-0x5B) or indefinite binary array type (0x5F); last byte: 0x" + last_token, "binary"), BasicJsonType()));
1029 }
1030 }
1031 }
1032
1033 /*!
1034 @param[in] len the length of the array or std::size_t(-1) for an
1035 array of indefinite size
1036 @param[in] tag_handler how CBOR tags should be treated
1037 @return whether array creation completed
1038 */
get_cbor_array(const std::size_t len,const cbor_tag_handler_t tag_handler)1039 bool get_cbor_array(const std::size_t len,
1040 const cbor_tag_handler_t tag_handler)
1041 {
1042 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len)))
1043 {
1044 return false;
1045 }
1046
1047 if (len != std::size_t(-1))
1048 {
1049 for (std::size_t i = 0; i < len; ++i)
1050 {
1051 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1052 {
1053 return false;
1054 }
1055 }
1056 }
1057 else
1058 {
1059 while (get() != 0xFF)
1060 {
1061 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(false, tag_handler)))
1062 {
1063 return false;
1064 }
1065 }
1066 }
1067
1068 return sax->end_array();
1069 }
1070
1071 /*!
1072 @param[in] len the length of the object or std::size_t(-1) for an
1073 object of indefinite size
1074 @param[in] tag_handler how CBOR tags should be treated
1075 @return whether object creation completed
1076 */
get_cbor_object(const std::size_t len,const cbor_tag_handler_t tag_handler)1077 bool get_cbor_object(const std::size_t len,
1078 const cbor_tag_handler_t tag_handler)
1079 {
1080 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len)))
1081 {
1082 return false;
1083 }
1084
1085 string_t key;
1086 if (len != std::size_t(-1))
1087 {
1088 for (std::size_t i = 0; i < len; ++i)
1089 {
1090 get();
1091 if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key)))
1092 {
1093 return false;
1094 }
1095
1096 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1097 {
1098 return false;
1099 }
1100 key.clear();
1101 }
1102 }
1103 else
1104 {
1105 while (get() != 0xFF)
1106 {
1107 if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key)))
1108 {
1109 return false;
1110 }
1111
1112 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1113 {
1114 return false;
1115 }
1116 key.clear();
1117 }
1118 }
1119
1120 return sax->end_object();
1121 }
1122
1123 /////////////
1124 // MsgPack //
1125 /////////////
1126
1127 /*!
1128 @return whether a valid MessagePack value was passed to the SAX parser
1129 */
parse_msgpack_internal()1130 bool parse_msgpack_internal()
1131 {
1132 switch (get())
1133 {
1134 // EOF
1135 case std::char_traits<char_type>::eof():
1136 return unexpect_eof(input_format_t::msgpack, "value");
1137
1138 // positive fixint
1139 case 0x00:
1140 case 0x01:
1141 case 0x02:
1142 case 0x03:
1143 case 0x04:
1144 case 0x05:
1145 case 0x06:
1146 case 0x07:
1147 case 0x08:
1148 case 0x09:
1149 case 0x0A:
1150 case 0x0B:
1151 case 0x0C:
1152 case 0x0D:
1153 case 0x0E:
1154 case 0x0F:
1155 case 0x10:
1156 case 0x11:
1157 case 0x12:
1158 case 0x13:
1159 case 0x14:
1160 case 0x15:
1161 case 0x16:
1162 case 0x17:
1163 case 0x18:
1164 case 0x19:
1165 case 0x1A:
1166 case 0x1B:
1167 case 0x1C:
1168 case 0x1D:
1169 case 0x1E:
1170 case 0x1F:
1171 case 0x20:
1172 case 0x21:
1173 case 0x22:
1174 case 0x23:
1175 case 0x24:
1176 case 0x25:
1177 case 0x26:
1178 case 0x27:
1179 case 0x28:
1180 case 0x29:
1181 case 0x2A:
1182 case 0x2B:
1183 case 0x2C:
1184 case 0x2D:
1185 case 0x2E:
1186 case 0x2F:
1187 case 0x30:
1188 case 0x31:
1189 case 0x32:
1190 case 0x33:
1191 case 0x34:
1192 case 0x35:
1193 case 0x36:
1194 case 0x37:
1195 case 0x38:
1196 case 0x39:
1197 case 0x3A:
1198 case 0x3B:
1199 case 0x3C:
1200 case 0x3D:
1201 case 0x3E:
1202 case 0x3F:
1203 case 0x40:
1204 case 0x41:
1205 case 0x42:
1206 case 0x43:
1207 case 0x44:
1208 case 0x45:
1209 case 0x46:
1210 case 0x47:
1211 case 0x48:
1212 case 0x49:
1213 case 0x4A:
1214 case 0x4B:
1215 case 0x4C:
1216 case 0x4D:
1217 case 0x4E:
1218 case 0x4F:
1219 case 0x50:
1220 case 0x51:
1221 case 0x52:
1222 case 0x53:
1223 case 0x54:
1224 case 0x55:
1225 case 0x56:
1226 case 0x57:
1227 case 0x58:
1228 case 0x59:
1229 case 0x5A:
1230 case 0x5B:
1231 case 0x5C:
1232 case 0x5D:
1233 case 0x5E:
1234 case 0x5F:
1235 case 0x60:
1236 case 0x61:
1237 case 0x62:
1238 case 0x63:
1239 case 0x64:
1240 case 0x65:
1241 case 0x66:
1242 case 0x67:
1243 case 0x68:
1244 case 0x69:
1245 case 0x6A:
1246 case 0x6B:
1247 case 0x6C:
1248 case 0x6D:
1249 case 0x6E:
1250 case 0x6F:
1251 case 0x70:
1252 case 0x71:
1253 case 0x72:
1254 case 0x73:
1255 case 0x74:
1256 case 0x75:
1257 case 0x76:
1258 case 0x77:
1259 case 0x78:
1260 case 0x79:
1261 case 0x7A:
1262 case 0x7B:
1263 case 0x7C:
1264 case 0x7D:
1265 case 0x7E:
1266 case 0x7F:
1267 return sax->number_unsigned(static_cast<number_unsigned_t>(current));
1268
1269 // fixmap
1270 case 0x80:
1271 case 0x81:
1272 case 0x82:
1273 case 0x83:
1274 case 0x84:
1275 case 0x85:
1276 case 0x86:
1277 case 0x87:
1278 case 0x88:
1279 case 0x89:
1280 case 0x8A:
1281 case 0x8B:
1282 case 0x8C:
1283 case 0x8D:
1284 case 0x8E:
1285 case 0x8F:
1286 return get_msgpack_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
1287
1288 // fixarray
1289 case 0x90:
1290 case 0x91:
1291 case 0x92:
1292 case 0x93:
1293 case 0x94:
1294 case 0x95:
1295 case 0x96:
1296 case 0x97:
1297 case 0x98:
1298 case 0x99:
1299 case 0x9A:
1300 case 0x9B:
1301 case 0x9C:
1302 case 0x9D:
1303 case 0x9E:
1304 case 0x9F:
1305 return get_msgpack_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
1306
1307 // fixstr
1308 case 0xA0:
1309 case 0xA1:
1310 case 0xA2:
1311 case 0xA3:
1312 case 0xA4:
1313 case 0xA5:
1314 case 0xA6:
1315 case 0xA7:
1316 case 0xA8:
1317 case 0xA9:
1318 case 0xAA:
1319 case 0xAB:
1320 case 0xAC:
1321 case 0xAD:
1322 case 0xAE:
1323 case 0xAF:
1324 case 0xB0:
1325 case 0xB1:
1326 case 0xB2:
1327 case 0xB3:
1328 case 0xB4:
1329 case 0xB5:
1330 case 0xB6:
1331 case 0xB7:
1332 case 0xB8:
1333 case 0xB9:
1334 case 0xBA:
1335 case 0xBB:
1336 case 0xBC:
1337 case 0xBD:
1338 case 0xBE:
1339 case 0xBF:
1340 case 0xD9: // str 8
1341 case 0xDA: // str 16
1342 case 0xDB: // str 32
1343 {
1344 string_t s;
1345 return get_msgpack_string(s) && sax->string(s);
1346 }
1347
1348 case 0xC0: // nil
1349 return sax->null();
1350
1351 case 0xC2: // false
1352 return sax->boolean(false);
1353
1354 case 0xC3: // true
1355 return sax->boolean(true);
1356
1357 case 0xC4: // bin 8
1358 case 0xC5: // bin 16
1359 case 0xC6: // bin 32
1360 case 0xC7: // ext 8
1361 case 0xC8: // ext 16
1362 case 0xC9: // ext 32
1363 case 0xD4: // fixext 1
1364 case 0xD5: // fixext 2
1365 case 0xD6: // fixext 4
1366 case 0xD7: // fixext 8
1367 case 0xD8: // fixext 16
1368 {
1369 binary_t b;
1370 return get_msgpack_binary(b) && sax->binary(b);
1371 }
1372
1373 case 0xCA: // float 32
1374 {
1375 float number{};
1376 return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
1377 }
1378
1379 case 0xCB: // float 64
1380 {
1381 double number{};
1382 return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
1383 }
1384
1385 case 0xCC: // uint 8
1386 {
1387 std::uint8_t number{};
1388 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1389 }
1390
1391 case 0xCD: // uint 16
1392 {
1393 std::uint16_t number{};
1394 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1395 }
1396
1397 case 0xCE: // uint 32
1398 {
1399 std::uint32_t number{};
1400 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1401 }
1402
1403 case 0xCF: // uint 64
1404 {
1405 std::uint64_t number{};
1406 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1407 }
1408
1409 case 0xD0: // int 8
1410 {
1411 std::int8_t number{};
1412 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1413 }
1414
1415 case 0xD1: // int 16
1416 {
1417 std::int16_t number{};
1418 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1419 }
1420
1421 case 0xD2: // int 32
1422 {
1423 std::int32_t number{};
1424 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1425 }
1426
1427 case 0xD3: // int 64
1428 {
1429 std::int64_t number{};
1430 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1431 }
1432
1433 case 0xDC: // array 16
1434 {
1435 std::uint16_t len{};
1436 return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast<std::size_t>(len));
1437 }
1438
1439 case 0xDD: // array 32
1440 {
1441 std::uint32_t len{};
1442 return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast<std::size_t>(len));
1443 }
1444
1445 case 0xDE: // map 16
1446 {
1447 std::uint16_t len{};
1448 return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast<std::size_t>(len));
1449 }
1450
1451 case 0xDF: // map 32
1452 {
1453 std::uint32_t len{};
1454 return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast<std::size_t>(len));
1455 }
1456
1457 // negative fixint
1458 case 0xE0:
1459 case 0xE1:
1460 case 0xE2:
1461 case 0xE3:
1462 case 0xE4:
1463 case 0xE5:
1464 case 0xE6:
1465 case 0xE7:
1466 case 0xE8:
1467 case 0xE9:
1468 case 0xEA:
1469 case 0xEB:
1470 case 0xEC:
1471 case 0xED:
1472 case 0xEE:
1473 case 0xEF:
1474 case 0xF0:
1475 case 0xF1:
1476 case 0xF2:
1477 case 0xF3:
1478 case 0xF4:
1479 case 0xF5:
1480 case 0xF6:
1481 case 0xF7:
1482 case 0xF8:
1483 case 0xF9:
1484 case 0xFA:
1485 case 0xFB:
1486 case 0xFC:
1487 case 0xFD:
1488 case 0xFE:
1489 case 0xFF:
1490 return sax->number_integer(static_cast<std::int8_t>(current));
1491
1492 default: // anything else
1493 {
1494 auto last_token = get_token_string();
1495 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value"), BasicJsonType()));
1496 }
1497 }
1498 }
1499
1500 /*!
1501 @brief reads a MessagePack string
1502
1503 This function first reads starting bytes to determine the expected
1504 string length and then copies this number of bytes into a string.
1505
1506 @param[out] result created string
1507
1508 @return whether string creation completed
1509 */
get_msgpack_string(string_t & result)1510 bool get_msgpack_string(string_t& result)
1511 {
1512 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::msgpack, "string")))
1513 {
1514 return false;
1515 }
1516
1517 switch (current)
1518 {
1519 // fixstr
1520 case 0xA0:
1521 case 0xA1:
1522 case 0xA2:
1523 case 0xA3:
1524 case 0xA4:
1525 case 0xA5:
1526 case 0xA6:
1527 case 0xA7:
1528 case 0xA8:
1529 case 0xA9:
1530 case 0xAA:
1531 case 0xAB:
1532 case 0xAC:
1533 case 0xAD:
1534 case 0xAE:
1535 case 0xAF:
1536 case 0xB0:
1537 case 0xB1:
1538 case 0xB2:
1539 case 0xB3:
1540 case 0xB4:
1541 case 0xB5:
1542 case 0xB6:
1543 case 0xB7:
1544 case 0xB8:
1545 case 0xB9:
1546 case 0xBA:
1547 case 0xBB:
1548 case 0xBC:
1549 case 0xBD:
1550 case 0xBE:
1551 case 0xBF:
1552 {
1553 return get_string(input_format_t::msgpack, static_cast<unsigned int>(current) & 0x1Fu, result);
1554 }
1555
1556 case 0xD9: // str 8
1557 {
1558 std::uint8_t len{};
1559 return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1560 }
1561
1562 case 0xDA: // str 16
1563 {
1564 std::uint16_t len{};
1565 return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1566 }
1567
1568 case 0xDB: // str 32
1569 {
1570 std::uint32_t len{};
1571 return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1572 }
1573
1574 default:
1575 {
1576 auto last_token = get_token_string();
1577 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::msgpack, "expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x" + last_token, "string"), BasicJsonType()));
1578 }
1579 }
1580 }
1581
1582 /*!
1583 @brief reads a MessagePack byte array
1584
1585 This function first reads starting bytes to determine the expected
1586 byte array length and then copies this number of bytes into a byte array.
1587
1588 @param[out] result created byte array
1589
1590 @return whether byte array creation completed
1591 */
get_msgpack_binary(binary_t & result)1592 bool get_msgpack_binary(binary_t& result)
1593 {
1594 // helper function to set the subtype
1595 auto assign_and_return_true = [&result](std::int8_t subtype)
1596 {
1597 result.set_subtype(static_cast<std::uint8_t>(subtype));
1598 return true;
1599 };
1600
1601 switch (current)
1602 {
1603 case 0xC4: // bin 8
1604 {
1605 std::uint8_t len{};
1606 return get_number(input_format_t::msgpack, len) &&
1607 get_binary(input_format_t::msgpack, len, result);
1608 }
1609
1610 case 0xC5: // bin 16
1611 {
1612 std::uint16_t len{};
1613 return get_number(input_format_t::msgpack, len) &&
1614 get_binary(input_format_t::msgpack, len, result);
1615 }
1616
1617 case 0xC6: // bin 32
1618 {
1619 std::uint32_t len{};
1620 return get_number(input_format_t::msgpack, len) &&
1621 get_binary(input_format_t::msgpack, len, result);
1622 }
1623
1624 case 0xC7: // ext 8
1625 {
1626 std::uint8_t len{};
1627 std::int8_t subtype{};
1628 return get_number(input_format_t::msgpack, len) &&
1629 get_number(input_format_t::msgpack, subtype) &&
1630 get_binary(input_format_t::msgpack, len, result) &&
1631 assign_and_return_true(subtype);
1632 }
1633
1634 case 0xC8: // ext 16
1635 {
1636 std::uint16_t len{};
1637 std::int8_t subtype{};
1638 return get_number(input_format_t::msgpack, len) &&
1639 get_number(input_format_t::msgpack, subtype) &&
1640 get_binary(input_format_t::msgpack, len, result) &&
1641 assign_and_return_true(subtype);
1642 }
1643
1644 case 0xC9: // ext 32
1645 {
1646 std::uint32_t len{};
1647 std::int8_t subtype{};
1648 return get_number(input_format_t::msgpack, len) &&
1649 get_number(input_format_t::msgpack, subtype) &&
1650 get_binary(input_format_t::msgpack, len, result) &&
1651 assign_and_return_true(subtype);
1652 }
1653
1654 case 0xD4: // fixext 1
1655 {
1656 std::int8_t subtype{};
1657 return get_number(input_format_t::msgpack, subtype) &&
1658 get_binary(input_format_t::msgpack, 1, result) &&
1659 assign_and_return_true(subtype);
1660 }
1661
1662 case 0xD5: // fixext 2
1663 {
1664 std::int8_t subtype{};
1665 return get_number(input_format_t::msgpack, subtype) &&
1666 get_binary(input_format_t::msgpack, 2, result) &&
1667 assign_and_return_true(subtype);
1668 }
1669
1670 case 0xD6: // fixext 4
1671 {
1672 std::int8_t subtype{};
1673 return get_number(input_format_t::msgpack, subtype) &&
1674 get_binary(input_format_t::msgpack, 4, result) &&
1675 assign_and_return_true(subtype);
1676 }
1677
1678 case 0xD7: // fixext 8
1679 {
1680 std::int8_t subtype{};
1681 return get_number(input_format_t::msgpack, subtype) &&
1682 get_binary(input_format_t::msgpack, 8, result) &&
1683 assign_and_return_true(subtype);
1684 }
1685
1686 case 0xD8: // fixext 16
1687 {
1688 std::int8_t subtype{};
1689 return get_number(input_format_t::msgpack, subtype) &&
1690 get_binary(input_format_t::msgpack, 16, result) &&
1691 assign_and_return_true(subtype);
1692 }
1693
1694 default: // LCOV_EXCL_LINE
1695 return false; // LCOV_EXCL_LINE
1696 }
1697 }
1698
1699 /*!
1700 @param[in] len the length of the array
1701 @return whether array creation completed
1702 */
get_msgpack_array(const std::size_t len)1703 bool get_msgpack_array(const std::size_t len)
1704 {
1705 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len)))
1706 {
1707 return false;
1708 }
1709
1710 for (std::size_t i = 0; i < len; ++i)
1711 {
1712 if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal()))
1713 {
1714 return false;
1715 }
1716 }
1717
1718 return sax->end_array();
1719 }
1720
1721 /*!
1722 @param[in] len the length of the object
1723 @return whether object creation completed
1724 */
get_msgpack_object(const std::size_t len)1725 bool get_msgpack_object(const std::size_t len)
1726 {
1727 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len)))
1728 {
1729 return false;
1730 }
1731
1732 string_t key;
1733 for (std::size_t i = 0; i < len; ++i)
1734 {
1735 get();
1736 if (JSON_HEDLEY_UNLIKELY(!get_msgpack_string(key) || !sax->key(key)))
1737 {
1738 return false;
1739 }
1740
1741 if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal()))
1742 {
1743 return false;
1744 }
1745 key.clear();
1746 }
1747
1748 return sax->end_object();
1749 }
1750
1751 ////////////
1752 // UBJSON //
1753 ////////////
1754
1755 /*!
1756 @param[in] get_char whether a new character should be retrieved from the
1757 input (true, default) or whether the last read
1758 character should be considered instead
1759
1760 @return whether a valid UBJSON value was passed to the SAX parser
1761 */
parse_ubjson_internal(const bool get_char=true)1762 bool parse_ubjson_internal(const bool get_char = true)
1763 {
1764 return get_ubjson_value(get_char ? get_ignore_noop() : current);
1765 }
1766
1767 /*!
1768 @brief reads a UBJSON string
1769
1770 This function is either called after reading the 'S' byte explicitly
1771 indicating a string, or in case of an object key where the 'S' byte can be
1772 left out.
1773
1774 @param[out] result created string
1775 @param[in] get_char whether a new character should be retrieved from the
1776 input (true, default) or whether the last read
1777 character should be considered instead
1778
1779 @return whether string creation completed
1780 */
get_ubjson_string(string_t & result,const bool get_char=true)1781 bool get_ubjson_string(string_t& result, const bool get_char = true)
1782 {
1783 if (get_char)
1784 {
1785 get(); // TODO(niels): may we ignore N here?
1786 }
1787
1788 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "value")))
1789 {
1790 return false;
1791 }
1792
1793 switch (current)
1794 {
1795 case 'U':
1796 {
1797 std::uint8_t len{};
1798 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1799 }
1800
1801 case 'i':
1802 {
1803 std::int8_t len{};
1804 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1805 }
1806
1807 case 'I':
1808 {
1809 std::int16_t len{};
1810 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1811 }
1812
1813 case 'l':
1814 {
1815 std::int32_t len{};
1816 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1817 }
1818
1819 case 'L':
1820 {
1821 std::int64_t len{};
1822 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1823 }
1824
1825 default:
1826 auto last_token = get_token_string();
1827 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L); last byte: 0x" + last_token, "string"), BasicJsonType()));
1828 }
1829 }
1830
1831 /*!
1832 @param[out] result determined size
1833 @return whether size determination completed
1834 */
get_ubjson_size_value(std::size_t & result)1835 bool get_ubjson_size_value(std::size_t& result)
1836 {
1837 switch (get_ignore_noop())
1838 {
1839 case 'U':
1840 {
1841 std::uint8_t number{};
1842 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1843 {
1844 return false;
1845 }
1846 result = static_cast<std::size_t>(number);
1847 return true;
1848 }
1849
1850 case 'i':
1851 {
1852 std::int8_t number{};
1853 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1854 {
1855 return false;
1856 }
1857 result = static_cast<std::size_t>(number); // NOLINT(bugprone-signed-char-misuse,cert-str34-c): number is not a char
1858 return true;
1859 }
1860
1861 case 'I':
1862 {
1863 std::int16_t number{};
1864 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1865 {
1866 return false;
1867 }
1868 result = static_cast<std::size_t>(number);
1869 return true;
1870 }
1871
1872 case 'l':
1873 {
1874 std::int32_t number{};
1875 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1876 {
1877 return false;
1878 }
1879 result = static_cast<std::size_t>(number);
1880 return true;
1881 }
1882
1883 case 'L':
1884 {
1885 std::int64_t number{};
1886 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1887 {
1888 return false;
1889 }
1890 result = static_cast<std::size_t>(number);
1891 return true;
1892 }
1893
1894 default:
1895 {
1896 auto last_token = get_token_string();
1897 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size"), BasicJsonType()));
1898 }
1899 }
1900 }
1901
1902 /*!
1903 @brief determine the type and size for a container
1904
1905 In the optimized UBJSON format, a type and a size can be provided to allow
1906 for a more compact representation.
1907
1908 @param[out] result pair of the size and the type
1909
1910 @return whether pair creation completed
1911 */
get_ubjson_size_type(std::pair<std::size_t,char_int_type> & result)1912 bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result)
1913 {
1914 result.first = string_t::npos; // size
1915 result.second = 0; // type
1916
1917 get_ignore_noop();
1918
1919 if (current == '$')
1920 {
1921 result.second = get(); // must not ignore 'N', because 'N' maybe the type
1922 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "type")))
1923 {
1924 return false;
1925 }
1926
1927 get_ignore_noop();
1928 if (JSON_HEDLEY_UNLIKELY(current != '#'))
1929 {
1930 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "value")))
1931 {
1932 return false;
1933 }
1934 auto last_token = get_token_string();
1935 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "expected '#' after type information; last byte: 0x" + last_token, "size"), BasicJsonType()));
1936 }
1937
1938 return get_ubjson_size_value(result.first);
1939 }
1940
1941 if (current == '#')
1942 {
1943 return get_ubjson_size_value(result.first);
1944 }
1945
1946 return true;
1947 }
1948
1949 /*!
1950 @param prefix the previously read or set type prefix
1951 @return whether value creation completed
1952 */
get_ubjson_value(const char_int_type prefix)1953 bool get_ubjson_value(const char_int_type prefix)
1954 {
1955 switch (prefix)
1956 {
1957 case std::char_traits<char_type>::eof(): // EOF
1958 return unexpect_eof(input_format_t::ubjson, "value");
1959
1960 case 'T': // true
1961 return sax->boolean(true);
1962 case 'F': // false
1963 return sax->boolean(false);
1964
1965 case 'Z': // null
1966 return sax->null();
1967
1968 case 'U':
1969 {
1970 std::uint8_t number{};
1971 return get_number(input_format_t::ubjson, number) && sax->number_unsigned(number);
1972 }
1973
1974 case 'i':
1975 {
1976 std::int8_t number{};
1977 return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1978 }
1979
1980 case 'I':
1981 {
1982 std::int16_t number{};
1983 return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1984 }
1985
1986 case 'l':
1987 {
1988 std::int32_t number{};
1989 return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1990 }
1991
1992 case 'L':
1993 {
1994 std::int64_t number{};
1995 return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1996 }
1997
1998 case 'd':
1999 {
2000 float number{};
2001 return get_number(input_format_t::ubjson, number) && sax->number_float(static_cast<number_float_t>(number), "");
2002 }
2003
2004 case 'D':
2005 {
2006 double number{};
2007 return get_number(input_format_t::ubjson, number) && sax->number_float(static_cast<number_float_t>(number), "");
2008 }
2009
2010 case 'H':
2011 {
2012 return get_ubjson_high_precision_number();
2013 }
2014
2015 case 'C': // char
2016 {
2017 get();
2018 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "char")))
2019 {
2020 return false;
2021 }
2022 if (JSON_HEDLEY_UNLIKELY(current > 127))
2023 {
2024 auto last_token = get_token_string();
2025 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token, "char"), BasicJsonType()));
2026 }
2027 string_t s(1, static_cast<typename string_t::value_type>(current));
2028 return sax->string(s);
2029 }
2030
2031 case 'S': // string
2032 {
2033 string_t s;
2034 return get_ubjson_string(s) && sax->string(s);
2035 }
2036
2037 case '[': // array
2038 return get_ubjson_array();
2039
2040 case '{': // object
2041 return get_ubjson_object();
2042
2043 default: // anything else
2044 {
2045 auto last_token = get_token_string();
2046 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "invalid byte: 0x" + last_token, "value"), BasicJsonType()));
2047 }
2048 }
2049 }
2050
2051 /*!
2052 @return whether array creation completed
2053 */
get_ubjson_array()2054 bool get_ubjson_array()
2055 {
2056 std::pair<std::size_t, char_int_type> size_and_type;
2057 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
2058 {
2059 return false;
2060 }
2061
2062 if (size_and_type.first != string_t::npos)
2063 {
2064 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first)))
2065 {
2066 return false;
2067 }
2068
2069 if (size_and_type.second != 0)
2070 {
2071 if (size_and_type.second != 'N')
2072 {
2073 for (std::size_t i = 0; i < size_and_type.first; ++i)
2074 {
2075 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
2076 {
2077 return false;
2078 }
2079 }
2080 }
2081 }
2082 else
2083 {
2084 for (std::size_t i = 0; i < size_and_type.first; ++i)
2085 {
2086 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2087 {
2088 return false;
2089 }
2090 }
2091 }
2092 }
2093 else
2094 {
2095 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1))))
2096 {
2097 return false;
2098 }
2099
2100 while (current != ']')
2101 {
2102 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal(false)))
2103 {
2104 return false;
2105 }
2106 get_ignore_noop();
2107 }
2108 }
2109
2110 return sax->end_array();
2111 }
2112
2113 /*!
2114 @return whether object creation completed
2115 */
get_ubjson_object()2116 bool get_ubjson_object()
2117 {
2118 std::pair<std::size_t, char_int_type> size_and_type;
2119 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
2120 {
2121 return false;
2122 }
2123
2124 string_t key;
2125 if (size_and_type.first != string_t::npos)
2126 {
2127 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first)))
2128 {
2129 return false;
2130 }
2131
2132 if (size_and_type.second != 0)
2133 {
2134 for (std::size_t i = 0; i < size_and_type.first; ++i)
2135 {
2136 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key)))
2137 {
2138 return false;
2139 }
2140 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
2141 {
2142 return false;
2143 }
2144 key.clear();
2145 }
2146 }
2147 else
2148 {
2149 for (std::size_t i = 0; i < size_and_type.first; ++i)
2150 {
2151 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key)))
2152 {
2153 return false;
2154 }
2155 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2156 {
2157 return false;
2158 }
2159 key.clear();
2160 }
2161 }
2162 }
2163 else
2164 {
2165 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1))))
2166 {
2167 return false;
2168 }
2169
2170 while (current != '}')
2171 {
2172 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key, false) || !sax->key(key)))
2173 {
2174 return false;
2175 }
2176 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2177 {
2178 return false;
2179 }
2180 get_ignore_noop();
2181 key.clear();
2182 }
2183 }
2184
2185 return sax->end_object();
2186 }
2187
2188 // Note, no reader for UBJSON binary types is implemented because they do
2189 // not exist
2190
get_ubjson_high_precision_number()2191 bool get_ubjson_high_precision_number()
2192 {
2193 // get size of following number string
2194 std::size_t size{};
2195 auto res = get_ubjson_size_value(size);
2196 if (JSON_HEDLEY_UNLIKELY(!res))
2197 {
2198 return res;
2199 }
2200
2201 // get number string
2202 std::vector<char> number_vector;
2203 for (std::size_t i = 0; i < size; ++i)
2204 {
2205 get();
2206 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "number")))
2207 {
2208 return false;
2209 }
2210 number_vector.push_back(static_cast<char>(current));
2211 }
2212
2213 // parse number string
2214 using ia_type = decltype(detail::input_adapter(number_vector));
2215 auto number_lexer = detail::lexer<BasicJsonType, ia_type>(detail::input_adapter(number_vector), false);
2216 const auto result_number = number_lexer.scan();
2217 const auto number_string = number_lexer.get_token_string();
2218 const auto result_remainder = number_lexer.scan();
2219
2220 using token_type = typename detail::lexer_base<BasicJsonType>::token_type;
2221
2222 if (JSON_HEDLEY_UNLIKELY(result_remainder != token_type::end_of_input))
2223 {
2224 return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + number_lexer.get_token_string(), "high-precision number"), BasicJsonType()));
2225 }
2226
2227 switch (result_number)
2228 {
2229 case token_type::value_integer:
2230 return sax->number_integer(number_lexer.get_number_integer());
2231 case token_type::value_unsigned:
2232 return sax->number_unsigned(number_lexer.get_number_unsigned());
2233 case token_type::value_float:
2234 return sax->number_float(number_lexer.get_number_float(), std::move(number_string));
2235 default:
2236 return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + number_lexer.get_token_string(), "high-precision number"), BasicJsonType()));
2237 }
2238 }
2239
2240 ///////////////////////
2241 // Utility functions //
2242 ///////////////////////
2243
2244 /*!
2245 @brief get next character from the input
2246
    This function provides the interface to the used input adapter. It does
    not throw in case the input reached EOF, but returns the negative value
    `std::char_traits<char_type>::eof()` in that case.
2250
2251 @return character read from the input
2252 */
get()2253 char_int_type get()
2254 {
2255 ++chars_read;
2256 return current = ia.get_character();
2257 }
2258
2259 /*!
2260 @return character read from the input after ignoring all 'N' entries
2261 */
get_ignore_noop()2262 char_int_type get_ignore_noop()
2263 {
2264 do
2265 {
2266 get();
2267 }
2268 while (current == 'N');
2269
2270 return current;
2271 }
2272
    /*!
    @brief read a number from the input

    @tparam NumberType the type of the number
    @param[in] format   the current format (for diagnostics)
    @param[out] result  number of type @a NumberType

    @return whether conversion completed

    @note This function needs to respect the system's endianness, because
          bytes in CBOR, MessagePack, and UBJSON are stored in network order
          (big endian) and therefore need reordering on little endian systems.
    */
2286 template<typename NumberType, bool InputIsLittleEndian = false>
get_number(const input_format_t format,NumberType & result)2287 bool get_number(const input_format_t format, NumberType& result)
2288 {
2289 // step 1: read input into array with system's byte order
2290 std::array<std::uint8_t, sizeof(NumberType)> vec{};
2291 for (std::size_t i = 0; i < sizeof(NumberType); ++i)
2292 {
2293 get();
2294 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number")))
2295 {
2296 return false;
2297 }
2298
2299 // reverse byte order prior to conversion if necessary
2300 if (is_little_endian != InputIsLittleEndian)
2301 {
2302 vec[sizeof(NumberType) - i - 1] = static_cast<std::uint8_t>(current);
2303 }
2304 else
2305 {
2306 vec[i] = static_cast<std::uint8_t>(current); // LCOV_EXCL_LINE
2307 }
2308 }
2309
2310 // step 2: convert array into number of type T and return
2311 std::memcpy(&result, vec.data(), sizeof(NumberType));
2312 return true;
2313 }
2314
2315 /*!
2316 @brief create a string by reading characters from the input
2317
2318 @tparam NumberType the type of the number
2319 @param[in] format the current format (for diagnostics)
2320 @param[in] len number of characters to read
2321 @param[out] result string created by reading @a len bytes
2322
2323 @return whether string creation completed
2324
2325 @note We can not reserve @a len bytes for the result, because @a len
2326 may be too large. Usually, @ref unexpect_eof() detects the end of
2327 the input before we run out of string memory.
2328 */
2329 template<typename NumberType>
get_string(const input_format_t format,const NumberType len,string_t & result)2330 bool get_string(const input_format_t format,
2331 const NumberType len,
2332 string_t& result)
2333 {
2334 bool success = true;
2335 for (NumberType i = 0; i < len; i++)
2336 {
2337 get();
2338 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "string")))
2339 {
2340 success = false;
2341 break;
2342 }
2343 result.push_back(static_cast<typename string_t::value_type>(current));
2344 }
2345 return success;
2346 }
2347
2348 /*!
2349 @brief create a byte array by reading bytes from the input
2350
2351 @tparam NumberType the type of the number
2352 @param[in] format the current format (for diagnostics)
2353 @param[in] len number of bytes to read
2354 @param[out] result byte array created by reading @a len bytes
2355
2356 @return whether byte array creation completed
2357
2358 @note We can not reserve @a len bytes for the result, because @a len
2359 may be too large. Usually, @ref unexpect_eof() detects the end of
2360 the input before we run out of memory.
2361 */
2362 template<typename NumberType>
get_binary(const input_format_t format,const NumberType len,binary_t & result)2363 bool get_binary(const input_format_t format,
2364 const NumberType len,
2365 binary_t& result)
2366 {
2367 bool success = true;
2368 for (NumberType i = 0; i < len; i++)
2369 {
2370 get();
2371 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "binary")))
2372 {
2373 success = false;
2374 break;
2375 }
2376 result.push_back(static_cast<std::uint8_t>(current));
2377 }
2378 return success;
2379 }
2380
2381 /*!
2382 @param[in] format the current format (for diagnostics)
2383 @param[in] context further context information (for diagnostics)
2384 @return whether the last read character is not EOF
2385 */
2386 JSON_HEDLEY_NON_NULL(3)
unexpect_eof(const input_format_t format,const char * context) const2387 bool unexpect_eof(const input_format_t format, const char* context) const
2388 {
2389 if (JSON_HEDLEY_UNLIKELY(current == std::char_traits<char_type>::eof()))
2390 {
2391 return sax->parse_error(chars_read, "<end of file>",
2392 parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), BasicJsonType()));
2393 }
2394 return true;
2395 }
2396
2397 /*!
2398 @return a string representation of the last read byte
2399 */
get_token_string() const2400 std::string get_token_string() const
2401 {
2402 std::array<char, 3> cr{{}};
2403 (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(current)); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
2404 return std::string{cr.data()};
2405 }
2406
2407 /*!
2408 @param[in] format the current format
2409 @param[in] detail a detailed error message
2410 @param[in] context further context information
2411 @return a message string to use in the parse_error exceptions
2412 */
exception_message(const input_format_t format,const std::string & detail,const std::string & context) const2413 std::string exception_message(const input_format_t format,
2414 const std::string& detail,
2415 const std::string& context) const
2416 {
2417 std::string error_msg = "syntax error while parsing ";
2418
2419 switch (format)
2420 {
2421 case input_format_t::cbor:
2422 error_msg += "CBOR";
2423 break;
2424
2425 case input_format_t::msgpack:
2426 error_msg += "MessagePack";
2427 break;
2428
2429 case input_format_t::ubjson:
2430 error_msg += "UBJSON";
2431 break;
2432
2433 case input_format_t::bson:
2434 error_msg += "BSON";
2435 break;
2436
2437 default: // LCOV_EXCL_LINE
2438 JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
2439 }
2440
2441 return error_msg + " " + context + ": " + detail;
2442 }
2443
private:
/// input adapter (the source the reader takes its input from)
InputAdapterType ia;

/// the current character, i.e. the one most recently read; starts out as
/// the EOF value of the input's character traits
char_int_type current = std::char_traits<char_type>::eof();

/// the number of characters read so far; passed as the position when a
/// parse error is reported
std::size_t chars_read = 0;

/// whether we can assume little endianness; determined once per reader
/// via little_endianess()
const bool is_little_endian = little_endianess();

/// the SAX parser that receives parse errors (and, presumably, the parse
/// events); null until assigned elsewhere in the class
json_sax_t* sax = nullptr;
2459 };
2460 } // namespace detail
2461 } // namespace nlohmann
2462