1 #pragma once
2
3 #include <algorithm> // generate_n
4 #include <array> // array
5 #include <cmath> // ldexp
6 #include <cstddef> // size_t
7 #include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t
8 #include <cstdio> // snprintf
9 #include <cstring> // memcpy
10 #include <iterator> // back_inserter
11 #include <limits> // numeric_limits
12 #include <string> // char_traits, string
13 #include <utility> // make_pair, move
14
15 #include <nlohmann/detail/exceptions.hpp>
16 #include <nlohmann/detail/input/input_adapters.hpp>
17 #include <nlohmann/detail/input/json_sax.hpp>
18 #include <nlohmann/detail/input/lexer.hpp>
19 #include <nlohmann/detail/macro_scope.hpp>
20 #include <nlohmann/detail/meta/is_sax.hpp>
21 #include <nlohmann/detail/value_t.hpp>
22
23 namespace nlohmann
24 {
25 namespace detail
26 {
27
28 /// how to treat CBOR tags
enum class cbor_tag_handler_t
{
    error,  ///< report a parse error as soon as a tagged item is encountered
    ignore  ///< skip the tag itself and parse the value that follows it
};
34
35 /*!
36 @brief determine system byte order
37
38 @return true if and only if system's byte order is little endian
39
40 @note from https://stackoverflow.com/a/1001328/266378
41 */
static inline bool little_endianess(int num = 1) noexcept
{
    // Look at the byte stored at the lowest address of `num`. With the
    // default argument 1 that byte equals 1 exactly when the least
    // significant byte is stored first.
    const char* const lowest_byte = reinterpret_cast<const char*>(&num);
    return lowest_byte[0] == 1;
}
46
47
48 ///////////////////
49 // binary reader //
50 ///////////////////
51
52 /*!
@brief deserialization of BSON, CBOR, MessagePack, and UBJSON values
54 */
55 template<typename BasicJsonType, typename InputAdapterType, typename SAX = json_sax_dom_parser<BasicJsonType>>
56 class binary_reader
57 {
58 using number_integer_t = typename BasicJsonType::number_integer_t;
59 using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
60 using number_float_t = typename BasicJsonType::number_float_t;
61 using string_t = typename BasicJsonType::string_t;
62 using binary_t = typename BasicJsonType::binary_t;
63 using json_sax_t = SAX;
64 using char_type = typename InputAdapterType::char_type;
65 using char_int_type = typename std::char_traits<char_type>::int_type;
66
67 public:
68 /*!
69 @brief create a binary reader
70
71 @param[in] adapter input adapter to read from
72 */
binary_reader(InputAdapterType && adapter)73 explicit binary_reader(InputAdapterType&& adapter) : ia(std::move(adapter))
74 {
75 (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
76 }
77
78 // make class move-only
79 binary_reader(const binary_reader&) = delete;
80 binary_reader(binary_reader&&) = default;
81 binary_reader& operator=(const binary_reader&) = delete;
82 binary_reader& operator=(binary_reader&&) = default;
83 ~binary_reader() = default;
84
85 /*!
86 @param[in] format the binary format to parse
87 @param[in] sax_ a SAX event processor
@param[in] strict  whether to expect the input to be consumed completely
89 @param[in] tag_handler how to treat CBOR tags
90
@return whether parsing succeeded (and, in strict mode, the input was
        fully consumed)
92 */
93 JSON_HEDLEY_NON_NULL(3)
sax_parse(const input_format_t format,json_sax_t * sax_,const bool strict=true,const cbor_tag_handler_t tag_handler=cbor_tag_handler_t::error)94 bool sax_parse(const input_format_t format,
95 json_sax_t* sax_,
96 const bool strict = true,
97 const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error)
98 {
99 sax = sax_;
100 bool result = false;
101
102 switch (format)
103 {
104 case input_format_t::bson:
105 result = parse_bson_internal();
106 break;
107
108 case input_format_t::cbor:
109 result = parse_cbor_internal(true, tag_handler);
110 break;
111
112 case input_format_t::msgpack:
113 result = parse_msgpack_internal();
114 break;
115
116 case input_format_t::ubjson:
117 result = parse_ubjson_internal();
118 break;
119
120 default: // LCOV_EXCL_LINE
121 JSON_ASSERT(false); // LCOV_EXCL_LINE
122 }
123
124 // strict mode: next byte must be EOF
125 if (result && strict)
126 {
127 if (format == input_format_t::ubjson)
128 {
129 get_ignore_noop();
130 }
131 else
132 {
133 get();
134 }
135
136 if (JSON_HEDLEY_UNLIKELY(current != std::char_traits<char_type>::eof()))
137 {
138 return sax->parse_error(chars_read, get_token_string(),
139 parse_error::create(110, chars_read, exception_message(format, "expected end of input; last byte: 0x" + get_token_string(), "value")));
140 }
141 }
142
143 return result;
144 }
145
146 private:
147 //////////
148 // BSON //
149 //////////
150
151 /*!
152 @brief Reads in a BSON-object and passes it to the SAX-parser.
153 @return whether a valid BSON-value was passed to the SAX parser
154 */
parse_bson_internal()155 bool parse_bson_internal()
156 {
157 std::int32_t document_size{};
158 get_number<std::int32_t, true>(input_format_t::bson, document_size);
159
160 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1))))
161 {
162 return false;
163 }
164
165 if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/false)))
166 {
167 return false;
168 }
169
170 return sax->end_object();
171 }
172
173 /*!
174 @brief Parses a C-style string from the BSON input.
175 @param[in, out] result A reference to the string variable where the read
176 string is to be stored.
@return `true` if the \x00-byte indicating the end of the string was
        encountered before the EOF; `false` indicates an unexpected EOF.
179 */
get_bson_cstr(string_t & result)180 bool get_bson_cstr(string_t& result)
181 {
182 auto out = std::back_inserter(result);
183 while (true)
184 {
185 get();
186 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "cstring")))
187 {
188 return false;
189 }
190 if (current == 0x00)
191 {
192 return true;
193 }
194 *out++ = static_cast<typename string_t::value_type>(current);
195 }
196 }
197
198 /*!
199 @brief Parses a zero-terminated string of length @a len from the BSON
200 input.
201 @param[in] len The length (including the zero-byte at the end) of the
202 string to be read.
203 @param[in, out] result A reference to the string variable where the read
204 string is to be stored.
205 @tparam NumberType The type of the length @a len
206 @pre len >= 1
207 @return `true` if the string was successfully parsed
208 */
209 template<typename NumberType>
get_bson_string(const NumberType len,string_t & result)210 bool get_bson_string(const NumberType len, string_t& result)
211 {
212 if (JSON_HEDLEY_UNLIKELY(len < 1))
213 {
214 auto last_token = get_token_string();
215 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "string length must be at least 1, is " + std::to_string(len), "string")));
216 }
217
218 return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) && get() != std::char_traits<char_type>::eof();
219 }
220
221 /*!
222 @brief Parses a byte array input of length @a len from the BSON input.
223 @param[in] len The length of the byte array to be read.
224 @param[in, out] result A reference to the binary variable where the read
225 array is to be stored.
226 @tparam NumberType The type of the length @a len
227 @pre len >= 0
228 @return `true` if the byte array was successfully parsed
229 */
230 template<typename NumberType>
get_bson_binary(const NumberType len,binary_t & result)231 bool get_bson_binary(const NumberType len, binary_t& result)
232 {
233 if (JSON_HEDLEY_UNLIKELY(len < 0))
234 {
235 auto last_token = get_token_string();
236 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "byte array length cannot be negative, is " + std::to_string(len), "binary")));
237 }
238
239 // All BSON binary values have a subtype
240 std::uint8_t subtype{};
241 get_number<std::uint8_t>(input_format_t::bson, subtype);
242 result.set_subtype(subtype);
243
244 return get_binary(input_format_t::bson, len, result);
245 }
246
247 /*!
248 @brief Read a BSON document element of the given @a element_type.
249 @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html
250 @param[in] element_type_parse_position The position in the input stream,
251 where the `element_type` was read.
252 @warning Not all BSON element types are supported yet. An unsupported
253 @a element_type will give rise to a parse_error.114:
254 Unsupported BSON record type 0x...
255 @return whether a valid BSON-object/array was passed to the SAX parser
256 */
parse_bson_element_internal(const char_int_type element_type,const std::size_t element_type_parse_position)257 bool parse_bson_element_internal(const char_int_type element_type,
258 const std::size_t element_type_parse_position)
259 {
260 switch (element_type)
261 {
262 case 0x01: // double
263 {
264 double number{};
265 return get_number<double, true>(input_format_t::bson, number) && sax->number_float(static_cast<number_float_t>(number), "");
266 }
267
268 case 0x02: // string
269 {
270 std::int32_t len{};
271 string_t value;
272 return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_string(len, value) && sax->string(value);
273 }
274
275 case 0x03: // object
276 {
277 return parse_bson_internal();
278 }
279
280 case 0x04: // array
281 {
282 return parse_bson_array();
283 }
284
285 case 0x05: // binary
286 {
287 std::int32_t len{};
288 binary_t value;
289 return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_binary(len, value) && sax->binary(value);
290 }
291
292 case 0x08: // boolean
293 {
294 return sax->boolean(get() != 0);
295 }
296
297 case 0x0A: // null
298 {
299 return sax->null();
300 }
301
302 case 0x10: // int32
303 {
304 std::int32_t value{};
305 return get_number<std::int32_t, true>(input_format_t::bson, value) && sax->number_integer(value);
306 }
307
308 case 0x12: // int64
309 {
310 std::int64_t value{};
311 return get_number<std::int64_t, true>(input_format_t::bson, value) && sax->number_integer(value);
312 }
313
314 default: // anything else not supported (yet)
315 {
316 std::array<char, 3> cr{{}};
317 (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(element_type));
318 return sax->parse_error(element_type_parse_position, std::string(cr.data()), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr.data())));
319 }
320 }
321 }
322
323 /*!
324 @brief Read a BSON element list (as specified in the BSON-spec)
325
326 The same binary layout is used for objects and arrays, hence it must be
327 indicated with the argument @a is_array which one is expected
328 (true --> array, false --> object).
329
330 @param[in] is_array Determines if the element list being read is to be
331 treated as an object (@a is_array == false), or as an
332 array (@a is_array == true).
333 @return whether a valid BSON-object/array was passed to the SAX parser
334 */
parse_bson_element_list(const bool is_array)335 bool parse_bson_element_list(const bool is_array)
336 {
337 string_t key;
338
339 while (auto element_type = get())
340 {
341 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "element list")))
342 {
343 return false;
344 }
345
346 const std::size_t element_type_parse_position = chars_read;
347 if (JSON_HEDLEY_UNLIKELY(!get_bson_cstr(key)))
348 {
349 return false;
350 }
351
352 if (!is_array && !sax->key(key))
353 {
354 return false;
355 }
356
357 if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_internal(element_type, element_type_parse_position)))
358 {
359 return false;
360 }
361
362 // get_bson_cstr only appends
363 key.clear();
364 }
365
366 return true;
367 }
368
369 /*!
370 @brief Reads an array from the BSON input and passes it to the SAX-parser.
371 @return whether a valid BSON-array was passed to the SAX parser
372 */
parse_bson_array()373 bool parse_bson_array()
374 {
375 std::int32_t document_size{};
376 get_number<std::int32_t, true>(input_format_t::bson, document_size);
377
378 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1))))
379 {
380 return false;
381 }
382
383 if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/true)))
384 {
385 return false;
386 }
387
388 return sax->end_array();
389 }
390
391 //////////
392 // CBOR //
393 //////////
394
395 /*!
396 @param[in] get_char whether a new character should be retrieved from the
397 input (true) or whether the last read character should
398 be considered instead (false)
399 @param[in] tag_handler how CBOR tags should be treated
400
401 @return whether a valid CBOR value was passed to the SAX parser
402 */
parse_cbor_internal(const bool get_char,const cbor_tag_handler_t tag_handler)403 bool parse_cbor_internal(const bool get_char,
404 const cbor_tag_handler_t tag_handler)
405 {
406 switch (get_char ? get() : current)
407 {
408 // EOF
409 case std::char_traits<char_type>::eof():
410 return unexpect_eof(input_format_t::cbor, "value");
411
412 // Integer 0x00..0x17 (0..23)
413 case 0x00:
414 case 0x01:
415 case 0x02:
416 case 0x03:
417 case 0x04:
418 case 0x05:
419 case 0x06:
420 case 0x07:
421 case 0x08:
422 case 0x09:
423 case 0x0A:
424 case 0x0B:
425 case 0x0C:
426 case 0x0D:
427 case 0x0E:
428 case 0x0F:
429 case 0x10:
430 case 0x11:
431 case 0x12:
432 case 0x13:
433 case 0x14:
434 case 0x15:
435 case 0x16:
436 case 0x17:
437 return sax->number_unsigned(static_cast<number_unsigned_t>(current));
438
439 case 0x18: // Unsigned integer (one-byte uint8_t follows)
440 {
441 std::uint8_t number{};
442 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
443 }
444
445 case 0x19: // Unsigned integer (two-byte uint16_t follows)
446 {
447 std::uint16_t number{};
448 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
449 }
450
451 case 0x1A: // Unsigned integer (four-byte uint32_t follows)
452 {
453 std::uint32_t number{};
454 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
455 }
456
457 case 0x1B: // Unsigned integer (eight-byte uint64_t follows)
458 {
459 std::uint64_t number{};
460 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
461 }
462
463 // Negative integer -1-0x00..-1-0x17 (-1..-24)
464 case 0x20:
465 case 0x21:
466 case 0x22:
467 case 0x23:
468 case 0x24:
469 case 0x25:
470 case 0x26:
471 case 0x27:
472 case 0x28:
473 case 0x29:
474 case 0x2A:
475 case 0x2B:
476 case 0x2C:
477 case 0x2D:
478 case 0x2E:
479 case 0x2F:
480 case 0x30:
481 case 0x31:
482 case 0x32:
483 case 0x33:
484 case 0x34:
485 case 0x35:
486 case 0x36:
487 case 0x37:
488 return sax->number_integer(static_cast<std::int8_t>(0x20 - 1 - current));
489
490 case 0x38: // Negative integer (one-byte uint8_t follows)
491 {
492 std::uint8_t number{};
493 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
494 }
495
496 case 0x39: // Negative integer -1-n (two-byte uint16_t follows)
497 {
498 std::uint16_t number{};
499 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
500 }
501
502 case 0x3A: // Negative integer -1-n (four-byte uint32_t follows)
503 {
504 std::uint32_t number{};
505 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
506 }
507
508 case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows)
509 {
510 std::uint64_t number{};
511 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1)
512 - static_cast<number_integer_t>(number));
513 }
514
515 // Binary data (0x00..0x17 bytes follow)
516 case 0x40:
517 case 0x41:
518 case 0x42:
519 case 0x43:
520 case 0x44:
521 case 0x45:
522 case 0x46:
523 case 0x47:
524 case 0x48:
525 case 0x49:
526 case 0x4A:
527 case 0x4B:
528 case 0x4C:
529 case 0x4D:
530 case 0x4E:
531 case 0x4F:
532 case 0x50:
533 case 0x51:
534 case 0x52:
535 case 0x53:
536 case 0x54:
537 case 0x55:
538 case 0x56:
539 case 0x57:
540 case 0x58: // Binary data (one-byte uint8_t for n follows)
541 case 0x59: // Binary data (two-byte uint16_t for n follow)
542 case 0x5A: // Binary data (four-byte uint32_t for n follow)
543 case 0x5B: // Binary data (eight-byte uint64_t for n follow)
544 case 0x5F: // Binary data (indefinite length)
545 {
546 binary_t b;
547 return get_cbor_binary(b) && sax->binary(b);
548 }
549
550 // UTF-8 string (0x00..0x17 bytes follow)
551 case 0x60:
552 case 0x61:
553 case 0x62:
554 case 0x63:
555 case 0x64:
556 case 0x65:
557 case 0x66:
558 case 0x67:
559 case 0x68:
560 case 0x69:
561 case 0x6A:
562 case 0x6B:
563 case 0x6C:
564 case 0x6D:
565 case 0x6E:
566 case 0x6F:
567 case 0x70:
568 case 0x71:
569 case 0x72:
570 case 0x73:
571 case 0x74:
572 case 0x75:
573 case 0x76:
574 case 0x77:
575 case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
576 case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
577 case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
578 case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
579 case 0x7F: // UTF-8 string (indefinite length)
580 {
581 string_t s;
582 return get_cbor_string(s) && sax->string(s);
583 }
584
585 // array (0x00..0x17 data items follow)
586 case 0x80:
587 case 0x81:
588 case 0x82:
589 case 0x83:
590 case 0x84:
591 case 0x85:
592 case 0x86:
593 case 0x87:
594 case 0x88:
595 case 0x89:
596 case 0x8A:
597 case 0x8B:
598 case 0x8C:
599 case 0x8D:
600 case 0x8E:
601 case 0x8F:
602 case 0x90:
603 case 0x91:
604 case 0x92:
605 case 0x93:
606 case 0x94:
607 case 0x95:
608 case 0x96:
609 case 0x97:
610 return get_cbor_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
611
612 case 0x98: // array (one-byte uint8_t for n follows)
613 {
614 std::uint8_t len{};
615 return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
616 }
617
618 case 0x99: // array (two-byte uint16_t for n follow)
619 {
620 std::uint16_t len{};
621 return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
622 }
623
624 case 0x9A: // array (four-byte uint32_t for n follow)
625 {
626 std::uint32_t len{};
627 return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
628 }
629
630 case 0x9B: // array (eight-byte uint64_t for n follow)
631 {
632 std::uint64_t len{};
633 return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
634 }
635
636 case 0x9F: // array (indefinite length)
637 return get_cbor_array(std::size_t(-1), tag_handler);
638
639 // map (0x00..0x17 pairs of data items follow)
640 case 0xA0:
641 case 0xA1:
642 case 0xA2:
643 case 0xA3:
644 case 0xA4:
645 case 0xA5:
646 case 0xA6:
647 case 0xA7:
648 case 0xA8:
649 case 0xA9:
650 case 0xAA:
651 case 0xAB:
652 case 0xAC:
653 case 0xAD:
654 case 0xAE:
655 case 0xAF:
656 case 0xB0:
657 case 0xB1:
658 case 0xB2:
659 case 0xB3:
660 case 0xB4:
661 case 0xB5:
662 case 0xB6:
663 case 0xB7:
664 return get_cbor_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
665
666 case 0xB8: // map (one-byte uint8_t for n follows)
667 {
668 std::uint8_t len{};
669 return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
670 }
671
672 case 0xB9: // map (two-byte uint16_t for n follow)
673 {
674 std::uint16_t len{};
675 return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
676 }
677
678 case 0xBA: // map (four-byte uint32_t for n follow)
679 {
680 std::uint32_t len{};
681 return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
682 }
683
684 case 0xBB: // map (eight-byte uint64_t for n follow)
685 {
686 std::uint64_t len{};
687 return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
688 }
689
690 case 0xBF: // map (indefinite length)
691 return get_cbor_object(std::size_t(-1), tag_handler);
692
693 case 0xC6: // tagged item
694 case 0xC7:
695 case 0xC8:
696 case 0xC9:
697 case 0xCA:
698 case 0xCB:
699 case 0xCC:
700 case 0xCD:
701 case 0xCE:
702 case 0xCF:
703 case 0xD0:
704 case 0xD1:
705 case 0xD2:
706 case 0xD3:
707 case 0xD4:
708 case 0xD8: // tagged item (1 bytes follow)
709 case 0xD9: // tagged item (2 bytes follow)
710 case 0xDA: // tagged item (4 bytes follow)
711 case 0xDB: // tagged item (8 bytes follow)
712 {
713 switch (tag_handler)
714 {
715 case cbor_tag_handler_t::error:
716 {
717 auto last_token = get_token_string();
718 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value")));
719 }
720
721 case cbor_tag_handler_t::ignore:
722 {
723 switch (current)
724 {
725 case 0xD8:
726 {
727 std::uint8_t len{};
728 get_number(input_format_t::cbor, len);
729 break;
730 }
731 case 0xD9:
732 {
733 std::uint16_t len{};
734 get_number(input_format_t::cbor, len);
735 break;
736 }
737 case 0xDA:
738 {
739 std::uint32_t len{};
740 get_number(input_format_t::cbor, len);
741 break;
742 }
743 case 0xDB:
744 {
745 std::uint64_t len{};
746 get_number(input_format_t::cbor, len);
747 break;
748 }
749 default:
750 break;
751 }
752 return parse_cbor_internal(true, tag_handler);
753 }
754
755 default: // LCOV_EXCL_LINE
756 JSON_ASSERT(false); // LCOV_EXCL_LINE
757 }
758 }
759
760 case 0xF4: // false
761 return sax->boolean(false);
762
763 case 0xF5: // true
764 return sax->boolean(true);
765
766 case 0xF6: // null
767 return sax->null();
768
769 case 0xF9: // Half-Precision Float (two-byte IEEE 754)
770 {
771 const auto byte1_raw = get();
772 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number")))
773 {
774 return false;
775 }
776 const auto byte2_raw = get();
777 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number")))
778 {
779 return false;
780 }
781
782 const auto byte1 = static_cast<unsigned char>(byte1_raw);
783 const auto byte2 = static_cast<unsigned char>(byte2_raw);
784
785 // code from RFC 7049, Appendix D, Figure 3:
786 // As half-precision floating-point numbers were only added
787 // to IEEE 754 in 2008, today's programming platforms often
788 // still only have limited support for them. It is very
789 // easy to include at least decoding support for them even
790 // without such support. An example of a small decoder for
791 // half-precision floating-point numbers in the C language
792 // is shown in Fig. 3.
793 const auto half = static_cast<unsigned int>((byte1 << 8u) + byte2);
794 const double val = [&half]
795 {
796 const int exp = (half >> 10u) & 0x1Fu;
797 const unsigned int mant = half & 0x3FFu;
798 JSON_ASSERT(0 <= exp&& exp <= 32);
799 JSON_ASSERT(mant <= 1024);
800 switch (exp)
801 {
802 case 0:
803 return std::ldexp(mant, -24);
804 case 31:
805 return (mant == 0)
806 ? std::numeric_limits<double>::infinity()
807 : std::numeric_limits<double>::quiet_NaN();
808 default:
809 return std::ldexp(mant + 1024, exp - 25);
810 }
811 }();
812 return sax->number_float((half & 0x8000u) != 0
813 ? static_cast<number_float_t>(-val)
814 : static_cast<number_float_t>(val), "");
815 }
816
817 case 0xFA: // Single-Precision Float (four-byte IEEE 754)
818 {
819 float number{};
820 return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
821 }
822
823 case 0xFB: // Double-Precision Float (eight-byte IEEE 754)
824 {
825 double number{};
826 return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
827 }
828
829 default: // anything else (0xFF is handled inside the other types)
830 {
831 auto last_token = get_token_string();
832 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value")));
833 }
834 }
835 }
836
837 /*!
838 @brief reads a CBOR string
839
840 This function first reads starting bytes to determine the expected
841 string length and then copies this number of bytes into a string.
842 Additionally, CBOR's strings with indefinite lengths are supported.
843
844 @param[out] result created string
845
846 @return whether string creation completed
847 */
get_cbor_string(string_t & result)848 bool get_cbor_string(string_t& result)
849 {
850 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "string")))
851 {
852 return false;
853 }
854
855 switch (current)
856 {
857 // UTF-8 string (0x00..0x17 bytes follow)
858 case 0x60:
859 case 0x61:
860 case 0x62:
861 case 0x63:
862 case 0x64:
863 case 0x65:
864 case 0x66:
865 case 0x67:
866 case 0x68:
867 case 0x69:
868 case 0x6A:
869 case 0x6B:
870 case 0x6C:
871 case 0x6D:
872 case 0x6E:
873 case 0x6F:
874 case 0x70:
875 case 0x71:
876 case 0x72:
877 case 0x73:
878 case 0x74:
879 case 0x75:
880 case 0x76:
881 case 0x77:
882 {
883 return get_string(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
884 }
885
886 case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
887 {
888 std::uint8_t len{};
889 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
890 }
891
892 case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
893 {
894 std::uint16_t len{};
895 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
896 }
897
898 case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
899 {
900 std::uint32_t len{};
901 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
902 }
903
904 case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
905 {
906 std::uint64_t len{};
907 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
908 }
909
910 case 0x7F: // UTF-8 string (indefinite length)
911 {
912 while (get() != 0xFF)
913 {
914 string_t chunk;
915 if (!get_cbor_string(chunk))
916 {
917 return false;
918 }
919 result.append(chunk);
920 }
921 return true;
922 }
923
924 default:
925 {
926 auto last_token = get_token_string();
927 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string")));
928 }
929 }
930 }
931
932 /*!
933 @brief reads a CBOR byte array
934
935 This function first reads starting bytes to determine the expected
936 byte array length and then copies this number of bytes into the byte array.
937 Additionally, CBOR's byte arrays with indefinite lengths are supported.
938
939 @param[out] result created byte array
940
941 @return whether byte array creation completed
942 */
get_cbor_binary(binary_t & result)943 bool get_cbor_binary(binary_t& result)
944 {
945 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "binary")))
946 {
947 return false;
948 }
949
950 switch (current)
951 {
952 // Binary data (0x00..0x17 bytes follow)
953 case 0x40:
954 case 0x41:
955 case 0x42:
956 case 0x43:
957 case 0x44:
958 case 0x45:
959 case 0x46:
960 case 0x47:
961 case 0x48:
962 case 0x49:
963 case 0x4A:
964 case 0x4B:
965 case 0x4C:
966 case 0x4D:
967 case 0x4E:
968 case 0x4F:
969 case 0x50:
970 case 0x51:
971 case 0x52:
972 case 0x53:
973 case 0x54:
974 case 0x55:
975 case 0x56:
976 case 0x57:
977 {
978 return get_binary(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
979 }
980
981 case 0x58: // Binary data (one-byte uint8_t for n follows)
982 {
983 std::uint8_t len{};
984 return get_number(input_format_t::cbor, len) &&
985 get_binary(input_format_t::cbor, len, result);
986 }
987
988 case 0x59: // Binary data (two-byte uint16_t for n follow)
989 {
990 std::uint16_t len{};
991 return get_number(input_format_t::cbor, len) &&
992 get_binary(input_format_t::cbor, len, result);
993 }
994
995 case 0x5A: // Binary data (four-byte uint32_t for n follow)
996 {
997 std::uint32_t len{};
998 return get_number(input_format_t::cbor, len) &&
999 get_binary(input_format_t::cbor, len, result);
1000 }
1001
1002 case 0x5B: // Binary data (eight-byte uint64_t for n follow)
1003 {
1004 std::uint64_t len{};
1005 return get_number(input_format_t::cbor, len) &&
1006 get_binary(input_format_t::cbor, len, result);
1007 }
1008
1009 case 0x5F: // Binary data (indefinite length)
1010 {
1011 while (get() != 0xFF)
1012 {
1013 binary_t chunk;
1014 if (!get_cbor_binary(chunk))
1015 {
1016 return false;
1017 }
1018 result.insert(result.end(), chunk.begin(), chunk.end());
1019 }
1020 return true;
1021 }
1022
1023 default:
1024 {
1025 auto last_token = get_token_string();
1026 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x40-0x5B) or indefinite binary array type (0x5F); last byte: 0x" + last_token, "binary")));
1027 }
1028 }
1029 }
1030
1031 /*!
1032 @param[in] len the length of the array or std::size_t(-1) for an
1033 array of indefinite size
1034 @param[in] tag_handler how CBOR tags should be treated
1035 @return whether array creation completed
1036 */
get_cbor_array(const std::size_t len,const cbor_tag_handler_t tag_handler)1037 bool get_cbor_array(const std::size_t len,
1038 const cbor_tag_handler_t tag_handler)
1039 {
1040 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len)))
1041 {
1042 return false;
1043 }
1044
1045 if (len != std::size_t(-1))
1046 {
1047 for (std::size_t i = 0; i < len; ++i)
1048 {
1049 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1050 {
1051 return false;
1052 }
1053 }
1054 }
1055 else
1056 {
1057 while (get() != 0xFF)
1058 {
1059 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(false, tag_handler)))
1060 {
1061 return false;
1062 }
1063 }
1064 }
1065
1066 return sax->end_array();
1067 }
1068
1069 /*!
1070 @param[in] len the length of the object or std::size_t(-1) for an
1071 object of indefinite size
1072 @param[in] tag_handler how CBOR tags should be treated
1073 @return whether object creation completed
1074 */
get_cbor_object(const std::size_t len,const cbor_tag_handler_t tag_handler)1075 bool get_cbor_object(const std::size_t len,
1076 const cbor_tag_handler_t tag_handler)
1077 {
1078 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len)))
1079 {
1080 return false;
1081 }
1082
1083 string_t key;
1084 if (len != std::size_t(-1))
1085 {
1086 for (std::size_t i = 0; i < len; ++i)
1087 {
1088 get();
1089 if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key)))
1090 {
1091 return false;
1092 }
1093
1094 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1095 {
1096 return false;
1097 }
1098 key.clear();
1099 }
1100 }
1101 else
1102 {
1103 while (get() != 0xFF)
1104 {
1105 if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key)))
1106 {
1107 return false;
1108 }
1109
1110 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1111 {
1112 return false;
1113 }
1114 key.clear();
1115 }
1116 }
1117
1118 return sax->end_object();
1119 }
1120
1121 /////////////
1122 // MsgPack //
1123 /////////////
1124
1125 /*!
1126 @return whether a valid MessagePack value was passed to the SAX parser
1127 */
parse_msgpack_internal()1128 bool parse_msgpack_internal()
1129 {
1130 switch (get())
1131 {
1132 // EOF
1133 case std::char_traits<char_type>::eof():
1134 return unexpect_eof(input_format_t::msgpack, "value");
1135
1136 // positive fixint
1137 case 0x00:
1138 case 0x01:
1139 case 0x02:
1140 case 0x03:
1141 case 0x04:
1142 case 0x05:
1143 case 0x06:
1144 case 0x07:
1145 case 0x08:
1146 case 0x09:
1147 case 0x0A:
1148 case 0x0B:
1149 case 0x0C:
1150 case 0x0D:
1151 case 0x0E:
1152 case 0x0F:
1153 case 0x10:
1154 case 0x11:
1155 case 0x12:
1156 case 0x13:
1157 case 0x14:
1158 case 0x15:
1159 case 0x16:
1160 case 0x17:
1161 case 0x18:
1162 case 0x19:
1163 case 0x1A:
1164 case 0x1B:
1165 case 0x1C:
1166 case 0x1D:
1167 case 0x1E:
1168 case 0x1F:
1169 case 0x20:
1170 case 0x21:
1171 case 0x22:
1172 case 0x23:
1173 case 0x24:
1174 case 0x25:
1175 case 0x26:
1176 case 0x27:
1177 case 0x28:
1178 case 0x29:
1179 case 0x2A:
1180 case 0x2B:
1181 case 0x2C:
1182 case 0x2D:
1183 case 0x2E:
1184 case 0x2F:
1185 case 0x30:
1186 case 0x31:
1187 case 0x32:
1188 case 0x33:
1189 case 0x34:
1190 case 0x35:
1191 case 0x36:
1192 case 0x37:
1193 case 0x38:
1194 case 0x39:
1195 case 0x3A:
1196 case 0x3B:
1197 case 0x3C:
1198 case 0x3D:
1199 case 0x3E:
1200 case 0x3F:
1201 case 0x40:
1202 case 0x41:
1203 case 0x42:
1204 case 0x43:
1205 case 0x44:
1206 case 0x45:
1207 case 0x46:
1208 case 0x47:
1209 case 0x48:
1210 case 0x49:
1211 case 0x4A:
1212 case 0x4B:
1213 case 0x4C:
1214 case 0x4D:
1215 case 0x4E:
1216 case 0x4F:
1217 case 0x50:
1218 case 0x51:
1219 case 0x52:
1220 case 0x53:
1221 case 0x54:
1222 case 0x55:
1223 case 0x56:
1224 case 0x57:
1225 case 0x58:
1226 case 0x59:
1227 case 0x5A:
1228 case 0x5B:
1229 case 0x5C:
1230 case 0x5D:
1231 case 0x5E:
1232 case 0x5F:
1233 case 0x60:
1234 case 0x61:
1235 case 0x62:
1236 case 0x63:
1237 case 0x64:
1238 case 0x65:
1239 case 0x66:
1240 case 0x67:
1241 case 0x68:
1242 case 0x69:
1243 case 0x6A:
1244 case 0x6B:
1245 case 0x6C:
1246 case 0x6D:
1247 case 0x6E:
1248 case 0x6F:
1249 case 0x70:
1250 case 0x71:
1251 case 0x72:
1252 case 0x73:
1253 case 0x74:
1254 case 0x75:
1255 case 0x76:
1256 case 0x77:
1257 case 0x78:
1258 case 0x79:
1259 case 0x7A:
1260 case 0x7B:
1261 case 0x7C:
1262 case 0x7D:
1263 case 0x7E:
1264 case 0x7F:
1265 return sax->number_unsigned(static_cast<number_unsigned_t>(current));
1266
1267 // fixmap
1268 case 0x80:
1269 case 0x81:
1270 case 0x82:
1271 case 0x83:
1272 case 0x84:
1273 case 0x85:
1274 case 0x86:
1275 case 0x87:
1276 case 0x88:
1277 case 0x89:
1278 case 0x8A:
1279 case 0x8B:
1280 case 0x8C:
1281 case 0x8D:
1282 case 0x8E:
1283 case 0x8F:
1284 return get_msgpack_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
1285
1286 // fixarray
1287 case 0x90:
1288 case 0x91:
1289 case 0x92:
1290 case 0x93:
1291 case 0x94:
1292 case 0x95:
1293 case 0x96:
1294 case 0x97:
1295 case 0x98:
1296 case 0x99:
1297 case 0x9A:
1298 case 0x9B:
1299 case 0x9C:
1300 case 0x9D:
1301 case 0x9E:
1302 case 0x9F:
1303 return get_msgpack_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
1304
1305 // fixstr
1306 case 0xA0:
1307 case 0xA1:
1308 case 0xA2:
1309 case 0xA3:
1310 case 0xA4:
1311 case 0xA5:
1312 case 0xA6:
1313 case 0xA7:
1314 case 0xA8:
1315 case 0xA9:
1316 case 0xAA:
1317 case 0xAB:
1318 case 0xAC:
1319 case 0xAD:
1320 case 0xAE:
1321 case 0xAF:
1322 case 0xB0:
1323 case 0xB1:
1324 case 0xB2:
1325 case 0xB3:
1326 case 0xB4:
1327 case 0xB5:
1328 case 0xB6:
1329 case 0xB7:
1330 case 0xB8:
1331 case 0xB9:
1332 case 0xBA:
1333 case 0xBB:
1334 case 0xBC:
1335 case 0xBD:
1336 case 0xBE:
1337 case 0xBF:
1338 case 0xD9: // str 8
1339 case 0xDA: // str 16
1340 case 0xDB: // str 32
1341 {
1342 string_t s;
1343 return get_msgpack_string(s) && sax->string(s);
1344 }
1345
1346 case 0xC0: // nil
1347 return sax->null();
1348
1349 case 0xC2: // false
1350 return sax->boolean(false);
1351
1352 case 0xC3: // true
1353 return sax->boolean(true);
1354
1355 case 0xC4: // bin 8
1356 case 0xC5: // bin 16
1357 case 0xC6: // bin 32
1358 case 0xC7: // ext 8
1359 case 0xC8: // ext 16
1360 case 0xC9: // ext 32
1361 case 0xD4: // fixext 1
1362 case 0xD5: // fixext 2
1363 case 0xD6: // fixext 4
1364 case 0xD7: // fixext 8
1365 case 0xD8: // fixext 16
1366 {
1367 binary_t b;
1368 return get_msgpack_binary(b) && sax->binary(b);
1369 }
1370
1371 case 0xCA: // float 32
1372 {
1373 float number{};
1374 return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
1375 }
1376
1377 case 0xCB: // float 64
1378 {
1379 double number{};
1380 return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
1381 }
1382
1383 case 0xCC: // uint 8
1384 {
1385 std::uint8_t number{};
1386 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1387 }
1388
1389 case 0xCD: // uint 16
1390 {
1391 std::uint16_t number{};
1392 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1393 }
1394
1395 case 0xCE: // uint 32
1396 {
1397 std::uint32_t number{};
1398 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1399 }
1400
1401 case 0xCF: // uint 64
1402 {
1403 std::uint64_t number{};
1404 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1405 }
1406
1407 case 0xD0: // int 8
1408 {
1409 std::int8_t number{};
1410 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1411 }
1412
1413 case 0xD1: // int 16
1414 {
1415 std::int16_t number{};
1416 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1417 }
1418
1419 case 0xD2: // int 32
1420 {
1421 std::int32_t number{};
1422 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1423 }
1424
1425 case 0xD3: // int 64
1426 {
1427 std::int64_t number{};
1428 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1429 }
1430
1431 case 0xDC: // array 16
1432 {
1433 std::uint16_t len{};
1434 return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast<std::size_t>(len));
1435 }
1436
1437 case 0xDD: // array 32
1438 {
1439 std::uint32_t len{};
1440 return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast<std::size_t>(len));
1441 }
1442
1443 case 0xDE: // map 16
1444 {
1445 std::uint16_t len{};
1446 return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast<std::size_t>(len));
1447 }
1448
1449 case 0xDF: // map 32
1450 {
1451 std::uint32_t len{};
1452 return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast<std::size_t>(len));
1453 }
1454
1455 // negative fixint
1456 case 0xE0:
1457 case 0xE1:
1458 case 0xE2:
1459 case 0xE3:
1460 case 0xE4:
1461 case 0xE5:
1462 case 0xE6:
1463 case 0xE7:
1464 case 0xE8:
1465 case 0xE9:
1466 case 0xEA:
1467 case 0xEB:
1468 case 0xEC:
1469 case 0xED:
1470 case 0xEE:
1471 case 0xEF:
1472 case 0xF0:
1473 case 0xF1:
1474 case 0xF2:
1475 case 0xF3:
1476 case 0xF4:
1477 case 0xF5:
1478 case 0xF6:
1479 case 0xF7:
1480 case 0xF8:
1481 case 0xF9:
1482 case 0xFA:
1483 case 0xFB:
1484 case 0xFC:
1485 case 0xFD:
1486 case 0xFE:
1487 case 0xFF:
1488 return sax->number_integer(static_cast<std::int8_t>(current));
1489
1490 default: // anything else
1491 {
1492 auto last_token = get_token_string();
1493 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value")));
1494 }
1495 }
1496 }
1497
1498 /*!
1499 @brief reads a MessagePack string
1500
1501 This function first reads starting bytes to determine the expected
1502 string length and then copies this number of bytes into a string.
1503
1504 @param[out] result created string
1505
1506 @return whether string creation completed
1507 */
get_msgpack_string(string_t & result)1508 bool get_msgpack_string(string_t& result)
1509 {
1510 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::msgpack, "string")))
1511 {
1512 return false;
1513 }
1514
1515 switch (current)
1516 {
1517 // fixstr
1518 case 0xA0:
1519 case 0xA1:
1520 case 0xA2:
1521 case 0xA3:
1522 case 0xA4:
1523 case 0xA5:
1524 case 0xA6:
1525 case 0xA7:
1526 case 0xA8:
1527 case 0xA9:
1528 case 0xAA:
1529 case 0xAB:
1530 case 0xAC:
1531 case 0xAD:
1532 case 0xAE:
1533 case 0xAF:
1534 case 0xB0:
1535 case 0xB1:
1536 case 0xB2:
1537 case 0xB3:
1538 case 0xB4:
1539 case 0xB5:
1540 case 0xB6:
1541 case 0xB7:
1542 case 0xB8:
1543 case 0xB9:
1544 case 0xBA:
1545 case 0xBB:
1546 case 0xBC:
1547 case 0xBD:
1548 case 0xBE:
1549 case 0xBF:
1550 {
1551 return get_string(input_format_t::msgpack, static_cast<unsigned int>(current) & 0x1Fu, result);
1552 }
1553
1554 case 0xD9: // str 8
1555 {
1556 std::uint8_t len{};
1557 return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1558 }
1559
1560 case 0xDA: // str 16
1561 {
1562 std::uint16_t len{};
1563 return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1564 }
1565
1566 case 0xDB: // str 32
1567 {
1568 std::uint32_t len{};
1569 return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1570 }
1571
1572 default:
1573 {
1574 auto last_token = get_token_string();
1575 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::msgpack, "expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x" + last_token, "string")));
1576 }
1577 }
1578 }
1579
1580 /*!
1581 @brief reads a MessagePack byte array
1582
1583 This function first reads starting bytes to determine the expected
1584 byte array length and then copies this number of bytes into a byte array.
1585
1586 @param[out] result created byte array
1587
1588 @return whether byte array creation completed
1589 */
get_msgpack_binary(binary_t & result)1590 bool get_msgpack_binary(binary_t& result)
1591 {
1592 // helper function to set the subtype
1593 auto assign_and_return_true = [&result](std::int8_t subtype)
1594 {
1595 result.set_subtype(static_cast<std::uint8_t>(subtype));
1596 return true;
1597 };
1598
1599 switch (current)
1600 {
1601 case 0xC4: // bin 8
1602 {
1603 std::uint8_t len{};
1604 return get_number(input_format_t::msgpack, len) &&
1605 get_binary(input_format_t::msgpack, len, result);
1606 }
1607
1608 case 0xC5: // bin 16
1609 {
1610 std::uint16_t len{};
1611 return get_number(input_format_t::msgpack, len) &&
1612 get_binary(input_format_t::msgpack, len, result);
1613 }
1614
1615 case 0xC6: // bin 32
1616 {
1617 std::uint32_t len{};
1618 return get_number(input_format_t::msgpack, len) &&
1619 get_binary(input_format_t::msgpack, len, result);
1620 }
1621
1622 case 0xC7: // ext 8
1623 {
1624 std::uint8_t len{};
1625 std::int8_t subtype{};
1626 return get_number(input_format_t::msgpack, len) &&
1627 get_number(input_format_t::msgpack, subtype) &&
1628 get_binary(input_format_t::msgpack, len, result) &&
1629 assign_and_return_true(subtype);
1630 }
1631
1632 case 0xC8: // ext 16
1633 {
1634 std::uint16_t len{};
1635 std::int8_t subtype{};
1636 return get_number(input_format_t::msgpack, len) &&
1637 get_number(input_format_t::msgpack, subtype) &&
1638 get_binary(input_format_t::msgpack, len, result) &&
1639 assign_and_return_true(subtype);
1640 }
1641
1642 case 0xC9: // ext 32
1643 {
1644 std::uint32_t len{};
1645 std::int8_t subtype{};
1646 return get_number(input_format_t::msgpack, len) &&
1647 get_number(input_format_t::msgpack, subtype) &&
1648 get_binary(input_format_t::msgpack, len, result) &&
1649 assign_and_return_true(subtype);
1650 }
1651
1652 case 0xD4: // fixext 1
1653 {
1654 std::int8_t subtype{};
1655 return get_number(input_format_t::msgpack, subtype) &&
1656 get_binary(input_format_t::msgpack, 1, result) &&
1657 assign_and_return_true(subtype);
1658 }
1659
1660 case 0xD5: // fixext 2
1661 {
1662 std::int8_t subtype{};
1663 return get_number(input_format_t::msgpack, subtype) &&
1664 get_binary(input_format_t::msgpack, 2, result) &&
1665 assign_and_return_true(subtype);
1666 }
1667
1668 case 0xD6: // fixext 4
1669 {
1670 std::int8_t subtype{};
1671 return get_number(input_format_t::msgpack, subtype) &&
1672 get_binary(input_format_t::msgpack, 4, result) &&
1673 assign_and_return_true(subtype);
1674 }
1675
1676 case 0xD7: // fixext 8
1677 {
1678 std::int8_t subtype{};
1679 return get_number(input_format_t::msgpack, subtype) &&
1680 get_binary(input_format_t::msgpack, 8, result) &&
1681 assign_and_return_true(subtype);
1682 }
1683
1684 case 0xD8: // fixext 16
1685 {
1686 std::int8_t subtype{};
1687 return get_number(input_format_t::msgpack, subtype) &&
1688 get_binary(input_format_t::msgpack, 16, result) &&
1689 assign_and_return_true(subtype);
1690 }
1691
1692 default: // LCOV_EXCL_LINE
1693 return false; // LCOV_EXCL_LINE
1694 }
1695 }
1696
1697 /*!
1698 @param[in] len the length of the array
1699 @return whether array creation completed
1700 */
get_msgpack_array(const std::size_t len)1701 bool get_msgpack_array(const std::size_t len)
1702 {
1703 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len)))
1704 {
1705 return false;
1706 }
1707
1708 for (std::size_t i = 0; i < len; ++i)
1709 {
1710 if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal()))
1711 {
1712 return false;
1713 }
1714 }
1715
1716 return sax->end_array();
1717 }
1718
1719 /*!
1720 @param[in] len the length of the object
1721 @return whether object creation completed
1722 */
get_msgpack_object(const std::size_t len)1723 bool get_msgpack_object(const std::size_t len)
1724 {
1725 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len)))
1726 {
1727 return false;
1728 }
1729
1730 string_t key;
1731 for (std::size_t i = 0; i < len; ++i)
1732 {
1733 get();
1734 if (JSON_HEDLEY_UNLIKELY(!get_msgpack_string(key) || !sax->key(key)))
1735 {
1736 return false;
1737 }
1738
1739 if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal()))
1740 {
1741 return false;
1742 }
1743 key.clear();
1744 }
1745
1746 return sax->end_object();
1747 }
1748
1749 ////////////
1750 // UBJSON //
1751 ////////////
1752
1753 /*!
1754 @param[in] get_char whether a new character should be retrieved from the
1755 input (true, default) or whether the last read
1756 character should be considered instead
1757
1758 @return whether a valid UBJSON value was passed to the SAX parser
1759 */
parse_ubjson_internal(const bool get_char=true)1760 bool parse_ubjson_internal(const bool get_char = true)
1761 {
1762 return get_ubjson_value(get_char ? get_ignore_noop() : current);
1763 }
1764
1765 /*!
1766 @brief reads a UBJSON string
1767
1768 This function is either called after reading the 'S' byte explicitly
1769 indicating a string, or in case of an object key where the 'S' byte can be
1770 left out.
1771
1772 @param[out] result created string
1773 @param[in] get_char whether a new character should be retrieved from the
1774 input (true, default) or whether the last read
1775 character should be considered instead
1776
1777 @return whether string creation completed
1778 */
get_ubjson_string(string_t & result,const bool get_char=true)1779 bool get_ubjson_string(string_t& result, const bool get_char = true)
1780 {
1781 if (get_char)
1782 {
1783 get(); // TODO(niels): may we ignore N here?
1784 }
1785
1786 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "value")))
1787 {
1788 return false;
1789 }
1790
1791 switch (current)
1792 {
1793 case 'U':
1794 {
1795 std::uint8_t len{};
1796 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1797 }
1798
1799 case 'i':
1800 {
1801 std::int8_t len{};
1802 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1803 }
1804
1805 case 'I':
1806 {
1807 std::int16_t len{};
1808 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1809 }
1810
1811 case 'l':
1812 {
1813 std::int32_t len{};
1814 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1815 }
1816
1817 case 'L':
1818 {
1819 std::int64_t len{};
1820 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1821 }
1822
1823 default:
1824 auto last_token = get_token_string();
1825 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L); last byte: 0x" + last_token, "string")));
1826 }
1827 }
1828
1829 /*!
1830 @param[out] result determined size
1831 @return whether size determination completed
1832 */
get_ubjson_size_value(std::size_t & result)1833 bool get_ubjson_size_value(std::size_t& result)
1834 {
1835 switch (get_ignore_noop())
1836 {
1837 case 'U':
1838 {
1839 std::uint8_t number{};
1840 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1841 {
1842 return false;
1843 }
1844 result = static_cast<std::size_t>(number);
1845 return true;
1846 }
1847
1848 case 'i':
1849 {
1850 std::int8_t number{};
1851 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1852 {
1853 return false;
1854 }
1855 result = static_cast<std::size_t>(number);
1856 return true;
1857 }
1858
1859 case 'I':
1860 {
1861 std::int16_t number{};
1862 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1863 {
1864 return false;
1865 }
1866 result = static_cast<std::size_t>(number);
1867 return true;
1868 }
1869
1870 case 'l':
1871 {
1872 std::int32_t number{};
1873 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1874 {
1875 return false;
1876 }
1877 result = static_cast<std::size_t>(number);
1878 return true;
1879 }
1880
1881 case 'L':
1882 {
1883 std::int64_t number{};
1884 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1885 {
1886 return false;
1887 }
1888 result = static_cast<std::size_t>(number);
1889 return true;
1890 }
1891
1892 default:
1893 {
1894 auto last_token = get_token_string();
1895 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size")));
1896 }
1897 }
1898 }
1899
1900 /*!
1901 @brief determine the type and size for a container
1902
1903 In the optimized UBJSON format, a type and a size can be provided to allow
1904 for a more compact representation.
1905
1906 @param[out] result pair of the size and the type
1907
1908 @return whether pair creation completed
1909 */
get_ubjson_size_type(std::pair<std::size_t,char_int_type> & result)1910 bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result)
1911 {
1912 result.first = string_t::npos; // size
1913 result.second = 0; // type
1914
1915 get_ignore_noop();
1916
1917 if (current == '$')
1918 {
1919 result.second = get(); // must not ignore 'N', because 'N' maybe the type
1920 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "type")))
1921 {
1922 return false;
1923 }
1924
1925 get_ignore_noop();
1926 if (JSON_HEDLEY_UNLIKELY(current != '#'))
1927 {
1928 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "value")))
1929 {
1930 return false;
1931 }
1932 auto last_token = get_token_string();
1933 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "expected '#' after type information; last byte: 0x" + last_token, "size")));
1934 }
1935
1936 return get_ubjson_size_value(result.first);
1937 }
1938
1939 if (current == '#')
1940 {
1941 return get_ubjson_size_value(result.first);
1942 }
1943
1944 return true;
1945 }
1946
1947 /*!
1948 @param prefix the previously read or set type prefix
1949 @return whether value creation completed
1950 */
get_ubjson_value(const char_int_type prefix)1951 bool get_ubjson_value(const char_int_type prefix)
1952 {
1953 switch (prefix)
1954 {
1955 case std::char_traits<char_type>::eof(): // EOF
1956 return unexpect_eof(input_format_t::ubjson, "value");
1957
1958 case 'T': // true
1959 return sax->boolean(true);
1960 case 'F': // false
1961 return sax->boolean(false);
1962
1963 case 'Z': // null
1964 return sax->null();
1965
1966 case 'U':
1967 {
1968 std::uint8_t number{};
1969 return get_number(input_format_t::ubjson, number) && sax->number_unsigned(number);
1970 }
1971
1972 case 'i':
1973 {
1974 std::int8_t number{};
1975 return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1976 }
1977
1978 case 'I':
1979 {
1980 std::int16_t number{};
1981 return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1982 }
1983
1984 case 'l':
1985 {
1986 std::int32_t number{};
1987 return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1988 }
1989
1990 case 'L':
1991 {
1992 std::int64_t number{};
1993 return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1994 }
1995
1996 case 'd':
1997 {
1998 float number{};
1999 return get_number(input_format_t::ubjson, number) && sax->number_float(static_cast<number_float_t>(number), "");
2000 }
2001
2002 case 'D':
2003 {
2004 double number{};
2005 return get_number(input_format_t::ubjson, number) && sax->number_float(static_cast<number_float_t>(number), "");
2006 }
2007
2008 case 'H':
2009 {
2010 return get_ubjson_high_precision_number();
2011 }
2012
2013 case 'C': // char
2014 {
2015 get();
2016 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "char")))
2017 {
2018 return false;
2019 }
2020 if (JSON_HEDLEY_UNLIKELY(current > 127))
2021 {
2022 auto last_token = get_token_string();
2023 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token, "char")));
2024 }
2025 string_t s(1, static_cast<typename string_t::value_type>(current));
2026 return sax->string(s);
2027 }
2028
2029 case 'S': // string
2030 {
2031 string_t s;
2032 return get_ubjson_string(s) && sax->string(s);
2033 }
2034
2035 case '[': // array
2036 return get_ubjson_array();
2037
2038 case '{': // object
2039 return get_ubjson_object();
2040
2041 default: // anything else
2042 {
2043 auto last_token = get_token_string();
2044 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "invalid byte: 0x" + last_token, "value")));
2045 }
2046 }
2047 }
2048
2049 /*!
2050 @return whether array creation completed
2051 */
get_ubjson_array()2052 bool get_ubjson_array()
2053 {
2054 std::pair<std::size_t, char_int_type> size_and_type;
2055 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
2056 {
2057 return false;
2058 }
2059
2060 if (size_and_type.first != string_t::npos)
2061 {
2062 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first)))
2063 {
2064 return false;
2065 }
2066
2067 if (size_and_type.second != 0)
2068 {
2069 if (size_and_type.second != 'N')
2070 {
2071 for (std::size_t i = 0; i < size_and_type.first; ++i)
2072 {
2073 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
2074 {
2075 return false;
2076 }
2077 }
2078 }
2079 }
2080 else
2081 {
2082 for (std::size_t i = 0; i < size_and_type.first; ++i)
2083 {
2084 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2085 {
2086 return false;
2087 }
2088 }
2089 }
2090 }
2091 else
2092 {
2093 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1))))
2094 {
2095 return false;
2096 }
2097
2098 while (current != ']')
2099 {
2100 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal(false)))
2101 {
2102 return false;
2103 }
2104 get_ignore_noop();
2105 }
2106 }
2107
2108 return sax->end_array();
2109 }
2110
2111 /*!
2112 @return whether object creation completed
2113 */
get_ubjson_object()2114 bool get_ubjson_object()
2115 {
2116 std::pair<std::size_t, char_int_type> size_and_type;
2117 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
2118 {
2119 return false;
2120 }
2121
2122 string_t key;
2123 if (size_and_type.first != string_t::npos)
2124 {
2125 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first)))
2126 {
2127 return false;
2128 }
2129
2130 if (size_and_type.second != 0)
2131 {
2132 for (std::size_t i = 0; i < size_and_type.first; ++i)
2133 {
2134 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key)))
2135 {
2136 return false;
2137 }
2138 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
2139 {
2140 return false;
2141 }
2142 key.clear();
2143 }
2144 }
2145 else
2146 {
2147 for (std::size_t i = 0; i < size_and_type.first; ++i)
2148 {
2149 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key)))
2150 {
2151 return false;
2152 }
2153 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2154 {
2155 return false;
2156 }
2157 key.clear();
2158 }
2159 }
2160 }
2161 else
2162 {
2163 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1))))
2164 {
2165 return false;
2166 }
2167
2168 while (current != '}')
2169 {
2170 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key, false) || !sax->key(key)))
2171 {
2172 return false;
2173 }
2174 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2175 {
2176 return false;
2177 }
2178 get_ignore_noop();
2179 key.clear();
2180 }
2181 }
2182
2183 return sax->end_object();
2184 }
2185
2186 // Note, no reader for UBJSON binary types is implemented because they do
2187 // not exist
2188
get_ubjson_high_precision_number()2189 bool get_ubjson_high_precision_number()
2190 {
2191 // get size of following number string
2192 std::size_t size{};
2193 auto res = get_ubjson_size_value(size);
2194 if (JSON_HEDLEY_UNLIKELY(!res))
2195 {
2196 return res;
2197 }
2198
2199 // get number string
2200 std::vector<char> number_vector;
2201 for (std::size_t i = 0; i < size; ++i)
2202 {
2203 get();
2204 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "number")))
2205 {
2206 return false;
2207 }
2208 number_vector.push_back(static_cast<char>(current));
2209 }
2210
2211 // parse number string
2212 auto number_ia = detail::input_adapter(std::forward<decltype(number_vector)>(number_vector));
2213 auto number_lexer = detail::lexer<BasicJsonType, decltype(number_ia)>(std::move(number_ia), false);
2214 const auto result_number = number_lexer.scan();
2215 const auto number_string = number_lexer.get_token_string();
2216 const auto result_remainder = number_lexer.scan();
2217
2218 using token_type = typename detail::lexer_base<BasicJsonType>::token_type;
2219
2220 if (JSON_HEDLEY_UNLIKELY(result_remainder != token_type::end_of_input))
2221 {
2222 return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + number_lexer.get_token_string(), "high-precision number")));
2223 }
2224
2225 switch (result_number)
2226 {
2227 case token_type::value_integer:
2228 return sax->number_integer(number_lexer.get_number_integer());
2229 case token_type::value_unsigned:
2230 return sax->number_unsigned(number_lexer.get_number_unsigned());
2231 case token_type::value_float:
2232 return sax->number_float(number_lexer.get_number_float(), std::move(number_string));
2233 default:
2234 return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + number_lexer.get_token_string(), "high-precision number")));
2235 }
2236 }
2237
2238 ///////////////////////
2239 // Utility functions //
2240 ///////////////////////
2241
2242 /*!
2243 @brief get next character from the input
2244
This function provides the interface to the used input adapter. It does
not throw in case the input reached EOF, but returns the negative value
`std::char_traits<char_type>::eof()` in that case.
2248
2249 @return character read from the input
2250 */
get()2251 char_int_type get()
2252 {
2253 ++chars_read;
2254 return current = ia.get_character();
2255 }
2256
2257 /*!
2258 @return character read from the input after ignoring all 'N' entries
2259 */
get_ignore_noop()2260 char_int_type get_ignore_noop()
2261 {
2262 do
2263 {
2264 get();
2265 }
2266 while (current == 'N');
2267
2268 return current;
2269 }
2270
/*!
2272 @brief read a number from the input
2273
2274 @tparam NumberType the type of the number
2275 @param[in] format the current format (for diagnostics)
2276 @param[out] result number of type @a NumberType
2277
2278 @return whether conversion completed
2279
@note This function needs to respect the system's endianness, because
bytes in CBOR, MessagePack, and UBJSON are stored in network order
(big endian) and therefore need reordering on little endian systems.
2283 */
2284 template<typename NumberType, bool InputIsLittleEndian = false>
get_number(const input_format_t format,NumberType & result)2285 bool get_number(const input_format_t format, NumberType& result)
2286 {
2287 // step 1: read input into array with system's byte order
2288 std::array<std::uint8_t, sizeof(NumberType)> vec;
2289 for (std::size_t i = 0; i < sizeof(NumberType); ++i)
2290 {
2291 get();
2292 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number")))
2293 {
2294 return false;
2295 }
2296
2297 // reverse byte order prior to conversion if necessary
2298 if (is_little_endian != InputIsLittleEndian)
2299 {
2300 vec[sizeof(NumberType) - i - 1] = static_cast<std::uint8_t>(current);
2301 }
2302 else
2303 {
2304 vec[i] = static_cast<std::uint8_t>(current); // LCOV_EXCL_LINE
2305 }
2306 }
2307
2308 // step 2: convert array into number of type T and return
2309 std::memcpy(&result, vec.data(), sizeof(NumberType));
2310 return true;
2311 }
2312
2313 /*!
2314 @brief create a string by reading characters from the input
2315
2316 @tparam NumberType the type of the number
2317 @param[in] format the current format (for diagnostics)
2318 @param[in] len number of characters to read
2319 @param[out] result string created by reading @a len bytes
2320
2321 @return whether string creation completed
2322
2323 @note We can not reserve @a len bytes for the result, because @a len
2324 may be too large. Usually, @ref unexpect_eof() detects the end of
2325 the input before we run out of string memory.
2326 */
2327 template<typename NumberType>
get_string(const input_format_t format,const NumberType len,string_t & result)2328 bool get_string(const input_format_t format,
2329 const NumberType len,
2330 string_t& result)
2331 {
2332 bool success = true;
2333 for (NumberType i = 0; i < len; i++)
2334 {
2335 get();
2336 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "string")))
2337 {
2338 success = false;
2339 break;
2340 }
2341 result.push_back(static_cast<typename string_t::value_type>(current));
2342 };
2343 return success;
2344 }
2345
2346 /*!
2347 @brief create a byte array by reading bytes from the input
2348
2349 @tparam NumberType the type of the number
2350 @param[in] format the current format (for diagnostics)
2351 @param[in] len number of bytes to read
2352 @param[out] result byte array created by reading @a len bytes
2353
2354 @return whether byte array creation completed
2355
2356 @note We can not reserve @a len bytes for the result, because @a len
2357 may be too large. Usually, @ref unexpect_eof() detects the end of
2358 the input before we run out of memory.
2359 */
2360 template<typename NumberType>
get_binary(const input_format_t format,const NumberType len,binary_t & result)2361 bool get_binary(const input_format_t format,
2362 const NumberType len,
2363 binary_t& result)
2364 {
2365 bool success = true;
2366 for (NumberType i = 0; i < len; i++)
2367 {
2368 get();
2369 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "binary")))
2370 {
2371 success = false;
2372 break;
2373 }
2374 result.push_back(static_cast<std::uint8_t>(current));
2375 }
2376 return success;
2377 }
2378
2379 /*!
2380 @param[in] format the current format (for diagnostics)
2381 @param[in] context further context information (for diagnostics)
2382 @return whether the last read character is not EOF
2383 */
2384 JSON_HEDLEY_NON_NULL(3)
unexpect_eof(const input_format_t format,const char * context) const2385 bool unexpect_eof(const input_format_t format, const char* context) const
2386 {
2387 if (JSON_HEDLEY_UNLIKELY(current == std::char_traits<char_type>::eof()))
2388 {
2389 return sax->parse_error(chars_read, "<end of file>",
2390 parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context)));
2391 }
2392 return true;
2393 }
2394
2395 /*!
2396 @return a string representation of the last read byte
2397 */
get_token_string() const2398 std::string get_token_string() const
2399 {
2400 std::array<char, 3> cr{{}};
2401 (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(current));
2402 return std::string{cr.data()};
2403 }
2404
2405 /*!
2406 @param[in] format the current format
2407 @param[in] detail a detailed error message
2408 @param[in] context further context information
2409 @return a message string to use in the parse_error exceptions
2410 */
exception_message(const input_format_t format,const std::string & detail,const std::string & context) const2411 std::string exception_message(const input_format_t format,
2412 const std::string& detail,
2413 const std::string& context) const
2414 {
2415 std::string error_msg = "syntax error while parsing ";
2416
2417 switch (format)
2418 {
2419 case input_format_t::cbor:
2420 error_msg += "CBOR";
2421 break;
2422
2423 case input_format_t::msgpack:
2424 error_msg += "MessagePack";
2425 break;
2426
2427 case input_format_t::ubjson:
2428 error_msg += "UBJSON";
2429 break;
2430
2431 case input_format_t::bson:
2432 error_msg += "BSON";
2433 break;
2434
2435 default: // LCOV_EXCL_LINE
2436 JSON_ASSERT(false); // LCOV_EXCL_LINE
2437 }
2438
2439 return error_msg + " " + context + ": " + detail;
2440 }
2441
private:
    /// input adapter; get() fetches every character from it via
    /// get_character()
    InputAdapterType ia;

    /// the current character, i.e. the value stored by the most recent
    /// get() call; holds eof() before anything has been read
    char_int_type current = std::char_traits<char_type>::eof();

    /// the number of characters read; incremented by every get() call and
    /// passed along as the position when a parse error is reported
    std::size_t chars_read = 0;

    /// whether we can assume little endianness; get_number() compares it
    /// with the byte order of the input to decide whether the bytes read
    /// have to be stored in reverse order
    const bool is_little_endian = little_endianess();

    /// the SAX parser that receives all parse events and parse errors;
    /// null by default
    /// NOTE(review): presumably assigned before parsing starts -- the
    /// assignment is not visible in this section
    json_sax_t* sax = nullptr;
2457 };
2458 } // namespace detail
2459 } // namespace nlohmann
2460