1 #pragma once
2 
3 #include <algorithm> // generate_n
4 #include <array> // array
5 #include <cmath> // ldexp
6 #include <cstddef> // size_t
7 #include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t
8 #include <cstdio> // snprintf
9 #include <cstring> // memcpy
10 #include <iterator> // back_inserter
11 #include <limits> // numeric_limits
12 #include <string> // char_traits, string
13 #include <utility> // make_pair, move
14 #include <vector> // vector
15 
16 #include <nlohmann/detail/exceptions.hpp>
17 #include <nlohmann/detail/input/input_adapters.hpp>
18 #include <nlohmann/detail/input/json_sax.hpp>
19 #include <nlohmann/detail/input/lexer.hpp>
20 #include <nlohmann/detail/macro_scope.hpp>
21 #include <nlohmann/detail/meta/is_sax.hpp>
22 #include <nlohmann/detail/value_t.hpp>
23 
24 namespace nlohmann
25 {
26 namespace detail
27 {
28 
/// how the binary reader treats CBOR tags (tagged items) found in the input
enum class cbor_tag_handler_t
{
    error,  ///< report a parse_error when a tag is encountered
    ignore   ///< skip the tag and parse the value that follows it
};
35 
/*!
@brief determine system byte order

The check looks at the byte stored at the lowest address of @a num. With the
default argument of 1, that byte equals 1 exactly when the least significant
byte is stored first.

@param[in] num  probe value whose first byte in memory is inspected

@return true if and only if system's byte order is little endian

@note from https://stackoverflow.com/a/1001328/266378
*/
static inline bool little_endianess(int num = 1) noexcept
{
    // copy out the byte at the lowest address instead of aliasing it
    char lowest_byte{};
    std::memcpy(&lowest_byte, &num, sizeof(lowest_byte));
    return lowest_byte == 1;
}
47 
48 
49 ///////////////////
50 // binary reader //
51 ///////////////////
52 
53 /*!
@brief deserialization of BSON, CBOR, MessagePack, and UBJSON values
55 */
56 template<typename BasicJsonType, typename InputAdapterType, typename SAX = json_sax_dom_parser<BasicJsonType>>
57 class binary_reader
58 {
59     using number_integer_t = typename BasicJsonType::number_integer_t;
60     using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
61     using number_float_t = typename BasicJsonType::number_float_t;
62     using string_t = typename BasicJsonType::string_t;
63     using binary_t = typename BasicJsonType::binary_t;
64     using json_sax_t = SAX;
65     using char_type = typename InputAdapterType::char_type;
66     using char_int_type = typename std::char_traits<char_type>::int_type;
67 
68   public:
69     /*!
70     @brief create a binary reader
71 
72     @param[in] adapter  input adapter to read from
73     */
binary_reader(InputAdapterType && adapter)74     explicit binary_reader(InputAdapterType&& adapter) noexcept : ia(std::move(adapter))
75     {
76         (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
77     }
78 
    // make class move-only: copy construction and copy assignment are
    // deleted, while move construction, move assignment, and destruction use
    // the compiler-generated defaults
    binary_reader(const binary_reader&) = delete;
    binary_reader(binary_reader&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor)
    binary_reader& operator=(const binary_reader&) = delete;
    binary_reader& operator=(binary_reader&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor)
    ~binary_reader() = default;
85 
86     /*!
87     @param[in] format  the binary format to parse
88     @param[in] sax_    a SAX event processor
89     @param[in] strict  whether to expect the input to be consumed completed
90     @param[in] tag_handler  how to treat CBOR tags
91 
92     @return whether parsing was successful
93     */
94     JSON_HEDLEY_NON_NULL(3)
sax_parse(const input_format_t format,json_sax_t * sax_,const bool strict=true,const cbor_tag_handler_t tag_handler=cbor_tag_handler_t::error)95     bool sax_parse(const input_format_t format,
96                    json_sax_t* sax_,
97                    const bool strict = true,
98                    const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error)
99     {
100         sax = sax_;
101         bool result = false;
102 
103         switch (format)
104         {
105             case input_format_t::bson:
106                 result = parse_bson_internal();
107                 break;
108 
109             case input_format_t::cbor:
110                 result = parse_cbor_internal(true, tag_handler);
111                 break;
112 
113             case input_format_t::msgpack:
114                 result = parse_msgpack_internal();
115                 break;
116 
117             case input_format_t::ubjson:
118                 result = parse_ubjson_internal();
119                 break;
120 
121             default:            // LCOV_EXCL_LINE
122                 JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
123         }
124 
125         // strict mode: next byte must be EOF
126         if (result && strict)
127         {
128             if (format == input_format_t::ubjson)
129             {
130                 get_ignore_noop();
131             }
132             else
133             {
134                 get();
135             }
136 
137             if (JSON_HEDLEY_UNLIKELY(current != std::char_traits<char_type>::eof()))
138             {
139                 return sax->parse_error(chars_read, get_token_string(),
140                                         parse_error::create(110, chars_read, exception_message(format, "expected end of input; last byte: 0x" + get_token_string(), "value"), BasicJsonType()));
141             }
142         }
143 
144         return result;
145     }
146 
147   private:
148     //////////
149     // BSON //
150     //////////
151 
152     /*!
153     @brief Reads in a BSON-object and passes it to the SAX-parser.
154     @return whether a valid BSON-value was passed to the SAX parser
155     */
parse_bson_internal()156     bool parse_bson_internal()
157     {
158         std::int32_t document_size{};
159         get_number<std::int32_t, true>(input_format_t::bson, document_size);
160 
161         if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1))))
162         {
163             return false;
164         }
165 
166         if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/false)))
167         {
168             return false;
169         }
170 
171         return sax->end_object();
172     }
173 
174     /*!
175     @brief Parses a C-style string from the BSON input.
176     @param[in,out] result  A reference to the string variable where the read
177                             string is to be stored.
178     @return `true` if the \x00-byte indicating the end of the string was
179              encountered before the EOF; false` indicates an unexpected EOF.
180     */
get_bson_cstr(string_t & result)181     bool get_bson_cstr(string_t& result)
182     {
183         auto out = std::back_inserter(result);
184         while (true)
185         {
186             get();
187             if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "cstring")))
188             {
189                 return false;
190             }
191             if (current == 0x00)
192             {
193                 return true;
194             }
195             *out++ = static_cast<typename string_t::value_type>(current);
196         }
197     }
198 
199     /*!
200     @brief Parses a zero-terminated string of length @a len from the BSON
201            input.
202     @param[in] len  The length (including the zero-byte at the end) of the
203                     string to be read.
204     @param[in,out] result  A reference to the string variable where the read
205                             string is to be stored.
206     @tparam NumberType The type of the length @a len
207     @pre len >= 1
208     @return `true` if the string was successfully parsed
209     */
210     template<typename NumberType>
get_bson_string(const NumberType len,string_t & result)211     bool get_bson_string(const NumberType len, string_t& result)
212     {
213         if (JSON_HEDLEY_UNLIKELY(len < 1))
214         {
215             auto last_token = get_token_string();
216             return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "string length must be at least 1, is " + std::to_string(len), "string"), BasicJsonType()));
217         }
218 
219         return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) && get() != std::char_traits<char_type>::eof();
220     }
221 
222     /*!
223     @brief Parses a byte array input of length @a len from the BSON input.
224     @param[in] len  The length of the byte array to be read.
225     @param[in,out] result  A reference to the binary variable where the read
226                             array is to be stored.
227     @tparam NumberType The type of the length @a len
228     @pre len >= 0
229     @return `true` if the byte array was successfully parsed
230     */
231     template<typename NumberType>
get_bson_binary(const NumberType len,binary_t & result)232     bool get_bson_binary(const NumberType len, binary_t& result)
233     {
234         if (JSON_HEDLEY_UNLIKELY(len < 0))
235         {
236             auto last_token = get_token_string();
237             return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "byte array length cannot be negative, is " + std::to_string(len), "binary"), BasicJsonType()));
238         }
239 
240         // All BSON binary values have a subtype
241         std::uint8_t subtype{};
242         get_number<std::uint8_t>(input_format_t::bson, subtype);
243         result.set_subtype(subtype);
244 
245         return get_binary(input_format_t::bson, len, result);
246     }
247 
248     /*!
249     @brief Read a BSON document element of the given @a element_type.
250     @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html
251     @param[in] element_type_parse_position The position in the input stream,
252                where the `element_type` was read.
253     @warning Not all BSON element types are supported yet. An unsupported
254              @a element_type will give rise to a parse_error.114:
255              Unsupported BSON record type 0x...
256     @return whether a valid BSON-object/array was passed to the SAX parser
257     */
parse_bson_element_internal(const char_int_type element_type,const std::size_t element_type_parse_position)258     bool parse_bson_element_internal(const char_int_type element_type,
259                                      const std::size_t element_type_parse_position)
260     {
261         switch (element_type)
262         {
263             case 0x01: // double
264             {
265                 double number{};
266                 return get_number<double, true>(input_format_t::bson, number) && sax->number_float(static_cast<number_float_t>(number), "");
267             }
268 
269             case 0x02: // string
270             {
271                 std::int32_t len{};
272                 string_t value;
273                 return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_string(len, value) && sax->string(value);
274             }
275 
276             case 0x03: // object
277             {
278                 return parse_bson_internal();
279             }
280 
281             case 0x04: // array
282             {
283                 return parse_bson_array();
284             }
285 
286             case 0x05: // binary
287             {
288                 std::int32_t len{};
289                 binary_t value;
290                 return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_binary(len, value) && sax->binary(value);
291             }
292 
293             case 0x08: // boolean
294             {
295                 return sax->boolean(get() != 0);
296             }
297 
298             case 0x0A: // null
299             {
300                 return sax->null();
301             }
302 
303             case 0x10: // int32
304             {
305                 std::int32_t value{};
306                 return get_number<std::int32_t, true>(input_format_t::bson, value) && sax->number_integer(value);
307             }
308 
309             case 0x12: // int64
310             {
311                 std::int64_t value{};
312                 return get_number<std::int64_t, true>(input_format_t::bson, value) && sax->number_integer(value);
313             }
314 
315             default: // anything else not supported (yet)
316             {
317                 std::array<char, 3> cr{{}};
318                 (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(element_type)); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
319                 return sax->parse_error(element_type_parse_position, std::string(cr.data()), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr.data()), BasicJsonType()));
320             }
321         }
322     }
323 
324     /*!
325     @brief Read a BSON element list (as specified in the BSON-spec)
326 
327     The same binary layout is used for objects and arrays, hence it must be
328     indicated with the argument @a is_array which one is expected
329     (true --> array, false --> object).
330 
331     @param[in] is_array Determines if the element list being read is to be
332                         treated as an object (@a is_array == false), or as an
333                         array (@a is_array == true).
334     @return whether a valid BSON-object/array was passed to the SAX parser
335     */
parse_bson_element_list(const bool is_array)336     bool parse_bson_element_list(const bool is_array)
337     {
338         string_t key;
339 
340         while (auto element_type = get())
341         {
342             if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "element list")))
343             {
344                 return false;
345             }
346 
347             const std::size_t element_type_parse_position = chars_read;
348             if (JSON_HEDLEY_UNLIKELY(!get_bson_cstr(key)))
349             {
350                 return false;
351             }
352 
353             if (!is_array && !sax->key(key))
354             {
355                 return false;
356             }
357 
358             if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_internal(element_type, element_type_parse_position)))
359             {
360                 return false;
361             }
362 
363             // get_bson_cstr only appends
364             key.clear();
365         }
366 
367         return true;
368     }
369 
370     /*!
371     @brief Reads an array from the BSON input and passes it to the SAX-parser.
372     @return whether a valid BSON-array was passed to the SAX parser
373     */
parse_bson_array()374     bool parse_bson_array()
375     {
376         std::int32_t document_size{};
377         get_number<std::int32_t, true>(input_format_t::bson, document_size);
378 
379         if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1))))
380         {
381             return false;
382         }
383 
384         if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/true)))
385         {
386             return false;
387         }
388 
389         return sax->end_array();
390     }
391 
392     //////////
393     // CBOR //
394     //////////
395 
396     /*!
397     @param[in] get_char  whether a new character should be retrieved from the
398                          input (true) or whether the last read character should
399                          be considered instead (false)
400     @param[in] tag_handler how CBOR tags should be treated
401 
402     @return whether a valid CBOR value was passed to the SAX parser
403     */
parse_cbor_internal(const bool get_char,const cbor_tag_handler_t tag_handler)404     bool parse_cbor_internal(const bool get_char,
405                              const cbor_tag_handler_t tag_handler)
406     {
407         switch (get_char ? get() : current)
408         {
409             // EOF
410             case std::char_traits<char_type>::eof():
411                 return unexpect_eof(input_format_t::cbor, "value");
412 
413             // Integer 0x00..0x17 (0..23)
414             case 0x00:
415             case 0x01:
416             case 0x02:
417             case 0x03:
418             case 0x04:
419             case 0x05:
420             case 0x06:
421             case 0x07:
422             case 0x08:
423             case 0x09:
424             case 0x0A:
425             case 0x0B:
426             case 0x0C:
427             case 0x0D:
428             case 0x0E:
429             case 0x0F:
430             case 0x10:
431             case 0x11:
432             case 0x12:
433             case 0x13:
434             case 0x14:
435             case 0x15:
436             case 0x16:
437             case 0x17:
438                 return sax->number_unsigned(static_cast<number_unsigned_t>(current));
439 
440             case 0x18: // Unsigned integer (one-byte uint8_t follows)
441             {
442                 std::uint8_t number{};
443                 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
444             }
445 
446             case 0x19: // Unsigned integer (two-byte uint16_t follows)
447             {
448                 std::uint16_t number{};
449                 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
450             }
451 
452             case 0x1A: // Unsigned integer (four-byte uint32_t follows)
453             {
454                 std::uint32_t number{};
455                 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
456             }
457 
458             case 0x1B: // Unsigned integer (eight-byte uint64_t follows)
459             {
460                 std::uint64_t number{};
461                 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
462             }
463 
464             // Negative integer -1-0x00..-1-0x17 (-1..-24)
465             case 0x20:
466             case 0x21:
467             case 0x22:
468             case 0x23:
469             case 0x24:
470             case 0x25:
471             case 0x26:
472             case 0x27:
473             case 0x28:
474             case 0x29:
475             case 0x2A:
476             case 0x2B:
477             case 0x2C:
478             case 0x2D:
479             case 0x2E:
480             case 0x2F:
481             case 0x30:
482             case 0x31:
483             case 0x32:
484             case 0x33:
485             case 0x34:
486             case 0x35:
487             case 0x36:
488             case 0x37:
489                 return sax->number_integer(static_cast<std::int8_t>(0x20 - 1 - current));
490 
491             case 0x38: // Negative integer (one-byte uint8_t follows)
492             {
493                 std::uint8_t number{};
494                 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
495             }
496 
497             case 0x39: // Negative integer -1-n (two-byte uint16_t follows)
498             {
499                 std::uint16_t number{};
500                 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
501             }
502 
503             case 0x3A: // Negative integer -1-n (four-byte uint32_t follows)
504             {
505                 std::uint32_t number{};
506                 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
507             }
508 
509             case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows)
510             {
511                 std::uint64_t number{};
512                 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1)
513                         - static_cast<number_integer_t>(number));
514             }
515 
516             // Binary data (0x00..0x17 bytes follow)
517             case 0x40:
518             case 0x41:
519             case 0x42:
520             case 0x43:
521             case 0x44:
522             case 0x45:
523             case 0x46:
524             case 0x47:
525             case 0x48:
526             case 0x49:
527             case 0x4A:
528             case 0x4B:
529             case 0x4C:
530             case 0x4D:
531             case 0x4E:
532             case 0x4F:
533             case 0x50:
534             case 0x51:
535             case 0x52:
536             case 0x53:
537             case 0x54:
538             case 0x55:
539             case 0x56:
540             case 0x57:
541             case 0x58: // Binary data (one-byte uint8_t for n follows)
542             case 0x59: // Binary data (two-byte uint16_t for n follow)
543             case 0x5A: // Binary data (four-byte uint32_t for n follow)
544             case 0x5B: // Binary data (eight-byte uint64_t for n follow)
545             case 0x5F: // Binary data (indefinite length)
546             {
547                 binary_t b;
548                 return get_cbor_binary(b) && sax->binary(b);
549             }
550 
551             // UTF-8 string (0x00..0x17 bytes follow)
552             case 0x60:
553             case 0x61:
554             case 0x62:
555             case 0x63:
556             case 0x64:
557             case 0x65:
558             case 0x66:
559             case 0x67:
560             case 0x68:
561             case 0x69:
562             case 0x6A:
563             case 0x6B:
564             case 0x6C:
565             case 0x6D:
566             case 0x6E:
567             case 0x6F:
568             case 0x70:
569             case 0x71:
570             case 0x72:
571             case 0x73:
572             case 0x74:
573             case 0x75:
574             case 0x76:
575             case 0x77:
576             case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
577             case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
578             case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
579             case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
580             case 0x7F: // UTF-8 string (indefinite length)
581             {
582                 string_t s;
583                 return get_cbor_string(s) && sax->string(s);
584             }
585 
586             // array (0x00..0x17 data items follow)
587             case 0x80:
588             case 0x81:
589             case 0x82:
590             case 0x83:
591             case 0x84:
592             case 0x85:
593             case 0x86:
594             case 0x87:
595             case 0x88:
596             case 0x89:
597             case 0x8A:
598             case 0x8B:
599             case 0x8C:
600             case 0x8D:
601             case 0x8E:
602             case 0x8F:
603             case 0x90:
604             case 0x91:
605             case 0x92:
606             case 0x93:
607             case 0x94:
608             case 0x95:
609             case 0x96:
610             case 0x97:
611                 return get_cbor_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
612 
613             case 0x98: // array (one-byte uint8_t for n follows)
614             {
615                 std::uint8_t len{};
616                 return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
617             }
618 
619             case 0x99: // array (two-byte uint16_t for n follow)
620             {
621                 std::uint16_t len{};
622                 return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
623             }
624 
625             case 0x9A: // array (four-byte uint32_t for n follow)
626             {
627                 std::uint32_t len{};
628                 return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
629             }
630 
631             case 0x9B: // array (eight-byte uint64_t for n follow)
632             {
633                 std::uint64_t len{};
634                 return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
635             }
636 
637             case 0x9F: // array (indefinite length)
638                 return get_cbor_array(std::size_t(-1), tag_handler);
639 
640             // map (0x00..0x17 pairs of data items follow)
641             case 0xA0:
642             case 0xA1:
643             case 0xA2:
644             case 0xA3:
645             case 0xA4:
646             case 0xA5:
647             case 0xA6:
648             case 0xA7:
649             case 0xA8:
650             case 0xA9:
651             case 0xAA:
652             case 0xAB:
653             case 0xAC:
654             case 0xAD:
655             case 0xAE:
656             case 0xAF:
657             case 0xB0:
658             case 0xB1:
659             case 0xB2:
660             case 0xB3:
661             case 0xB4:
662             case 0xB5:
663             case 0xB6:
664             case 0xB7:
665                 return get_cbor_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
666 
667             case 0xB8: // map (one-byte uint8_t for n follows)
668             {
669                 std::uint8_t len{};
670                 return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
671             }
672 
673             case 0xB9: // map (two-byte uint16_t for n follow)
674             {
675                 std::uint16_t len{};
676                 return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
677             }
678 
679             case 0xBA: // map (four-byte uint32_t for n follow)
680             {
681                 std::uint32_t len{};
682                 return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
683             }
684 
685             case 0xBB: // map (eight-byte uint64_t for n follow)
686             {
687                 std::uint64_t len{};
688                 return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
689             }
690 
691             case 0xBF: // map (indefinite length)
692                 return get_cbor_object(std::size_t(-1), tag_handler);
693 
694             case 0xC6: // tagged item
695             case 0xC7:
696             case 0xC8:
697             case 0xC9:
698             case 0xCA:
699             case 0xCB:
700             case 0xCC:
701             case 0xCD:
702             case 0xCE:
703             case 0xCF:
704             case 0xD0:
705             case 0xD1:
706             case 0xD2:
707             case 0xD3:
708             case 0xD4:
709             case 0xD8: // tagged item (1 bytes follow)
710             case 0xD9: // tagged item (2 bytes follow)
711             case 0xDA: // tagged item (4 bytes follow)
712             case 0xDB: // tagged item (8 bytes follow)
713             {
714                 switch (tag_handler)
715                 {
716                     case cbor_tag_handler_t::error:
717                     {
718                         auto last_token = get_token_string();
719                         return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value"), BasicJsonType()));
720                     }
721 
722                     case cbor_tag_handler_t::ignore:
723                     {
724                         switch (current)
725                         {
726                             case 0xD8:
727                             {
728                                 std::uint8_t len{};
729                                 get_number(input_format_t::cbor, len);
730                                 break;
731                             }
732                             case 0xD9:
733                             {
734                                 std::uint16_t len{};
735                                 get_number(input_format_t::cbor, len);
736                                 break;
737                             }
738                             case 0xDA:
739                             {
740                                 std::uint32_t len{};
741                                 get_number(input_format_t::cbor, len);
742                                 break;
743                             }
744                             case 0xDB:
745                             {
746                                 std::uint64_t len{};
747                                 get_number(input_format_t::cbor, len);
748                                 break;
749                             }
750                             default:
751                                 break;
752                         }
753                         return parse_cbor_internal(true, tag_handler);
754                     }
755 
756                     default:                 // LCOV_EXCL_LINE
757                         JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
758                         return false;        // LCOV_EXCL_LINE
759                 }
760             }
761 
762             case 0xF4: // false
763                 return sax->boolean(false);
764 
765             case 0xF5: // true
766                 return sax->boolean(true);
767 
768             case 0xF6: // null
769                 return sax->null();
770 
771             case 0xF9: // Half-Precision Float (two-byte IEEE 754)
772             {
773                 const auto byte1_raw = get();
774                 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number")))
775                 {
776                     return false;
777                 }
778                 const auto byte2_raw = get();
779                 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number")))
780                 {
781                     return false;
782                 }
783 
784                 const auto byte1 = static_cast<unsigned char>(byte1_raw);
785                 const auto byte2 = static_cast<unsigned char>(byte2_raw);
786 
787                 // code from RFC 7049, Appendix D, Figure 3:
788                 // As half-precision floating-point numbers were only added
789                 // to IEEE 754 in 2008, today's programming platforms often
790                 // still only have limited support for them. It is very
791                 // easy to include at least decoding support for them even
792                 // without such support. An example of a small decoder for
793                 // half-precision floating-point numbers in the C language
794                 // is shown in Fig. 3.
795                 const auto half = static_cast<unsigned int>((byte1 << 8u) + byte2);
796                 const double val = [&half]
797                 {
798                     const int exp = (half >> 10u) & 0x1Fu;
799                     const unsigned int mant = half & 0x3FFu;
800                     JSON_ASSERT(0 <= exp&& exp <= 32);
801                     JSON_ASSERT(mant <= 1024);
802                     switch (exp)
803                     {
804                         case 0:
805                             return std::ldexp(mant, -24);
806                         case 31:
807                             return (mant == 0)
808                             ? std::numeric_limits<double>::infinity()
809                             : std::numeric_limits<double>::quiet_NaN();
810                         default:
811                             return std::ldexp(mant + 1024, exp - 25);
812                     }
813                 }();
814                 return sax->number_float((half & 0x8000u) != 0
815                                          ? static_cast<number_float_t>(-val)
816                                          : static_cast<number_float_t>(val), "");
817             }
818 
819             case 0xFA: // Single-Precision Float (four-byte IEEE 754)
820             {
821                 float number{};
822                 return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
823             }
824 
825             case 0xFB: // Double-Precision Float (eight-byte IEEE 754)
826             {
827                 double number{};
828                 return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
829             }
830 
831             default: // anything else (0xFF is handled inside the other types)
832             {
833                 auto last_token = get_token_string();
834                 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value"), BasicJsonType()));
835             }
836         }
837     }
838 
839     /*!
840     @brief reads a CBOR string
841 
842     This function first reads starting bytes to determine the expected
843     string length and then copies this number of bytes into a string.
844     Additionally, CBOR's strings with indefinite lengths are supported.
845 
846     @param[out] result  created string
847 
848     @return whether string creation completed
849     */
get_cbor_string(string_t & result)850     bool get_cbor_string(string_t& result)
851     {
852         if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "string")))
853         {
854             return false;
855         }
856 
857         switch (current)
858         {
859             // UTF-8 string (0x00..0x17 bytes follow)
860             case 0x60:
861             case 0x61:
862             case 0x62:
863             case 0x63:
864             case 0x64:
865             case 0x65:
866             case 0x66:
867             case 0x67:
868             case 0x68:
869             case 0x69:
870             case 0x6A:
871             case 0x6B:
872             case 0x6C:
873             case 0x6D:
874             case 0x6E:
875             case 0x6F:
876             case 0x70:
877             case 0x71:
878             case 0x72:
879             case 0x73:
880             case 0x74:
881             case 0x75:
882             case 0x76:
883             case 0x77:
884             {
885                 return get_string(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
886             }
887 
888             case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
889             {
890                 std::uint8_t len{};
891                 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
892             }
893 
894             case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
895             {
896                 std::uint16_t len{};
897                 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
898             }
899 
900             case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
901             {
902                 std::uint32_t len{};
903                 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
904             }
905 
906             case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
907             {
908                 std::uint64_t len{};
909                 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
910             }
911 
912             case 0x7F: // UTF-8 string (indefinite length)
913             {
914                 while (get() != 0xFF)
915                 {
916                     string_t chunk;
917                     if (!get_cbor_string(chunk))
918                     {
919                         return false;
920                     }
921                     result.append(chunk);
922                 }
923                 return true;
924             }
925 
926             default:
927             {
928                 auto last_token = get_token_string();
929                 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"), BasicJsonType()));
930             }
931         }
932     }
933 
934     /*!
935     @brief reads a CBOR byte array
936 
937     This function first reads starting bytes to determine the expected
938     byte array length and then copies this number of bytes into the byte array.
939     Additionally, CBOR's byte arrays with indefinite lengths are supported.
940 
941     @param[out] result  created byte array
942 
943     @return whether byte array creation completed
944     */
get_cbor_binary(binary_t & result)945     bool get_cbor_binary(binary_t& result)
946     {
947         if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "binary")))
948         {
949             return false;
950         }
951 
952         switch (current)
953         {
954             // Binary data (0x00..0x17 bytes follow)
955             case 0x40:
956             case 0x41:
957             case 0x42:
958             case 0x43:
959             case 0x44:
960             case 0x45:
961             case 0x46:
962             case 0x47:
963             case 0x48:
964             case 0x49:
965             case 0x4A:
966             case 0x4B:
967             case 0x4C:
968             case 0x4D:
969             case 0x4E:
970             case 0x4F:
971             case 0x50:
972             case 0x51:
973             case 0x52:
974             case 0x53:
975             case 0x54:
976             case 0x55:
977             case 0x56:
978             case 0x57:
979             {
980                 return get_binary(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
981             }
982 
983             case 0x58: // Binary data (one-byte uint8_t for n follows)
984             {
985                 std::uint8_t len{};
986                 return get_number(input_format_t::cbor, len) &&
987                        get_binary(input_format_t::cbor, len, result);
988             }
989 
990             case 0x59: // Binary data (two-byte uint16_t for n follow)
991             {
992                 std::uint16_t len{};
993                 return get_number(input_format_t::cbor, len) &&
994                        get_binary(input_format_t::cbor, len, result);
995             }
996 
997             case 0x5A: // Binary data (four-byte uint32_t for n follow)
998             {
999                 std::uint32_t len{};
1000                 return get_number(input_format_t::cbor, len) &&
1001                        get_binary(input_format_t::cbor, len, result);
1002             }
1003 
1004             case 0x5B: // Binary data (eight-byte uint64_t for n follow)
1005             {
1006                 std::uint64_t len{};
1007                 return get_number(input_format_t::cbor, len) &&
1008                        get_binary(input_format_t::cbor, len, result);
1009             }
1010 
1011             case 0x5F: // Binary data (indefinite length)
1012             {
1013                 while (get() != 0xFF)
1014                 {
1015                     binary_t chunk;
1016                     if (!get_cbor_binary(chunk))
1017                     {
1018                         return false;
1019                     }
1020                     result.insert(result.end(), chunk.begin(), chunk.end());
1021                 }
1022                 return true;
1023             }
1024 
1025             default:
1026             {
1027                 auto last_token = get_token_string();
1028                 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x40-0x5B) or indefinite binary array type (0x5F); last byte: 0x" + last_token, "binary"), BasicJsonType()));
1029             }
1030         }
1031     }
1032 
1033     /*!
1034     @param[in] len  the length of the array or std::size_t(-1) for an
1035                     array of indefinite size
1036     @param[in] tag_handler how CBOR tags should be treated
1037     @return whether array creation completed
1038     */
get_cbor_array(const std::size_t len,const cbor_tag_handler_t tag_handler)1039     bool get_cbor_array(const std::size_t len,
1040                         const cbor_tag_handler_t tag_handler)
1041     {
1042         if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len)))
1043         {
1044             return false;
1045         }
1046 
1047         if (len != std::size_t(-1))
1048         {
1049             for (std::size_t i = 0; i < len; ++i)
1050             {
1051                 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1052                 {
1053                     return false;
1054                 }
1055             }
1056         }
1057         else
1058         {
1059             while (get() != 0xFF)
1060             {
1061                 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(false, tag_handler)))
1062                 {
1063                     return false;
1064                 }
1065             }
1066         }
1067 
1068         return sax->end_array();
1069     }
1070 
1071     /*!
1072     @param[in] len  the length of the object or std::size_t(-1) for an
1073                     object of indefinite size
1074     @param[in] tag_handler how CBOR tags should be treated
1075     @return whether object creation completed
1076     */
get_cbor_object(const std::size_t len,const cbor_tag_handler_t tag_handler)1077     bool get_cbor_object(const std::size_t len,
1078                          const cbor_tag_handler_t tag_handler)
1079     {
1080         if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len)))
1081         {
1082             return false;
1083         }
1084 
1085         string_t key;
1086         if (len != std::size_t(-1))
1087         {
1088             for (std::size_t i = 0; i < len; ++i)
1089             {
1090                 get();
1091                 if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key)))
1092                 {
1093                     return false;
1094                 }
1095 
1096                 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1097                 {
1098                     return false;
1099                 }
1100                 key.clear();
1101             }
1102         }
1103         else
1104         {
1105             while (get() != 0xFF)
1106             {
1107                 if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key)))
1108                 {
1109                     return false;
1110                 }
1111 
1112                 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1113                 {
1114                     return false;
1115                 }
1116                 key.clear();
1117             }
1118         }
1119 
1120         return sax->end_object();
1121     }
1122 
1123     /////////////
1124     // MsgPack //
1125     /////////////
1126 
1127     /*!
1128     @return whether a valid MessagePack value was passed to the SAX parser
1129     */
parse_msgpack_internal()1130     bool parse_msgpack_internal()
1131     {
1132         switch (get())
1133         {
1134             // EOF
1135             case std::char_traits<char_type>::eof():
1136                 return unexpect_eof(input_format_t::msgpack, "value");
1137 
1138             // positive fixint
1139             case 0x00:
1140             case 0x01:
1141             case 0x02:
1142             case 0x03:
1143             case 0x04:
1144             case 0x05:
1145             case 0x06:
1146             case 0x07:
1147             case 0x08:
1148             case 0x09:
1149             case 0x0A:
1150             case 0x0B:
1151             case 0x0C:
1152             case 0x0D:
1153             case 0x0E:
1154             case 0x0F:
1155             case 0x10:
1156             case 0x11:
1157             case 0x12:
1158             case 0x13:
1159             case 0x14:
1160             case 0x15:
1161             case 0x16:
1162             case 0x17:
1163             case 0x18:
1164             case 0x19:
1165             case 0x1A:
1166             case 0x1B:
1167             case 0x1C:
1168             case 0x1D:
1169             case 0x1E:
1170             case 0x1F:
1171             case 0x20:
1172             case 0x21:
1173             case 0x22:
1174             case 0x23:
1175             case 0x24:
1176             case 0x25:
1177             case 0x26:
1178             case 0x27:
1179             case 0x28:
1180             case 0x29:
1181             case 0x2A:
1182             case 0x2B:
1183             case 0x2C:
1184             case 0x2D:
1185             case 0x2E:
1186             case 0x2F:
1187             case 0x30:
1188             case 0x31:
1189             case 0x32:
1190             case 0x33:
1191             case 0x34:
1192             case 0x35:
1193             case 0x36:
1194             case 0x37:
1195             case 0x38:
1196             case 0x39:
1197             case 0x3A:
1198             case 0x3B:
1199             case 0x3C:
1200             case 0x3D:
1201             case 0x3E:
1202             case 0x3F:
1203             case 0x40:
1204             case 0x41:
1205             case 0x42:
1206             case 0x43:
1207             case 0x44:
1208             case 0x45:
1209             case 0x46:
1210             case 0x47:
1211             case 0x48:
1212             case 0x49:
1213             case 0x4A:
1214             case 0x4B:
1215             case 0x4C:
1216             case 0x4D:
1217             case 0x4E:
1218             case 0x4F:
1219             case 0x50:
1220             case 0x51:
1221             case 0x52:
1222             case 0x53:
1223             case 0x54:
1224             case 0x55:
1225             case 0x56:
1226             case 0x57:
1227             case 0x58:
1228             case 0x59:
1229             case 0x5A:
1230             case 0x5B:
1231             case 0x5C:
1232             case 0x5D:
1233             case 0x5E:
1234             case 0x5F:
1235             case 0x60:
1236             case 0x61:
1237             case 0x62:
1238             case 0x63:
1239             case 0x64:
1240             case 0x65:
1241             case 0x66:
1242             case 0x67:
1243             case 0x68:
1244             case 0x69:
1245             case 0x6A:
1246             case 0x6B:
1247             case 0x6C:
1248             case 0x6D:
1249             case 0x6E:
1250             case 0x6F:
1251             case 0x70:
1252             case 0x71:
1253             case 0x72:
1254             case 0x73:
1255             case 0x74:
1256             case 0x75:
1257             case 0x76:
1258             case 0x77:
1259             case 0x78:
1260             case 0x79:
1261             case 0x7A:
1262             case 0x7B:
1263             case 0x7C:
1264             case 0x7D:
1265             case 0x7E:
1266             case 0x7F:
1267                 return sax->number_unsigned(static_cast<number_unsigned_t>(current));
1268 
1269             // fixmap
1270             case 0x80:
1271             case 0x81:
1272             case 0x82:
1273             case 0x83:
1274             case 0x84:
1275             case 0x85:
1276             case 0x86:
1277             case 0x87:
1278             case 0x88:
1279             case 0x89:
1280             case 0x8A:
1281             case 0x8B:
1282             case 0x8C:
1283             case 0x8D:
1284             case 0x8E:
1285             case 0x8F:
1286                 return get_msgpack_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
1287 
1288             // fixarray
1289             case 0x90:
1290             case 0x91:
1291             case 0x92:
1292             case 0x93:
1293             case 0x94:
1294             case 0x95:
1295             case 0x96:
1296             case 0x97:
1297             case 0x98:
1298             case 0x99:
1299             case 0x9A:
1300             case 0x9B:
1301             case 0x9C:
1302             case 0x9D:
1303             case 0x9E:
1304             case 0x9F:
1305                 return get_msgpack_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
1306 
1307             // fixstr
1308             case 0xA0:
1309             case 0xA1:
1310             case 0xA2:
1311             case 0xA3:
1312             case 0xA4:
1313             case 0xA5:
1314             case 0xA6:
1315             case 0xA7:
1316             case 0xA8:
1317             case 0xA9:
1318             case 0xAA:
1319             case 0xAB:
1320             case 0xAC:
1321             case 0xAD:
1322             case 0xAE:
1323             case 0xAF:
1324             case 0xB0:
1325             case 0xB1:
1326             case 0xB2:
1327             case 0xB3:
1328             case 0xB4:
1329             case 0xB5:
1330             case 0xB6:
1331             case 0xB7:
1332             case 0xB8:
1333             case 0xB9:
1334             case 0xBA:
1335             case 0xBB:
1336             case 0xBC:
1337             case 0xBD:
1338             case 0xBE:
1339             case 0xBF:
1340             case 0xD9: // str 8
1341             case 0xDA: // str 16
1342             case 0xDB: // str 32
1343             {
1344                 string_t s;
1345                 return get_msgpack_string(s) && sax->string(s);
1346             }
1347 
1348             case 0xC0: // nil
1349                 return sax->null();
1350 
1351             case 0xC2: // false
1352                 return sax->boolean(false);
1353 
1354             case 0xC3: // true
1355                 return sax->boolean(true);
1356 
1357             case 0xC4: // bin 8
1358             case 0xC5: // bin 16
1359             case 0xC6: // bin 32
1360             case 0xC7: // ext 8
1361             case 0xC8: // ext 16
1362             case 0xC9: // ext 32
1363             case 0xD4: // fixext 1
1364             case 0xD5: // fixext 2
1365             case 0xD6: // fixext 4
1366             case 0xD7: // fixext 8
1367             case 0xD8: // fixext 16
1368             {
1369                 binary_t b;
1370                 return get_msgpack_binary(b) && sax->binary(b);
1371             }
1372 
1373             case 0xCA: // float 32
1374             {
1375                 float number{};
1376                 return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
1377             }
1378 
1379             case 0xCB: // float 64
1380             {
1381                 double number{};
1382                 return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
1383             }
1384 
1385             case 0xCC: // uint 8
1386             {
1387                 std::uint8_t number{};
1388                 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1389             }
1390 
1391             case 0xCD: // uint 16
1392             {
1393                 std::uint16_t number{};
1394                 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1395             }
1396 
1397             case 0xCE: // uint 32
1398             {
1399                 std::uint32_t number{};
1400                 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1401             }
1402 
1403             case 0xCF: // uint 64
1404             {
1405                 std::uint64_t number{};
1406                 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1407             }
1408 
1409             case 0xD0: // int 8
1410             {
1411                 std::int8_t number{};
1412                 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1413             }
1414 
1415             case 0xD1: // int 16
1416             {
1417                 std::int16_t number{};
1418                 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1419             }
1420 
1421             case 0xD2: // int 32
1422             {
1423                 std::int32_t number{};
1424                 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1425             }
1426 
1427             case 0xD3: // int 64
1428             {
1429                 std::int64_t number{};
1430                 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1431             }
1432 
1433             case 0xDC: // array 16
1434             {
1435                 std::uint16_t len{};
1436                 return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast<std::size_t>(len));
1437             }
1438 
1439             case 0xDD: // array 32
1440             {
1441                 std::uint32_t len{};
1442                 return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast<std::size_t>(len));
1443             }
1444 
1445             case 0xDE: // map 16
1446             {
1447                 std::uint16_t len{};
1448                 return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast<std::size_t>(len));
1449             }
1450 
1451             case 0xDF: // map 32
1452             {
1453                 std::uint32_t len{};
1454                 return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast<std::size_t>(len));
1455             }
1456 
1457             // negative fixint
1458             case 0xE0:
1459             case 0xE1:
1460             case 0xE2:
1461             case 0xE3:
1462             case 0xE4:
1463             case 0xE5:
1464             case 0xE6:
1465             case 0xE7:
1466             case 0xE8:
1467             case 0xE9:
1468             case 0xEA:
1469             case 0xEB:
1470             case 0xEC:
1471             case 0xED:
1472             case 0xEE:
1473             case 0xEF:
1474             case 0xF0:
1475             case 0xF1:
1476             case 0xF2:
1477             case 0xF3:
1478             case 0xF4:
1479             case 0xF5:
1480             case 0xF6:
1481             case 0xF7:
1482             case 0xF8:
1483             case 0xF9:
1484             case 0xFA:
1485             case 0xFB:
1486             case 0xFC:
1487             case 0xFD:
1488             case 0xFE:
1489             case 0xFF:
1490                 return sax->number_integer(static_cast<std::int8_t>(current));
1491 
1492             default: // anything else
1493             {
1494                 auto last_token = get_token_string();
1495                 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value"), BasicJsonType()));
1496             }
1497         }
1498     }
1499 
1500     /*!
1501     @brief reads a MessagePack string
1502 
1503     This function first reads starting bytes to determine the expected
1504     string length and then copies this number of bytes into a string.
1505 
1506     @param[out] result  created string
1507 
1508     @return whether string creation completed
1509     */
get_msgpack_string(string_t & result)1510     bool get_msgpack_string(string_t& result)
1511     {
1512         if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::msgpack, "string")))
1513         {
1514             return false;
1515         }
1516 
1517         switch (current)
1518         {
1519             // fixstr
1520             case 0xA0:
1521             case 0xA1:
1522             case 0xA2:
1523             case 0xA3:
1524             case 0xA4:
1525             case 0xA5:
1526             case 0xA6:
1527             case 0xA7:
1528             case 0xA8:
1529             case 0xA9:
1530             case 0xAA:
1531             case 0xAB:
1532             case 0xAC:
1533             case 0xAD:
1534             case 0xAE:
1535             case 0xAF:
1536             case 0xB0:
1537             case 0xB1:
1538             case 0xB2:
1539             case 0xB3:
1540             case 0xB4:
1541             case 0xB5:
1542             case 0xB6:
1543             case 0xB7:
1544             case 0xB8:
1545             case 0xB9:
1546             case 0xBA:
1547             case 0xBB:
1548             case 0xBC:
1549             case 0xBD:
1550             case 0xBE:
1551             case 0xBF:
1552             {
1553                 return get_string(input_format_t::msgpack, static_cast<unsigned int>(current) & 0x1Fu, result);
1554             }
1555 
1556             case 0xD9: // str 8
1557             {
1558                 std::uint8_t len{};
1559                 return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1560             }
1561 
1562             case 0xDA: // str 16
1563             {
1564                 std::uint16_t len{};
1565                 return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1566             }
1567 
1568             case 0xDB: // str 32
1569             {
1570                 std::uint32_t len{};
1571                 return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1572             }
1573 
1574             default:
1575             {
1576                 auto last_token = get_token_string();
1577                 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::msgpack, "expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x" + last_token, "string"), BasicJsonType()));
1578             }
1579         }
1580     }
1581 
1582     /*!
1583     @brief reads a MessagePack byte array
1584 
1585     This function first reads starting bytes to determine the expected
1586     byte array length and then copies this number of bytes into a byte array.
1587 
1588     @param[out] result  created byte array
1589 
1590     @return whether byte array creation completed
1591     */
get_msgpack_binary(binary_t & result)1592     bool get_msgpack_binary(binary_t& result)
1593     {
1594         // helper function to set the subtype
1595         auto assign_and_return_true = [&result](std::int8_t subtype)
1596         {
1597             result.set_subtype(static_cast<std::uint8_t>(subtype));
1598             return true;
1599         };
1600 
1601         switch (current)
1602         {
1603             case 0xC4: // bin 8
1604             {
1605                 std::uint8_t len{};
1606                 return get_number(input_format_t::msgpack, len) &&
1607                        get_binary(input_format_t::msgpack, len, result);
1608             }
1609 
1610             case 0xC5: // bin 16
1611             {
1612                 std::uint16_t len{};
1613                 return get_number(input_format_t::msgpack, len) &&
1614                        get_binary(input_format_t::msgpack, len, result);
1615             }
1616 
1617             case 0xC6: // bin 32
1618             {
1619                 std::uint32_t len{};
1620                 return get_number(input_format_t::msgpack, len) &&
1621                        get_binary(input_format_t::msgpack, len, result);
1622             }
1623 
1624             case 0xC7: // ext 8
1625             {
1626                 std::uint8_t len{};
1627                 std::int8_t subtype{};
1628                 return get_number(input_format_t::msgpack, len) &&
1629                        get_number(input_format_t::msgpack, subtype) &&
1630                        get_binary(input_format_t::msgpack, len, result) &&
1631                        assign_and_return_true(subtype);
1632             }
1633 
1634             case 0xC8: // ext 16
1635             {
1636                 std::uint16_t len{};
1637                 std::int8_t subtype{};
1638                 return get_number(input_format_t::msgpack, len) &&
1639                        get_number(input_format_t::msgpack, subtype) &&
1640                        get_binary(input_format_t::msgpack, len, result) &&
1641                        assign_and_return_true(subtype);
1642             }
1643 
1644             case 0xC9: // ext 32
1645             {
1646                 std::uint32_t len{};
1647                 std::int8_t subtype{};
1648                 return get_number(input_format_t::msgpack, len) &&
1649                        get_number(input_format_t::msgpack, subtype) &&
1650                        get_binary(input_format_t::msgpack, len, result) &&
1651                        assign_and_return_true(subtype);
1652             }
1653 
1654             case 0xD4: // fixext 1
1655             {
1656                 std::int8_t subtype{};
1657                 return get_number(input_format_t::msgpack, subtype) &&
1658                        get_binary(input_format_t::msgpack, 1, result) &&
1659                        assign_and_return_true(subtype);
1660             }
1661 
1662             case 0xD5: // fixext 2
1663             {
1664                 std::int8_t subtype{};
1665                 return get_number(input_format_t::msgpack, subtype) &&
1666                        get_binary(input_format_t::msgpack, 2, result) &&
1667                        assign_and_return_true(subtype);
1668             }
1669 
1670             case 0xD6: // fixext 4
1671             {
1672                 std::int8_t subtype{};
1673                 return get_number(input_format_t::msgpack, subtype) &&
1674                        get_binary(input_format_t::msgpack, 4, result) &&
1675                        assign_and_return_true(subtype);
1676             }
1677 
1678             case 0xD7: // fixext 8
1679             {
1680                 std::int8_t subtype{};
1681                 return get_number(input_format_t::msgpack, subtype) &&
1682                        get_binary(input_format_t::msgpack, 8, result) &&
1683                        assign_and_return_true(subtype);
1684             }
1685 
1686             case 0xD8: // fixext 16
1687             {
1688                 std::int8_t subtype{};
1689                 return get_number(input_format_t::msgpack, subtype) &&
1690                        get_binary(input_format_t::msgpack, 16, result) &&
1691                        assign_and_return_true(subtype);
1692             }
1693 
1694             default:           // LCOV_EXCL_LINE
1695                 return false;  // LCOV_EXCL_LINE
1696         }
1697     }
1698 
1699     /*!
1700     @param[in] len  the length of the array
1701     @return whether array creation completed
1702     */
get_msgpack_array(const std::size_t len)1703     bool get_msgpack_array(const std::size_t len)
1704     {
1705         if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len)))
1706         {
1707             return false;
1708         }
1709 
1710         for (std::size_t i = 0; i < len; ++i)
1711         {
1712             if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal()))
1713             {
1714                 return false;
1715             }
1716         }
1717 
1718         return sax->end_array();
1719     }
1720 
1721     /*!
1722     @param[in] len  the length of the object
1723     @return whether object creation completed
1724     */
get_msgpack_object(const std::size_t len)1725     bool get_msgpack_object(const std::size_t len)
1726     {
1727         if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len)))
1728         {
1729             return false;
1730         }
1731 
1732         string_t key;
1733         for (std::size_t i = 0; i < len; ++i)
1734         {
1735             get();
1736             if (JSON_HEDLEY_UNLIKELY(!get_msgpack_string(key) || !sax->key(key)))
1737             {
1738                 return false;
1739             }
1740 
1741             if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal()))
1742             {
1743                 return false;
1744             }
1745             key.clear();
1746         }
1747 
1748         return sax->end_object();
1749     }
1750 
1751     ////////////
1752     // UBJSON //
1753     ////////////
1754 
1755     /*!
1756     @param[in] get_char  whether a new character should be retrieved from the
1757                          input (true, default) or whether the last read
1758                          character should be considered instead
1759 
1760     @return whether a valid UBJSON value was passed to the SAX parser
1761     */
parse_ubjson_internal(const bool get_char=true)1762     bool parse_ubjson_internal(const bool get_char = true)
1763     {
1764         return get_ubjson_value(get_char ? get_ignore_noop() : current);
1765     }
1766 
1767     /*!
1768     @brief reads a UBJSON string
1769 
1770     This function is either called after reading the 'S' byte explicitly
1771     indicating a string, or in case of an object key where the 'S' byte can be
1772     left out.
1773 
1774     @param[out] result   created string
1775     @param[in] get_char  whether a new character should be retrieved from the
1776                          input (true, default) or whether the last read
1777                          character should be considered instead
1778 
1779     @return whether string creation completed
1780     */
get_ubjson_string(string_t & result,const bool get_char=true)1781     bool get_ubjson_string(string_t& result, const bool get_char = true)
1782     {
1783         if (get_char)
1784         {
1785             get();  // TODO(niels): may we ignore N here?
1786         }
1787 
1788         if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "value")))
1789         {
1790             return false;
1791         }
1792 
1793         switch (current)
1794         {
1795             case 'U':
1796             {
1797                 std::uint8_t len{};
1798                 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1799             }
1800 
1801             case 'i':
1802             {
1803                 std::int8_t len{};
1804                 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1805             }
1806 
1807             case 'I':
1808             {
1809                 std::int16_t len{};
1810                 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1811             }
1812 
1813             case 'l':
1814             {
1815                 std::int32_t len{};
1816                 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1817             }
1818 
1819             case 'L':
1820             {
1821                 std::int64_t len{};
1822                 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1823             }
1824 
1825             default:
1826                 auto last_token = get_token_string();
1827                 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L); last byte: 0x" + last_token, "string"), BasicJsonType()));
1828         }
1829     }
1830 
1831     /*!
1832     @param[out] result  determined size
1833     @return whether size determination completed
1834     */
get_ubjson_size_value(std::size_t & result)1835     bool get_ubjson_size_value(std::size_t& result)
1836     {
1837         switch (get_ignore_noop())
1838         {
1839             case 'U':
1840             {
1841                 std::uint8_t number{};
1842                 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1843                 {
1844                     return false;
1845                 }
1846                 result = static_cast<std::size_t>(number);
1847                 return true;
1848             }
1849 
1850             case 'i':
1851             {
1852                 std::int8_t number{};
1853                 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1854                 {
1855                     return false;
1856                 }
1857                 result = static_cast<std::size_t>(number); // NOLINT(bugprone-signed-char-misuse,cert-str34-c): number is not a char
1858                 return true;
1859             }
1860 
1861             case 'I':
1862             {
1863                 std::int16_t number{};
1864                 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1865                 {
1866                     return false;
1867                 }
1868                 result = static_cast<std::size_t>(number);
1869                 return true;
1870             }
1871 
1872             case 'l':
1873             {
1874                 std::int32_t number{};
1875                 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1876                 {
1877                     return false;
1878                 }
1879                 result = static_cast<std::size_t>(number);
1880                 return true;
1881             }
1882 
1883             case 'L':
1884             {
1885                 std::int64_t number{};
1886                 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1887                 {
1888                     return false;
1889                 }
1890                 result = static_cast<std::size_t>(number);
1891                 return true;
1892             }
1893 
1894             default:
1895             {
1896                 auto last_token = get_token_string();
1897                 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size"), BasicJsonType()));
1898             }
1899         }
1900     }
1901 
1902     /*!
1903     @brief determine the type and size for a container
1904 
1905     In the optimized UBJSON format, a type and a size can be provided to allow
1906     for a more compact representation.
1907 
1908     @param[out] result  pair of the size and the type
1909 
1910     @return whether pair creation completed
1911     */
get_ubjson_size_type(std::pair<std::size_t,char_int_type> & result)1912     bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result)
1913     {
1914         result.first = string_t::npos; // size
1915         result.second = 0; // type
1916 
1917         get_ignore_noop();
1918 
1919         if (current == '$')
1920         {
1921             result.second = get();  // must not ignore 'N', because 'N' maybe the type
1922             if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "type")))
1923             {
1924                 return false;
1925             }
1926 
1927             get_ignore_noop();
1928             if (JSON_HEDLEY_UNLIKELY(current != '#'))
1929             {
1930                 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "value")))
1931                 {
1932                     return false;
1933                 }
1934                 auto last_token = get_token_string();
1935                 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "expected '#' after type information; last byte: 0x" + last_token, "size"), BasicJsonType()));
1936             }
1937 
1938             return get_ubjson_size_value(result.first);
1939         }
1940 
1941         if (current == '#')
1942         {
1943             return get_ubjson_size_value(result.first);
1944         }
1945 
1946         return true;
1947     }
1948 
1949     /*!
1950     @param prefix  the previously read or set type prefix
1951     @return whether value creation completed
1952     */
get_ubjson_value(const char_int_type prefix)1953     bool get_ubjson_value(const char_int_type prefix)
1954     {
1955         switch (prefix)
1956         {
1957             case std::char_traits<char_type>::eof():  // EOF
1958                 return unexpect_eof(input_format_t::ubjson, "value");
1959 
1960             case 'T':  // true
1961                 return sax->boolean(true);
1962             case 'F':  // false
1963                 return sax->boolean(false);
1964 
1965             case 'Z':  // null
1966                 return sax->null();
1967 
1968             case 'U':
1969             {
1970                 std::uint8_t number{};
1971                 return get_number(input_format_t::ubjson, number) && sax->number_unsigned(number);
1972             }
1973 
1974             case 'i':
1975             {
1976                 std::int8_t number{};
1977                 return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1978             }
1979 
1980             case 'I':
1981             {
1982                 std::int16_t number{};
1983                 return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1984             }
1985 
1986             case 'l':
1987             {
1988                 std::int32_t number{};
1989                 return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1990             }
1991 
1992             case 'L':
1993             {
1994                 std::int64_t number{};
1995                 return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1996             }
1997 
1998             case 'd':
1999             {
2000                 float number{};
2001                 return get_number(input_format_t::ubjson, number) && sax->number_float(static_cast<number_float_t>(number), "");
2002             }
2003 
2004             case 'D':
2005             {
2006                 double number{};
2007                 return get_number(input_format_t::ubjson, number) && sax->number_float(static_cast<number_float_t>(number), "");
2008             }
2009 
2010             case 'H':
2011             {
2012                 return get_ubjson_high_precision_number();
2013             }
2014 
2015             case 'C':  // char
2016             {
2017                 get();
2018                 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "char")))
2019                 {
2020                     return false;
2021                 }
2022                 if (JSON_HEDLEY_UNLIKELY(current > 127))
2023                 {
2024                     auto last_token = get_token_string();
2025                     return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token, "char"), BasicJsonType()));
2026                 }
2027                 string_t s(1, static_cast<typename string_t::value_type>(current));
2028                 return sax->string(s);
2029             }
2030 
2031             case 'S':  // string
2032             {
2033                 string_t s;
2034                 return get_ubjson_string(s) && sax->string(s);
2035             }
2036 
2037             case '[':  // array
2038                 return get_ubjson_array();
2039 
2040             case '{':  // object
2041                 return get_ubjson_object();
2042 
2043             default: // anything else
2044             {
2045                 auto last_token = get_token_string();
2046                 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "invalid byte: 0x" + last_token, "value"), BasicJsonType()));
2047             }
2048         }
2049     }
2050 
2051     /*!
2052     @return whether array creation completed
2053     */
get_ubjson_array()2054     bool get_ubjson_array()
2055     {
2056         std::pair<std::size_t, char_int_type> size_and_type;
2057         if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
2058         {
2059             return false;
2060         }
2061 
2062         if (size_and_type.first != string_t::npos)
2063         {
2064             if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first)))
2065             {
2066                 return false;
2067             }
2068 
2069             if (size_and_type.second != 0)
2070             {
2071                 if (size_and_type.second != 'N')
2072                 {
2073                     for (std::size_t i = 0; i < size_and_type.first; ++i)
2074                     {
2075                         if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
2076                         {
2077                             return false;
2078                         }
2079                     }
2080                 }
2081             }
2082             else
2083             {
2084                 for (std::size_t i = 0; i < size_and_type.first; ++i)
2085                 {
2086                     if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2087                     {
2088                         return false;
2089                     }
2090                 }
2091             }
2092         }
2093         else
2094         {
2095             if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1))))
2096             {
2097                 return false;
2098             }
2099 
2100             while (current != ']')
2101             {
2102                 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal(false)))
2103                 {
2104                     return false;
2105                 }
2106                 get_ignore_noop();
2107             }
2108         }
2109 
2110         return sax->end_array();
2111     }
2112 
2113     /*!
2114     @return whether object creation completed
2115     */
get_ubjson_object()2116     bool get_ubjson_object()
2117     {
2118         std::pair<std::size_t, char_int_type> size_and_type;
2119         if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
2120         {
2121             return false;
2122         }
2123 
2124         string_t key;
2125         if (size_and_type.first != string_t::npos)
2126         {
2127             if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first)))
2128             {
2129                 return false;
2130             }
2131 
2132             if (size_and_type.second != 0)
2133             {
2134                 for (std::size_t i = 0; i < size_and_type.first; ++i)
2135                 {
2136                     if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key)))
2137                     {
2138                         return false;
2139                     }
2140                     if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
2141                     {
2142                         return false;
2143                     }
2144                     key.clear();
2145                 }
2146             }
2147             else
2148             {
2149                 for (std::size_t i = 0; i < size_and_type.first; ++i)
2150                 {
2151                     if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key)))
2152                     {
2153                         return false;
2154                     }
2155                     if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2156                     {
2157                         return false;
2158                     }
2159                     key.clear();
2160                 }
2161             }
2162         }
2163         else
2164         {
2165             if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1))))
2166             {
2167                 return false;
2168             }
2169 
2170             while (current != '}')
2171             {
2172                 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key, false) || !sax->key(key)))
2173                 {
2174                     return false;
2175                 }
2176                 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2177                 {
2178                     return false;
2179                 }
2180                 get_ignore_noop();
2181                 key.clear();
2182             }
2183         }
2184 
2185         return sax->end_object();
2186     }
2187 
2188     // Note, no reader for UBJSON binary types is implemented because they do
2189     // not exist
2190 
get_ubjson_high_precision_number()2191     bool get_ubjson_high_precision_number()
2192     {
2193         // get size of following number string
2194         std::size_t size{};
2195         auto res = get_ubjson_size_value(size);
2196         if (JSON_HEDLEY_UNLIKELY(!res))
2197         {
2198             return res;
2199         }
2200 
2201         // get number string
2202         std::vector<char> number_vector;
2203         for (std::size_t i = 0; i < size; ++i)
2204         {
2205             get();
2206             if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "number")))
2207             {
2208                 return false;
2209             }
2210             number_vector.push_back(static_cast<char>(current));
2211         }
2212 
2213         // parse number string
2214         using ia_type = decltype(detail::input_adapter(number_vector));
2215         auto number_lexer = detail::lexer<BasicJsonType, ia_type>(detail::input_adapter(number_vector), false);
2216         const auto result_number = number_lexer.scan();
2217         const auto number_string = number_lexer.get_token_string();
2218         const auto result_remainder = number_lexer.scan();
2219 
2220         using token_type = typename detail::lexer_base<BasicJsonType>::token_type;
2221 
2222         if (JSON_HEDLEY_UNLIKELY(result_remainder != token_type::end_of_input))
2223         {
2224             return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + number_lexer.get_token_string(), "high-precision number"), BasicJsonType()));
2225         }
2226 
2227         switch (result_number)
2228         {
2229             case token_type::value_integer:
2230                 return sax->number_integer(number_lexer.get_number_integer());
2231             case token_type::value_unsigned:
2232                 return sax->number_unsigned(number_lexer.get_number_unsigned());
2233             case token_type::value_float:
2234                 return sax->number_float(number_lexer.get_number_float(), std::move(number_string));
2235             default:
2236                 return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + number_lexer.get_token_string(), "high-precision number"), BasicJsonType()));
2237         }
2238     }
2239 
2240     ///////////////////////
2241     // Utility functions //
2242     ///////////////////////
2243 
2244     /*!
2245     @brief get next character from the input
2246 
    This function provides the interface to the used input adapter. It does
    not throw when the input has reached EOF; in that case it returns the
    (negative) value `std::char_traits<char_type>::eof()`.
2250 
2251     @return character read from the input
2252     */
get()2253     char_int_type get()
2254     {
2255         ++chars_read;
2256         return current = ia.get_character();
2257     }
2258 
2259     /*!
2260     @return character read from the input after ignoring all 'N' entries
2261     */
get_ignore_noop()2262     char_int_type get_ignore_noop()
2263     {
2264         do
2265         {
2266             get();
2267         }
2268         while (current == 'N');
2269 
2270         return current;
2271     }
2272 
    /*!
    @brief read a number from the input

    @tparam NumberType the type of the number
    @tparam InputIsLittleEndian whether the bytes of the number are stored in
            little-endian order in the input (default: false)
    @param[in] format   the current format (for diagnostics)
    @param[out] result  number of type @a NumberType

    @return whether conversion completed

    @note This function needs to respect the system's endianness, because
          bytes in CBOR, MessagePack, and UBJSON are stored in network order
          (big endian) and therefore need reordering on little endian systems.
    */
2286     template<typename NumberType, bool InputIsLittleEndian = false>
get_number(const input_format_t format,NumberType & result)2287     bool get_number(const input_format_t format, NumberType& result)
2288     {
2289         // step 1: read input into array with system's byte order
2290         std::array<std::uint8_t, sizeof(NumberType)> vec{};
2291         for (std::size_t i = 0; i < sizeof(NumberType); ++i)
2292         {
2293             get();
2294             if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number")))
2295             {
2296                 return false;
2297             }
2298 
2299             // reverse byte order prior to conversion if necessary
2300             if (is_little_endian != InputIsLittleEndian)
2301             {
2302                 vec[sizeof(NumberType) - i - 1] = static_cast<std::uint8_t>(current);
2303             }
2304             else
2305             {
2306                 vec[i] = static_cast<std::uint8_t>(current); // LCOV_EXCL_LINE
2307             }
2308         }
2309 
2310         // step 2: convert array into number of type T and return
2311         std::memcpy(&result, vec.data(), sizeof(NumberType));
2312         return true;
2313     }
2314 
2315     /*!
2316     @brief create a string by reading characters from the input
2317 
    @tparam NumberType the integer type of the length @a len
2319     @param[in] format the current format (for diagnostics)
2320     @param[in] len number of characters to read
2321     @param[out] result string created by reading @a len bytes
2322 
2323     @return whether string creation completed
2324 
2325     @note We can not reserve @a len bytes for the result, because @a len
2326           may be too large. Usually, @ref unexpect_eof() detects the end of
2327           the input before we run out of string memory.
2328     */
2329     template<typename NumberType>
get_string(const input_format_t format,const NumberType len,string_t & result)2330     bool get_string(const input_format_t format,
2331                     const NumberType len,
2332                     string_t& result)
2333     {
2334         bool success = true;
2335         for (NumberType i = 0; i < len; i++)
2336         {
2337             get();
2338             if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "string")))
2339             {
2340                 success = false;
2341                 break;
2342             }
2343             result.push_back(static_cast<typename string_t::value_type>(current));
2344         }
2345         return success;
2346     }
2347 
2348     /*!
2349     @brief create a byte array by reading bytes from the input
2350 
    @tparam NumberType the integer type of the length @a len
2352     @param[in] format the current format (for diagnostics)
2353     @param[in] len number of bytes to read
2354     @param[out] result byte array created by reading @a len bytes
2355 
2356     @return whether byte array creation completed
2357 
2358     @note We can not reserve @a len bytes for the result, because @a len
2359           may be too large. Usually, @ref unexpect_eof() detects the end of
2360           the input before we run out of memory.
2361     */
2362     template<typename NumberType>
get_binary(const input_format_t format,const NumberType len,binary_t & result)2363     bool get_binary(const input_format_t format,
2364                     const NumberType len,
2365                     binary_t& result)
2366     {
2367         bool success = true;
2368         for (NumberType i = 0; i < len; i++)
2369         {
2370             get();
2371             if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "binary")))
2372             {
2373                 success = false;
2374                 break;
2375             }
2376             result.push_back(static_cast<std::uint8_t>(current));
2377         }
2378         return success;
2379     }
2380 
2381     /*!
2382     @param[in] format   the current format (for diagnostics)
2383     @param[in] context  further context information (for diagnostics)
2384     @return whether the last read character is not EOF
2385     */
2386     JSON_HEDLEY_NON_NULL(3)
unexpect_eof(const input_format_t format,const char * context) const2387     bool unexpect_eof(const input_format_t format, const char* context) const
2388     {
2389         if (JSON_HEDLEY_UNLIKELY(current == std::char_traits<char_type>::eof()))
2390         {
2391             return sax->parse_error(chars_read, "<end of file>",
2392                                     parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), BasicJsonType()));
2393         }
2394         return true;
2395     }
2396 
2397     /*!
2398     @return a string representation of the last read byte
2399     */
get_token_string() const2400     std::string get_token_string() const
2401     {
2402         std::array<char, 3> cr{{}};
2403         (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(current)); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
2404         return std::string{cr.data()};
2405     }
2406 
2407     /*!
2408     @param[in] format   the current format
2409     @param[in] detail   a detailed error message
2410     @param[in] context  further context information
2411     @return a message string to use in the parse_error exceptions
2412     */
exception_message(const input_format_t format,const std::string & detail,const std::string & context) const2413     std::string exception_message(const input_format_t format,
2414                                   const std::string& detail,
2415                                   const std::string& context) const
2416     {
2417         std::string error_msg = "syntax error while parsing ";
2418 
2419         switch (format)
2420         {
2421             case input_format_t::cbor:
2422                 error_msg += "CBOR";
2423                 break;
2424 
2425             case input_format_t::msgpack:
2426                 error_msg += "MessagePack";
2427                 break;
2428 
2429             case input_format_t::ubjson:
2430                 error_msg += "UBJSON";
2431                 break;
2432 
2433             case input_format_t::bson:
2434                 error_msg += "BSON";
2435                 break;
2436 
2437             default:            // LCOV_EXCL_LINE
2438                 JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
2439         }
2440 
2441         return error_msg + " " + context + ": " + detail;
2442     }
2443 
2444   private:
    /// input adapter; get() pulls one character at a time from it
    InputAdapterType ia;

    /// the current character, i.e. the one most recently returned by get();
    /// initialized to EOF
    char_int_type current = std::char_traits<char_type>::eof();

    /// the number of characters read; incremented by every get() call and
    /// passed as the position to parse_error diagnostics
    std::size_t chars_read = 0;

    /// whether the system's byte order is little endian, as determined by
    /// little_endianess(); get_number() uses it to decide whether the bytes
    /// of a number must be reversed
    const bool is_little_endian = little_endianess();

    /// the SAX parser that receives the parsed values and parse errors
    json_sax_t* sax = nullptr;
2459 };
2460 }  // namespace detail
2461 }  // namespace nlohmann
2462