1 #pragma once
2 
3 #include <algorithm> // generate_n
4 #include <array> // array
5 #include <cmath> // ldexp
6 #include <cstddef> // size_t
7 #include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t
8 #include <cstdio> // snprintf
9 #include <cstring> // memcpy
10 #include <iterator> // back_inserter
11 #include <limits> // numeric_limits
12 #include <string> // char_traits, string
13 #include <utility> // make_pair, move
14 
15 #include <nlohmann/detail/exceptions.hpp>
16 #include <nlohmann/detail/input/input_adapters.hpp>
17 #include <nlohmann/detail/input/json_sax.hpp>
18 #include <nlohmann/detail/input/lexer.hpp>
19 #include <nlohmann/detail/macro_scope.hpp>
20 #include <nlohmann/detail/meta/is_sax.hpp>
21 #include <nlohmann/detail/value_t.hpp>
22 
23 namespace nlohmann
24 {
25 namespace detail
26 {
27 
/*!
@brief policy applied when the CBOR parser encounters a tagged item
*/
enum class cbor_tag_handler_t
{
    /// report a parse_error to the SAX handler when a tag is encountered
    error,
    /// read past the tag and parse the value that follows it
    ignore
};
34 
/*!
@brief determine system byte order

Looks at the lowest-addressed byte of @a num. With the default argument of 1
that byte equals 1 exactly when the least significant byte is stored first.

@param[in] num  probe value; the default of 1 is what makes the result
                meaningful as a byte-order test

@return true if and only if system's byte order is little endian

@note from https://stackoverflow.com/a/1001328/266378
*/
static inline bool little_endianess(int num = 1) noexcept
{
    const char* const lowest_byte = reinterpret_cast<const char*>(&num);
    return lowest_byte[0] == 1;
}
46 
47 
48 ///////////////////
49 // binary reader //
50 ///////////////////
51 
52 /*!
@brief deserialization of BSON, CBOR, MessagePack, and UBJSON values
54 */
55 template<typename BasicJsonType, typename InputAdapterType, typename SAX = json_sax_dom_parser<BasicJsonType>>
56 class binary_reader
57 {
58     using number_integer_t = typename BasicJsonType::number_integer_t;
59     using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
60     using number_float_t = typename BasicJsonType::number_float_t;
61     using string_t = typename BasicJsonType::string_t;
62     using binary_t = typename BasicJsonType::binary_t;
63     using json_sax_t = SAX;
64     using char_type = typename InputAdapterType::char_type;
65     using char_int_type = typename std::char_traits<char_type>::int_type;
66 
67   public:
68     /*!
69     @brief create a binary reader
70 
71     @param[in] adapter  input adapter to read from
72     */
binary_reader(InputAdapterType && adapter)73     explicit binary_reader(InputAdapterType&& adapter) : ia(std::move(adapter))
74     {
75         (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
76     }
77 
    // make class move-only: copy construction and copy assignment are
    // deleted, while move construction, move assignment, and destruction use
    // the compiler-generated defaults
    binary_reader(const binary_reader&) = delete;
    binary_reader(binary_reader&&) = default;
    binary_reader& operator=(const binary_reader&) = delete;
    binary_reader& operator=(binary_reader&&) = default;
    ~binary_reader() = default;
84 
85     /*!
86     @param[in] format  the binary format to parse
87     @param[in] sax_    a SAX event processor
88     @param[in] strict  whether to expect the input to be consumed completed
89     @param[in] tag_handler  how to treat CBOR tags
90 
91     @return
92     */
93     JSON_HEDLEY_NON_NULL(3)
sax_parse(const input_format_t format,json_sax_t * sax_,const bool strict=true,const cbor_tag_handler_t tag_handler=cbor_tag_handler_t::error)94     bool sax_parse(const input_format_t format,
95                    json_sax_t* sax_,
96                    const bool strict = true,
97                    const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error)
98     {
99         sax = sax_;
100         bool result = false;
101 
102         switch (format)
103         {
104             case input_format_t::bson:
105                 result = parse_bson_internal();
106                 break;
107 
108             case input_format_t::cbor:
109                 result = parse_cbor_internal(true, tag_handler);
110                 break;
111 
112             case input_format_t::msgpack:
113                 result = parse_msgpack_internal();
114                 break;
115 
116             case input_format_t::ubjson:
117                 result = parse_ubjson_internal();
118                 break;
119 
120             default:            // LCOV_EXCL_LINE
121                 JSON_ASSERT(false);  // LCOV_EXCL_LINE
122         }
123 
124         // strict mode: next byte must be EOF
125         if (result && strict)
126         {
127             if (format == input_format_t::ubjson)
128             {
129                 get_ignore_noop();
130             }
131             else
132             {
133                 get();
134             }
135 
136             if (JSON_HEDLEY_UNLIKELY(current != std::char_traits<char_type>::eof()))
137             {
138                 return sax->parse_error(chars_read, get_token_string(),
139                                         parse_error::create(110, chars_read, exception_message(format, "expected end of input; last byte: 0x" + get_token_string(), "value")));
140             }
141         }
142 
143         return result;
144     }
145 
146   private:
147     //////////
148     // BSON //
149     //////////
150 
151     /*!
152     @brief Reads in a BSON-object and passes it to the SAX-parser.
153     @return whether a valid BSON-value was passed to the SAX parser
154     */
parse_bson_internal()155     bool parse_bson_internal()
156     {
157         std::int32_t document_size{};
158         get_number<std::int32_t, true>(input_format_t::bson, document_size);
159 
160         if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1))))
161         {
162             return false;
163         }
164 
165         if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/false)))
166         {
167             return false;
168         }
169 
170         return sax->end_object();
171     }
172 
173     /*!
174     @brief Parses a C-style string from the BSON input.
175     @param[in, out] result  A reference to the string variable where the read
176                             string is to be stored.
177     @return `true` if the \x00-byte indicating the end of the string was
178              encountered before the EOF; false` indicates an unexpected EOF.
179     */
get_bson_cstr(string_t & result)180     bool get_bson_cstr(string_t& result)
181     {
182         auto out = std::back_inserter(result);
183         while (true)
184         {
185             get();
186             if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "cstring")))
187             {
188                 return false;
189             }
190             if (current == 0x00)
191             {
192                 return true;
193             }
194             *out++ = static_cast<typename string_t::value_type>(current);
195         }
196     }
197 
198     /*!
199     @brief Parses a zero-terminated string of length @a len from the BSON
200            input.
201     @param[in] len  The length (including the zero-byte at the end) of the
202                     string to be read.
203     @param[in, out] result  A reference to the string variable where the read
204                             string is to be stored.
205     @tparam NumberType The type of the length @a len
206     @pre len >= 1
207     @return `true` if the string was successfully parsed
208     */
209     template<typename NumberType>
get_bson_string(const NumberType len,string_t & result)210     bool get_bson_string(const NumberType len, string_t& result)
211     {
212         if (JSON_HEDLEY_UNLIKELY(len < 1))
213         {
214             auto last_token = get_token_string();
215             return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "string length must be at least 1, is " + std::to_string(len), "string")));
216         }
217 
218         return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) && get() != std::char_traits<char_type>::eof();
219     }
220 
221     /*!
222     @brief Parses a byte array input of length @a len from the BSON input.
223     @param[in] len  The length of the byte array to be read.
224     @param[in, out] result  A reference to the binary variable where the read
225                             array is to be stored.
226     @tparam NumberType The type of the length @a len
227     @pre len >= 0
228     @return `true` if the byte array was successfully parsed
229     */
230     template<typename NumberType>
get_bson_binary(const NumberType len,binary_t & result)231     bool get_bson_binary(const NumberType len, binary_t& result)
232     {
233         if (JSON_HEDLEY_UNLIKELY(len < 0))
234         {
235             auto last_token = get_token_string();
236             return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "byte array length cannot be negative, is " + std::to_string(len), "binary")));
237         }
238 
239         // All BSON binary values have a subtype
240         std::uint8_t subtype{};
241         get_number<std::uint8_t>(input_format_t::bson, subtype);
242         result.set_subtype(subtype);
243 
244         return get_binary(input_format_t::bson, len, result);
245     }
246 
247     /*!
248     @brief Read a BSON document element of the given @a element_type.
249     @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html
250     @param[in] element_type_parse_position The position in the input stream,
251                where the `element_type` was read.
252     @warning Not all BSON element types are supported yet. An unsupported
253              @a element_type will give rise to a parse_error.114:
254              Unsupported BSON record type 0x...
255     @return whether a valid BSON-object/array was passed to the SAX parser
256     */
parse_bson_element_internal(const char_int_type element_type,const std::size_t element_type_parse_position)257     bool parse_bson_element_internal(const char_int_type element_type,
258                                      const std::size_t element_type_parse_position)
259     {
260         switch (element_type)
261         {
262             case 0x01: // double
263             {
264                 double number{};
265                 return get_number<double, true>(input_format_t::bson, number) && sax->number_float(static_cast<number_float_t>(number), "");
266             }
267 
268             case 0x02: // string
269             {
270                 std::int32_t len{};
271                 string_t value;
272                 return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_string(len, value) && sax->string(value);
273             }
274 
275             case 0x03: // object
276             {
277                 return parse_bson_internal();
278             }
279 
280             case 0x04: // array
281             {
282                 return parse_bson_array();
283             }
284 
285             case 0x05: // binary
286             {
287                 std::int32_t len{};
288                 binary_t value;
289                 return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_binary(len, value) && sax->binary(value);
290             }
291 
292             case 0x08: // boolean
293             {
294                 return sax->boolean(get() != 0);
295             }
296 
297             case 0x0A: // null
298             {
299                 return sax->null();
300             }
301 
302             case 0x10: // int32
303             {
304                 std::int32_t value{};
305                 return get_number<std::int32_t, true>(input_format_t::bson, value) && sax->number_integer(value);
306             }
307 
308             case 0x12: // int64
309             {
310                 std::int64_t value{};
311                 return get_number<std::int64_t, true>(input_format_t::bson, value) && sax->number_integer(value);
312             }
313 
314             default: // anything else not supported (yet)
315             {
316                 std::array<char, 3> cr{{}};
317                 (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(element_type));
318                 return sax->parse_error(element_type_parse_position, std::string(cr.data()), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr.data())));
319             }
320         }
321     }
322 
323     /*!
324     @brief Read a BSON element list (as specified in the BSON-spec)
325 
326     The same binary layout is used for objects and arrays, hence it must be
327     indicated with the argument @a is_array which one is expected
328     (true --> array, false --> object).
329 
330     @param[in] is_array Determines if the element list being read is to be
331                         treated as an object (@a is_array == false), or as an
332                         array (@a is_array == true).
333     @return whether a valid BSON-object/array was passed to the SAX parser
334     */
parse_bson_element_list(const bool is_array)335     bool parse_bson_element_list(const bool is_array)
336     {
337         string_t key;
338 
339         while (auto element_type = get())
340         {
341             if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "element list")))
342             {
343                 return false;
344             }
345 
346             const std::size_t element_type_parse_position = chars_read;
347             if (JSON_HEDLEY_UNLIKELY(!get_bson_cstr(key)))
348             {
349                 return false;
350             }
351 
352             if (!is_array && !sax->key(key))
353             {
354                 return false;
355             }
356 
357             if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_internal(element_type, element_type_parse_position)))
358             {
359                 return false;
360             }
361 
362             // get_bson_cstr only appends
363             key.clear();
364         }
365 
366         return true;
367     }
368 
369     /*!
370     @brief Reads an array from the BSON input and passes it to the SAX-parser.
371     @return whether a valid BSON-array was passed to the SAX parser
372     */
parse_bson_array()373     bool parse_bson_array()
374     {
375         std::int32_t document_size{};
376         get_number<std::int32_t, true>(input_format_t::bson, document_size);
377 
378         if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1))))
379         {
380             return false;
381         }
382 
383         if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/true)))
384         {
385             return false;
386         }
387 
388         return sax->end_array();
389     }
390 
391     //////////
392     // CBOR //
393     //////////
394 
395     /*!
396     @param[in] get_char  whether a new character should be retrieved from the
397                          input (true) or whether the last read character should
398                          be considered instead (false)
399     @param[in] tag_handler how CBOR tags should be treated
400 
401     @return whether a valid CBOR value was passed to the SAX parser
402     */
parse_cbor_internal(const bool get_char,const cbor_tag_handler_t tag_handler)403     bool parse_cbor_internal(const bool get_char,
404                              const cbor_tag_handler_t tag_handler)
405     {
406         switch (get_char ? get() : current)
407         {
408             // EOF
409             case std::char_traits<char_type>::eof():
410                 return unexpect_eof(input_format_t::cbor, "value");
411 
412             // Integer 0x00..0x17 (0..23)
413             case 0x00:
414             case 0x01:
415             case 0x02:
416             case 0x03:
417             case 0x04:
418             case 0x05:
419             case 0x06:
420             case 0x07:
421             case 0x08:
422             case 0x09:
423             case 0x0A:
424             case 0x0B:
425             case 0x0C:
426             case 0x0D:
427             case 0x0E:
428             case 0x0F:
429             case 0x10:
430             case 0x11:
431             case 0x12:
432             case 0x13:
433             case 0x14:
434             case 0x15:
435             case 0x16:
436             case 0x17:
437                 return sax->number_unsigned(static_cast<number_unsigned_t>(current));
438 
439             case 0x18: // Unsigned integer (one-byte uint8_t follows)
440             {
441                 std::uint8_t number{};
442                 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
443             }
444 
445             case 0x19: // Unsigned integer (two-byte uint16_t follows)
446             {
447                 std::uint16_t number{};
448                 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
449             }
450 
451             case 0x1A: // Unsigned integer (four-byte uint32_t follows)
452             {
453                 std::uint32_t number{};
454                 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
455             }
456 
457             case 0x1B: // Unsigned integer (eight-byte uint64_t follows)
458             {
459                 std::uint64_t number{};
460                 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
461             }
462 
463             // Negative integer -1-0x00..-1-0x17 (-1..-24)
464             case 0x20:
465             case 0x21:
466             case 0x22:
467             case 0x23:
468             case 0x24:
469             case 0x25:
470             case 0x26:
471             case 0x27:
472             case 0x28:
473             case 0x29:
474             case 0x2A:
475             case 0x2B:
476             case 0x2C:
477             case 0x2D:
478             case 0x2E:
479             case 0x2F:
480             case 0x30:
481             case 0x31:
482             case 0x32:
483             case 0x33:
484             case 0x34:
485             case 0x35:
486             case 0x36:
487             case 0x37:
488                 return sax->number_integer(static_cast<std::int8_t>(0x20 - 1 - current));
489 
490             case 0x38: // Negative integer (one-byte uint8_t follows)
491             {
492                 std::uint8_t number{};
493                 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
494             }
495 
496             case 0x39: // Negative integer -1-n (two-byte uint16_t follows)
497             {
498                 std::uint16_t number{};
499                 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
500             }
501 
502             case 0x3A: // Negative integer -1-n (four-byte uint32_t follows)
503             {
504                 std::uint32_t number{};
505                 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
506             }
507 
508             case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows)
509             {
510                 std::uint64_t number{};
511                 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1)
512                         - static_cast<number_integer_t>(number));
513             }
514 
515             // Binary data (0x00..0x17 bytes follow)
516             case 0x40:
517             case 0x41:
518             case 0x42:
519             case 0x43:
520             case 0x44:
521             case 0x45:
522             case 0x46:
523             case 0x47:
524             case 0x48:
525             case 0x49:
526             case 0x4A:
527             case 0x4B:
528             case 0x4C:
529             case 0x4D:
530             case 0x4E:
531             case 0x4F:
532             case 0x50:
533             case 0x51:
534             case 0x52:
535             case 0x53:
536             case 0x54:
537             case 0x55:
538             case 0x56:
539             case 0x57:
540             case 0x58: // Binary data (one-byte uint8_t for n follows)
541             case 0x59: // Binary data (two-byte uint16_t for n follow)
542             case 0x5A: // Binary data (four-byte uint32_t for n follow)
543             case 0x5B: // Binary data (eight-byte uint64_t for n follow)
544             case 0x5F: // Binary data (indefinite length)
545             {
546                 binary_t b;
547                 return get_cbor_binary(b) && sax->binary(b);
548             }
549 
550             // UTF-8 string (0x00..0x17 bytes follow)
551             case 0x60:
552             case 0x61:
553             case 0x62:
554             case 0x63:
555             case 0x64:
556             case 0x65:
557             case 0x66:
558             case 0x67:
559             case 0x68:
560             case 0x69:
561             case 0x6A:
562             case 0x6B:
563             case 0x6C:
564             case 0x6D:
565             case 0x6E:
566             case 0x6F:
567             case 0x70:
568             case 0x71:
569             case 0x72:
570             case 0x73:
571             case 0x74:
572             case 0x75:
573             case 0x76:
574             case 0x77:
575             case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
576             case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
577             case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
578             case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
579             case 0x7F: // UTF-8 string (indefinite length)
580             {
581                 string_t s;
582                 return get_cbor_string(s) && sax->string(s);
583             }
584 
585             // array (0x00..0x17 data items follow)
586             case 0x80:
587             case 0x81:
588             case 0x82:
589             case 0x83:
590             case 0x84:
591             case 0x85:
592             case 0x86:
593             case 0x87:
594             case 0x88:
595             case 0x89:
596             case 0x8A:
597             case 0x8B:
598             case 0x8C:
599             case 0x8D:
600             case 0x8E:
601             case 0x8F:
602             case 0x90:
603             case 0x91:
604             case 0x92:
605             case 0x93:
606             case 0x94:
607             case 0x95:
608             case 0x96:
609             case 0x97:
610                 return get_cbor_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
611 
612             case 0x98: // array (one-byte uint8_t for n follows)
613             {
614                 std::uint8_t len{};
615                 return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
616             }
617 
618             case 0x99: // array (two-byte uint16_t for n follow)
619             {
620                 std::uint16_t len{};
621                 return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
622             }
623 
624             case 0x9A: // array (four-byte uint32_t for n follow)
625             {
626                 std::uint32_t len{};
627                 return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
628             }
629 
630             case 0x9B: // array (eight-byte uint64_t for n follow)
631             {
632                 std::uint64_t len{};
633                 return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
634             }
635 
636             case 0x9F: // array (indefinite length)
637                 return get_cbor_array(std::size_t(-1), tag_handler);
638 
639             // map (0x00..0x17 pairs of data items follow)
640             case 0xA0:
641             case 0xA1:
642             case 0xA2:
643             case 0xA3:
644             case 0xA4:
645             case 0xA5:
646             case 0xA6:
647             case 0xA7:
648             case 0xA8:
649             case 0xA9:
650             case 0xAA:
651             case 0xAB:
652             case 0xAC:
653             case 0xAD:
654             case 0xAE:
655             case 0xAF:
656             case 0xB0:
657             case 0xB1:
658             case 0xB2:
659             case 0xB3:
660             case 0xB4:
661             case 0xB5:
662             case 0xB6:
663             case 0xB7:
664                 return get_cbor_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
665 
666             case 0xB8: // map (one-byte uint8_t for n follows)
667             {
668                 std::uint8_t len{};
669                 return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
670             }
671 
672             case 0xB9: // map (two-byte uint16_t for n follow)
673             {
674                 std::uint16_t len{};
675                 return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
676             }
677 
678             case 0xBA: // map (four-byte uint32_t for n follow)
679             {
680                 std::uint32_t len{};
681                 return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
682             }
683 
684             case 0xBB: // map (eight-byte uint64_t for n follow)
685             {
686                 std::uint64_t len{};
687                 return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
688             }
689 
690             case 0xBF: // map (indefinite length)
691                 return get_cbor_object(std::size_t(-1), tag_handler);
692 
693             case 0xC6: // tagged item
694             case 0xC7:
695             case 0xC8:
696             case 0xC9:
697             case 0xCA:
698             case 0xCB:
699             case 0xCC:
700             case 0xCD:
701             case 0xCE:
702             case 0xCF:
703             case 0xD0:
704             case 0xD1:
705             case 0xD2:
706             case 0xD3:
707             case 0xD4:
708             case 0xD8: // tagged item (1 bytes follow)
709             case 0xD9: // tagged item (2 bytes follow)
710             case 0xDA: // tagged item (4 bytes follow)
711             case 0xDB: // tagged item (8 bytes follow)
712             {
713                 switch (tag_handler)
714                 {
715                     case cbor_tag_handler_t::error:
716                     {
717                         auto last_token = get_token_string();
718                         return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value")));
719                     }
720 
721                     case cbor_tag_handler_t::ignore:
722                     {
723                         switch (current)
724                         {
725                             case 0xD8:
726                             {
727                                 std::uint8_t len{};
728                                 get_number(input_format_t::cbor, len);
729                                 break;
730                             }
731                             case 0xD9:
732                             {
733                                 std::uint16_t len{};
734                                 get_number(input_format_t::cbor, len);
735                                 break;
736                             }
737                             case 0xDA:
738                             {
739                                 std::uint32_t len{};
740                                 get_number(input_format_t::cbor, len);
741                                 break;
742                             }
743                             case 0xDB:
744                             {
745                                 std::uint64_t len{};
746                                 get_number(input_format_t::cbor, len);
747                                 break;
748                             }
749                             default:
750                                 break;
751                         }
752                         return parse_cbor_internal(true, tag_handler);
753                     }
754 
755                     default:            // LCOV_EXCL_LINE
756                         JSON_ASSERT(false);  // LCOV_EXCL_LINE
757                 }
758             }
759 
760             case 0xF4: // false
761                 return sax->boolean(false);
762 
763             case 0xF5: // true
764                 return sax->boolean(true);
765 
766             case 0xF6: // null
767                 return sax->null();
768 
769             case 0xF9: // Half-Precision Float (two-byte IEEE 754)
770             {
771                 const auto byte1_raw = get();
772                 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number")))
773                 {
774                     return false;
775                 }
776                 const auto byte2_raw = get();
777                 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number")))
778                 {
779                     return false;
780                 }
781 
782                 const auto byte1 = static_cast<unsigned char>(byte1_raw);
783                 const auto byte2 = static_cast<unsigned char>(byte2_raw);
784 
785                 // code from RFC 7049, Appendix D, Figure 3:
786                 // As half-precision floating-point numbers were only added
787                 // to IEEE 754 in 2008, today's programming platforms often
788                 // still only have limited support for them. It is very
789                 // easy to include at least decoding support for them even
790                 // without such support. An example of a small decoder for
791                 // half-precision floating-point numbers in the C language
792                 // is shown in Fig. 3.
793                 const auto half = static_cast<unsigned int>((byte1 << 8u) + byte2);
794                 const double val = [&half]
795                 {
796                     const int exp = (half >> 10u) & 0x1Fu;
797                     const unsigned int mant = half & 0x3FFu;
798                     JSON_ASSERT(0 <= exp&& exp <= 32);
799                     JSON_ASSERT(mant <= 1024);
800                     switch (exp)
801                     {
802                         case 0:
803                             return std::ldexp(mant, -24);
804                         case 31:
805                             return (mant == 0)
806                             ? std::numeric_limits<double>::infinity()
807                             : std::numeric_limits<double>::quiet_NaN();
808                         default:
809                             return std::ldexp(mant + 1024, exp - 25);
810                     }
811                 }();
812                 return sax->number_float((half & 0x8000u) != 0
813                                          ? static_cast<number_float_t>(-val)
814                                          : static_cast<number_float_t>(val), "");
815             }
816 
817             case 0xFA: // Single-Precision Float (four-byte IEEE 754)
818             {
819                 float number{};
820                 return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
821             }
822 
823             case 0xFB: // Double-Precision Float (eight-byte IEEE 754)
824             {
825                 double number{};
826                 return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
827             }
828 
829             default: // anything else (0xFF is handled inside the other types)
830             {
831                 auto last_token = get_token_string();
832                 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value")));
833             }
834         }
835     }
836 
837     /*!
838     @brief reads a CBOR string
839 
840     This function first reads starting bytes to determine the expected
841     string length and then copies this number of bytes into a string.
842     Additionally, CBOR's strings with indefinite lengths are supported.
843 
844     @param[out] result  created string
845 
846     @return whether string creation completed
847     */
get_cbor_string(string_t & result)848     bool get_cbor_string(string_t& result)
849     {
850         if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "string")))
851         {
852             return false;
853         }
854 
855         switch (current)
856         {
857             // UTF-8 string (0x00..0x17 bytes follow)
858             case 0x60:
859             case 0x61:
860             case 0x62:
861             case 0x63:
862             case 0x64:
863             case 0x65:
864             case 0x66:
865             case 0x67:
866             case 0x68:
867             case 0x69:
868             case 0x6A:
869             case 0x6B:
870             case 0x6C:
871             case 0x6D:
872             case 0x6E:
873             case 0x6F:
874             case 0x70:
875             case 0x71:
876             case 0x72:
877             case 0x73:
878             case 0x74:
879             case 0x75:
880             case 0x76:
881             case 0x77:
882             {
883                 return get_string(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
884             }
885 
886             case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
887             {
888                 std::uint8_t len{};
889                 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
890             }
891 
892             case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
893             {
894                 std::uint16_t len{};
895                 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
896             }
897 
898             case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
899             {
900                 std::uint32_t len{};
901                 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
902             }
903 
904             case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
905             {
906                 std::uint64_t len{};
907                 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
908             }
909 
910             case 0x7F: // UTF-8 string (indefinite length)
911             {
912                 while (get() != 0xFF)
913                 {
914                     string_t chunk;
915                     if (!get_cbor_string(chunk))
916                     {
917                         return false;
918                     }
919                     result.append(chunk);
920                 }
921                 return true;
922             }
923 
924             default:
925             {
926                 auto last_token = get_token_string();
927                 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string")));
928             }
929         }
930     }
931 
932     /*!
933     @brief reads a CBOR byte array
934 
935     This function first reads starting bytes to determine the expected
936     byte array length and then copies this number of bytes into the byte array.
937     Additionally, CBOR's byte arrays with indefinite lengths are supported.
938 
939     @param[out] result  created byte array
940 
941     @return whether byte array creation completed
942     */
get_cbor_binary(binary_t & result)943     bool get_cbor_binary(binary_t& result)
944     {
945         if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "binary")))
946         {
947             return false;
948         }
949 
950         switch (current)
951         {
952             // Binary data (0x00..0x17 bytes follow)
953             case 0x40:
954             case 0x41:
955             case 0x42:
956             case 0x43:
957             case 0x44:
958             case 0x45:
959             case 0x46:
960             case 0x47:
961             case 0x48:
962             case 0x49:
963             case 0x4A:
964             case 0x4B:
965             case 0x4C:
966             case 0x4D:
967             case 0x4E:
968             case 0x4F:
969             case 0x50:
970             case 0x51:
971             case 0x52:
972             case 0x53:
973             case 0x54:
974             case 0x55:
975             case 0x56:
976             case 0x57:
977             {
978                 return get_binary(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
979             }
980 
981             case 0x58: // Binary data (one-byte uint8_t for n follows)
982             {
983                 std::uint8_t len{};
984                 return get_number(input_format_t::cbor, len) &&
985                        get_binary(input_format_t::cbor, len, result);
986             }
987 
988             case 0x59: // Binary data (two-byte uint16_t for n follow)
989             {
990                 std::uint16_t len{};
991                 return get_number(input_format_t::cbor, len) &&
992                        get_binary(input_format_t::cbor, len, result);
993             }
994 
995             case 0x5A: // Binary data (four-byte uint32_t for n follow)
996             {
997                 std::uint32_t len{};
998                 return get_number(input_format_t::cbor, len) &&
999                        get_binary(input_format_t::cbor, len, result);
1000             }
1001 
1002             case 0x5B: // Binary data (eight-byte uint64_t for n follow)
1003             {
1004                 std::uint64_t len{};
1005                 return get_number(input_format_t::cbor, len) &&
1006                        get_binary(input_format_t::cbor, len, result);
1007             }
1008 
1009             case 0x5F: // Binary data (indefinite length)
1010             {
1011                 while (get() != 0xFF)
1012                 {
1013                     binary_t chunk;
1014                     if (!get_cbor_binary(chunk))
1015                     {
1016                         return false;
1017                     }
1018                     result.insert(result.end(), chunk.begin(), chunk.end());
1019                 }
1020                 return true;
1021             }
1022 
1023             default:
1024             {
1025                 auto last_token = get_token_string();
1026                 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x40-0x5B) or indefinite binary array type (0x5F); last byte: 0x" + last_token, "binary")));
1027             }
1028         }
1029     }
1030 
1031     /*!
1032     @param[in] len  the length of the array or std::size_t(-1) for an
1033                     array of indefinite size
1034     @param[in] tag_handler how CBOR tags should be treated
1035     @return whether array creation completed
1036     */
get_cbor_array(const std::size_t len,const cbor_tag_handler_t tag_handler)1037     bool get_cbor_array(const std::size_t len,
1038                         const cbor_tag_handler_t tag_handler)
1039     {
1040         if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len)))
1041         {
1042             return false;
1043         }
1044 
1045         if (len != std::size_t(-1))
1046         {
1047             for (std::size_t i = 0; i < len; ++i)
1048             {
1049                 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1050                 {
1051                     return false;
1052                 }
1053             }
1054         }
1055         else
1056         {
1057             while (get() != 0xFF)
1058             {
1059                 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(false, tag_handler)))
1060                 {
1061                     return false;
1062                 }
1063             }
1064         }
1065 
1066         return sax->end_array();
1067     }
1068 
1069     /*!
1070     @param[in] len  the length of the object or std::size_t(-1) for an
1071                     object of indefinite size
1072     @param[in] tag_handler how CBOR tags should be treated
1073     @return whether object creation completed
1074     */
get_cbor_object(const std::size_t len,const cbor_tag_handler_t tag_handler)1075     bool get_cbor_object(const std::size_t len,
1076                          const cbor_tag_handler_t tag_handler)
1077     {
1078         if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len)))
1079         {
1080             return false;
1081         }
1082 
1083         string_t key;
1084         if (len != std::size_t(-1))
1085         {
1086             for (std::size_t i = 0; i < len; ++i)
1087             {
1088                 get();
1089                 if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key)))
1090                 {
1091                     return false;
1092                 }
1093 
1094                 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1095                 {
1096                     return false;
1097                 }
1098                 key.clear();
1099             }
1100         }
1101         else
1102         {
1103             while (get() != 0xFF)
1104             {
1105                 if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key)))
1106                 {
1107                     return false;
1108                 }
1109 
1110                 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1111                 {
1112                     return false;
1113                 }
1114                 key.clear();
1115             }
1116         }
1117 
1118         return sax->end_object();
1119     }
1120 
1121     /////////////
1122     // MsgPack //
1123     /////////////
1124 
1125     /*!
1126     @return whether a valid MessagePack value was passed to the SAX parser
1127     */
    bool parse_msgpack_internal()
    {
        // Fetch the next byte with get() and dispatch on it. Every branch
        // either forwards a decoded value to the SAX consumer `sax` or
        // delegates to a helper; the boolean result of that call is returned
        // unchanged.
        switch (get())
        {
            // EOF: a value was required but the input ended; unexpect_eof()
            // supplies the return value
            case std::char_traits<char_type>::eof():
                return unexpect_eof(input_format_t::msgpack, "value");

            // positive fixint (0x00..0x7F): the byte itself is the value
            case 0x00:
            case 0x01:
            case 0x02:
            case 0x03:
            case 0x04:
            case 0x05:
            case 0x06:
            case 0x07:
            case 0x08:
            case 0x09:
            case 0x0A:
            case 0x0B:
            case 0x0C:
            case 0x0D:
            case 0x0E:
            case 0x0F:
            case 0x10:
            case 0x11:
            case 0x12:
            case 0x13:
            case 0x14:
            case 0x15:
            case 0x16:
            case 0x17:
            case 0x18:
            case 0x19:
            case 0x1A:
            case 0x1B:
            case 0x1C:
            case 0x1D:
            case 0x1E:
            case 0x1F:
            case 0x20:
            case 0x21:
            case 0x22:
            case 0x23:
            case 0x24:
            case 0x25:
            case 0x26:
            case 0x27:
            case 0x28:
            case 0x29:
            case 0x2A:
            case 0x2B:
            case 0x2C:
            case 0x2D:
            case 0x2E:
            case 0x2F:
            case 0x30:
            case 0x31:
            case 0x32:
            case 0x33:
            case 0x34:
            case 0x35:
            case 0x36:
            case 0x37:
            case 0x38:
            case 0x39:
            case 0x3A:
            case 0x3B:
            case 0x3C:
            case 0x3D:
            case 0x3E:
            case 0x3F:
            case 0x40:
            case 0x41:
            case 0x42:
            case 0x43:
            case 0x44:
            case 0x45:
            case 0x46:
            case 0x47:
            case 0x48:
            case 0x49:
            case 0x4A:
            case 0x4B:
            case 0x4C:
            case 0x4D:
            case 0x4E:
            case 0x4F:
            case 0x50:
            case 0x51:
            case 0x52:
            case 0x53:
            case 0x54:
            case 0x55:
            case 0x56:
            case 0x57:
            case 0x58:
            case 0x59:
            case 0x5A:
            case 0x5B:
            case 0x5C:
            case 0x5D:
            case 0x5E:
            case 0x5F:
            case 0x60:
            case 0x61:
            case 0x62:
            case 0x63:
            case 0x64:
            case 0x65:
            case 0x66:
            case 0x67:
            case 0x68:
            case 0x69:
            case 0x6A:
            case 0x6B:
            case 0x6C:
            case 0x6D:
            case 0x6E:
            case 0x6F:
            case 0x70:
            case 0x71:
            case 0x72:
            case 0x73:
            case 0x74:
            case 0x75:
            case 0x76:
            case 0x77:
            case 0x78:
            case 0x79:
            case 0x7A:
            case 0x7B:
            case 0x7C:
            case 0x7D:
            case 0x7E:
            case 0x7F:
                return sax->number_unsigned(static_cast<number_unsigned_t>(current));

            // fixmap (0x80..0x8F): the low four bits are the member count
            case 0x80:
            case 0x81:
            case 0x82:
            case 0x83:
            case 0x84:
            case 0x85:
            case 0x86:
            case 0x87:
            case 0x88:
            case 0x89:
            case 0x8A:
            case 0x8B:
            case 0x8C:
            case 0x8D:
            case 0x8E:
            case 0x8F:
                return get_msgpack_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));

            // fixarray (0x90..0x9F): the low four bits are the element count
            case 0x90:
            case 0x91:
            case 0x92:
            case 0x93:
            case 0x94:
            case 0x95:
            case 0x96:
            case 0x97:
            case 0x98:
            case 0x99:
            case 0x9A:
            case 0x9B:
            case 0x9C:
            case 0x9D:
            case 0x9E:
            case 0x9F:
                return get_msgpack_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));

            // fixstr (0xA0..0xBF) and the explicitly sized string types; all
            // of them are decoded by get_msgpack_string(), which inspects
            // `current` again to find the length encoding
            case 0xA0:
            case 0xA1:
            case 0xA2:
            case 0xA3:
            case 0xA4:
            case 0xA5:
            case 0xA6:
            case 0xA7:
            case 0xA8:
            case 0xA9:
            case 0xAA:
            case 0xAB:
            case 0xAC:
            case 0xAD:
            case 0xAE:
            case 0xAF:
            case 0xB0:
            case 0xB1:
            case 0xB2:
            case 0xB3:
            case 0xB4:
            case 0xB5:
            case 0xB6:
            case 0xB7:
            case 0xB8:
            case 0xB9:
            case 0xBA:
            case 0xBB:
            case 0xBC:
            case 0xBD:
            case 0xBE:
            case 0xBF:
            case 0xD9: // str 8
            case 0xDA: // str 16
            case 0xDB: // str 32
            {
                string_t s;
                return get_msgpack_string(s) && sax->string(s);
            }

            case 0xC0: // nil
                return sax->null();

            case 0xC2: // false
                return sax->boolean(false);

            case 0xC3: // true
                return sax->boolean(true);

            // bin, ext, and fixext are all reported through sax->binary();
            // get_msgpack_binary() inspects `current` again to read the
            // length and, for the ext/fixext types, the subtype byte
            case 0xC4: // bin 8
            case 0xC5: // bin 16
            case 0xC6: // bin 32
            case 0xC7: // ext 8
            case 0xC8: // ext 16
            case 0xC9: // ext 32
            case 0xD4: // fixext 1
            case 0xD5: // fixext 2
            case 0xD6: // fixext 4
            case 0xD7: // fixext 8
            case 0xD8: // fixext 16
            {
                binary_t b;
                return get_msgpack_binary(b) && sax->binary(b);
            }

            // floating-point values are read at their encoded width and then
            // converted to number_float_t; the second argument passed to
            // number_float() is always the empty string here
            case 0xCA: // float 32
            {
                float number{};
                return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
            }

            case 0xCB: // float 64
            {
                double number{};
                return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
            }

            // unsigned integers: read at the encoded width, report as unsigned
            case 0xCC: // uint 8
            {
                std::uint8_t number{};
                return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
            }

            case 0xCD: // uint 16
            {
                std::uint16_t number{};
                return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
            }

            case 0xCE: // uint 32
            {
                std::uint32_t number{};
                return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
            }

            case 0xCF: // uint 64
            {
                std::uint64_t number{};
                return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
            }

            // signed integers: read at the encoded width, report as signed
            case 0xD0: // int 8
            {
                std::int8_t number{};
                return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
            }

            case 0xD1: // int 16
            {
                std::int16_t number{};
                return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
            }

            case 0xD2: // int 32
            {
                std::int32_t number{};
                return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
            }

            case 0xD3: // int 64
            {
                std::int64_t number{};
                return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
            }

            // containers with an explicit 16- or 32-bit element count
            case 0xDC: // array 16
            {
                std::uint16_t len{};
                return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast<std::size_t>(len));
            }

            case 0xDD: // array 32
            {
                std::uint32_t len{};
                return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast<std::size_t>(len));
            }

            case 0xDE: // map 16
            {
                std::uint16_t len{};
                return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast<std::size_t>(len));
            }

            case 0xDF: // map 32
            {
                std::uint32_t len{};
                return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast<std::size_t>(len));
            }

            // negative fixint (0xE0..0xFF): narrowing the byte to
            // std::int8_t is intended to yield the values -32..-1
            case 0xE0:
            case 0xE1:
            case 0xE2:
            case 0xE3:
            case 0xE4:
            case 0xE5:
            case 0xE6:
            case 0xE7:
            case 0xE8:
            case 0xE9:
            case 0xEA:
            case 0xEB:
            case 0xEC:
            case 0xED:
            case 0xEE:
            case 0xEF:
            case 0xF0:
            case 0xF1:
            case 0xF2:
            case 0xF3:
            case 0xF4:
            case 0xF5:
            case 0xF6:
            case 0xF7:
            case 0xF8:
            case 0xF9:
            case 0xFA:
            case 0xFB:
            case 0xFC:
            case 0xFD:
            case 0xFE:
            case 0xFF:
                return sax->number_integer(static_cast<std::int8_t>(current));

            // Anything without a case label above is reported as parse
            // error 112. Within 0x00..0xFF the only such byte is 0xC1.
            default: // anything else
            {
                auto last_token = get_token_string();
                return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value")));
            }
        }
    }
1497 
1498     /*!
1499     @brief reads a MessagePack string
1500 
1501     This function first reads starting bytes to determine the expected
1502     string length and then copies this number of bytes into a string.
1503 
1504     @param[out] result  created string
1505 
1506     @return whether string creation completed
1507     */
get_msgpack_string(string_t & result)1508     bool get_msgpack_string(string_t& result)
1509     {
1510         if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::msgpack, "string")))
1511         {
1512             return false;
1513         }
1514 
1515         switch (current)
1516         {
1517             // fixstr
1518             case 0xA0:
1519             case 0xA1:
1520             case 0xA2:
1521             case 0xA3:
1522             case 0xA4:
1523             case 0xA5:
1524             case 0xA6:
1525             case 0xA7:
1526             case 0xA8:
1527             case 0xA9:
1528             case 0xAA:
1529             case 0xAB:
1530             case 0xAC:
1531             case 0xAD:
1532             case 0xAE:
1533             case 0xAF:
1534             case 0xB0:
1535             case 0xB1:
1536             case 0xB2:
1537             case 0xB3:
1538             case 0xB4:
1539             case 0xB5:
1540             case 0xB6:
1541             case 0xB7:
1542             case 0xB8:
1543             case 0xB9:
1544             case 0xBA:
1545             case 0xBB:
1546             case 0xBC:
1547             case 0xBD:
1548             case 0xBE:
1549             case 0xBF:
1550             {
1551                 return get_string(input_format_t::msgpack, static_cast<unsigned int>(current) & 0x1Fu, result);
1552             }
1553 
1554             case 0xD9: // str 8
1555             {
1556                 std::uint8_t len{};
1557                 return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1558             }
1559 
1560             case 0xDA: // str 16
1561             {
1562                 std::uint16_t len{};
1563                 return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1564             }
1565 
1566             case 0xDB: // str 32
1567             {
1568                 std::uint32_t len{};
1569                 return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1570             }
1571 
1572             default:
1573             {
1574                 auto last_token = get_token_string();
1575                 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::msgpack, "expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x" + last_token, "string")));
1576             }
1577         }
1578     }
1579 
1580     /*!
1581     @brief reads a MessagePack byte array
1582 
1583     This function first reads starting bytes to determine the expected
1584     byte array length and then copies this number of bytes into a byte array.
1585 
1586     @param[out] result  created byte array
1587 
1588     @return whether byte array creation completed
1589     */
get_msgpack_binary(binary_t & result)1590     bool get_msgpack_binary(binary_t& result)
1591     {
1592         // helper function to set the subtype
1593         auto assign_and_return_true = [&result](std::int8_t subtype)
1594         {
1595             result.set_subtype(static_cast<std::uint8_t>(subtype));
1596             return true;
1597         };
1598 
1599         switch (current)
1600         {
1601             case 0xC4: // bin 8
1602             {
1603                 std::uint8_t len{};
1604                 return get_number(input_format_t::msgpack, len) &&
1605                        get_binary(input_format_t::msgpack, len, result);
1606             }
1607 
1608             case 0xC5: // bin 16
1609             {
1610                 std::uint16_t len{};
1611                 return get_number(input_format_t::msgpack, len) &&
1612                        get_binary(input_format_t::msgpack, len, result);
1613             }
1614 
1615             case 0xC6: // bin 32
1616             {
1617                 std::uint32_t len{};
1618                 return get_number(input_format_t::msgpack, len) &&
1619                        get_binary(input_format_t::msgpack, len, result);
1620             }
1621 
1622             case 0xC7: // ext 8
1623             {
1624                 std::uint8_t len{};
1625                 std::int8_t subtype{};
1626                 return get_number(input_format_t::msgpack, len) &&
1627                        get_number(input_format_t::msgpack, subtype) &&
1628                        get_binary(input_format_t::msgpack, len, result) &&
1629                        assign_and_return_true(subtype);
1630             }
1631 
1632             case 0xC8: // ext 16
1633             {
1634                 std::uint16_t len{};
1635                 std::int8_t subtype{};
1636                 return get_number(input_format_t::msgpack, len) &&
1637                        get_number(input_format_t::msgpack, subtype) &&
1638                        get_binary(input_format_t::msgpack, len, result) &&
1639                        assign_and_return_true(subtype);
1640             }
1641 
1642             case 0xC9: // ext 32
1643             {
1644                 std::uint32_t len{};
1645                 std::int8_t subtype{};
1646                 return get_number(input_format_t::msgpack, len) &&
1647                        get_number(input_format_t::msgpack, subtype) &&
1648                        get_binary(input_format_t::msgpack, len, result) &&
1649                        assign_and_return_true(subtype);
1650             }
1651 
1652             case 0xD4: // fixext 1
1653             {
1654                 std::int8_t subtype{};
1655                 return get_number(input_format_t::msgpack, subtype) &&
1656                        get_binary(input_format_t::msgpack, 1, result) &&
1657                        assign_and_return_true(subtype);
1658             }
1659 
1660             case 0xD5: // fixext 2
1661             {
1662                 std::int8_t subtype{};
1663                 return get_number(input_format_t::msgpack, subtype) &&
1664                        get_binary(input_format_t::msgpack, 2, result) &&
1665                        assign_and_return_true(subtype);
1666             }
1667 
1668             case 0xD6: // fixext 4
1669             {
1670                 std::int8_t subtype{};
1671                 return get_number(input_format_t::msgpack, subtype) &&
1672                        get_binary(input_format_t::msgpack, 4, result) &&
1673                        assign_and_return_true(subtype);
1674             }
1675 
1676             case 0xD7: // fixext 8
1677             {
1678                 std::int8_t subtype{};
1679                 return get_number(input_format_t::msgpack, subtype) &&
1680                        get_binary(input_format_t::msgpack, 8, result) &&
1681                        assign_and_return_true(subtype);
1682             }
1683 
1684             case 0xD8: // fixext 16
1685             {
1686                 std::int8_t subtype{};
1687                 return get_number(input_format_t::msgpack, subtype) &&
1688                        get_binary(input_format_t::msgpack, 16, result) &&
1689                        assign_and_return_true(subtype);
1690             }
1691 
1692             default:           // LCOV_EXCL_LINE
1693                 return false;  // LCOV_EXCL_LINE
1694         }
1695     }
1696 
1697     /*!
1698     @param[in] len  the length of the array
1699     @return whether array creation completed
1700     */
get_msgpack_array(const std::size_t len)1701     bool get_msgpack_array(const std::size_t len)
1702     {
1703         if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len)))
1704         {
1705             return false;
1706         }
1707 
1708         for (std::size_t i = 0; i < len; ++i)
1709         {
1710             if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal()))
1711             {
1712                 return false;
1713             }
1714         }
1715 
1716         return sax->end_array();
1717     }
1718 
1719     /*!
1720     @param[in] len  the length of the object
1721     @return whether object creation completed
1722     */
get_msgpack_object(const std::size_t len)1723     bool get_msgpack_object(const std::size_t len)
1724     {
1725         if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len)))
1726         {
1727             return false;
1728         }
1729 
1730         string_t key;
1731         for (std::size_t i = 0; i < len; ++i)
1732         {
1733             get();
1734             if (JSON_HEDLEY_UNLIKELY(!get_msgpack_string(key) || !sax->key(key)))
1735             {
1736                 return false;
1737             }
1738 
1739             if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal()))
1740             {
1741                 return false;
1742             }
1743             key.clear();
1744         }
1745 
1746         return sax->end_object();
1747     }
1748 
1749     ////////////
1750     // UBJSON //
1751     ////////////
1752 
1753     /*!
1754     @param[in] get_char  whether a new character should be retrieved from the
1755                          input (true, default) or whether the last read
1756                          character should be considered instead
1757 
1758     @return whether a valid UBJSON value was passed to the SAX parser
1759     */
parse_ubjson_internal(const bool get_char=true)1760     bool parse_ubjson_internal(const bool get_char = true)
1761     {
1762         return get_ubjson_value(get_char ? get_ignore_noop() : current);
1763     }
1764 
1765     /*!
1766     @brief reads a UBJSON string
1767 
1768     This function is either called after reading the 'S' byte explicitly
1769     indicating a string, or in case of an object key where the 'S' byte can be
1770     left out.
1771 
1772     @param[out] result   created string
1773     @param[in] get_char  whether a new character should be retrieved from the
1774                          input (true, default) or whether the last read
1775                          character should be considered instead
1776 
1777     @return whether string creation completed
1778     */
get_ubjson_string(string_t & result,const bool get_char=true)1779     bool get_ubjson_string(string_t& result, const bool get_char = true)
1780     {
1781         if (get_char)
1782         {
1783             get();  // TODO(niels): may we ignore N here?
1784         }
1785 
1786         if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "value")))
1787         {
1788             return false;
1789         }
1790 
1791         switch (current)
1792         {
1793             case 'U':
1794             {
1795                 std::uint8_t len{};
1796                 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1797             }
1798 
1799             case 'i':
1800             {
1801                 std::int8_t len{};
1802                 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1803             }
1804 
1805             case 'I':
1806             {
1807                 std::int16_t len{};
1808                 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1809             }
1810 
1811             case 'l':
1812             {
1813                 std::int32_t len{};
1814                 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1815             }
1816 
1817             case 'L':
1818             {
1819                 std::int64_t len{};
1820                 return get_number(input_format_t::ubjson, len) && get_string(input_format_t::ubjson, len, result);
1821             }
1822 
1823             default:
1824                 auto last_token = get_token_string();
1825                 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L); last byte: 0x" + last_token, "string")));
1826         }
1827     }
1828 
1829     /*!
1830     @param[out] result  determined size
1831     @return whether size determination completed
1832     */
get_ubjson_size_value(std::size_t & result)1833     bool get_ubjson_size_value(std::size_t& result)
1834     {
1835         switch (get_ignore_noop())
1836         {
1837             case 'U':
1838             {
1839                 std::uint8_t number{};
1840                 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1841                 {
1842                     return false;
1843                 }
1844                 result = static_cast<std::size_t>(number);
1845                 return true;
1846             }
1847 
1848             case 'i':
1849             {
1850                 std::int8_t number{};
1851                 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1852                 {
1853                     return false;
1854                 }
1855                 result = static_cast<std::size_t>(number);
1856                 return true;
1857             }
1858 
1859             case 'I':
1860             {
1861                 std::int16_t number{};
1862                 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1863                 {
1864                     return false;
1865                 }
1866                 result = static_cast<std::size_t>(number);
1867                 return true;
1868             }
1869 
1870             case 'l':
1871             {
1872                 std::int32_t number{};
1873                 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1874                 {
1875                     return false;
1876                 }
1877                 result = static_cast<std::size_t>(number);
1878                 return true;
1879             }
1880 
1881             case 'L':
1882             {
1883                 std::int64_t number{};
1884                 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format_t::ubjson, number)))
1885                 {
1886                     return false;
1887                 }
1888                 result = static_cast<std::size_t>(number);
1889                 return true;
1890             }
1891 
1892             default:
1893             {
1894                 auto last_token = get_token_string();
1895                 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size")));
1896             }
1897         }
1898     }
1899 
1900     /*!
1901     @brief determine the type and size for a container
1902 
1903     In the optimized UBJSON format, a type and a size can be provided to allow
1904     for a more compact representation.
1905 
1906     @param[out] result  pair of the size and the type
1907 
1908     @return whether pair creation completed
1909     */
get_ubjson_size_type(std::pair<std::size_t,char_int_type> & result)1910     bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result)
1911     {
1912         result.first = string_t::npos; // size
1913         result.second = 0; // type
1914 
1915         get_ignore_noop();
1916 
1917         if (current == '$')
1918         {
1919             result.second = get();  // must not ignore 'N', because 'N' maybe the type
1920             if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "type")))
1921             {
1922                 return false;
1923             }
1924 
1925             get_ignore_noop();
1926             if (JSON_HEDLEY_UNLIKELY(current != '#'))
1927             {
1928                 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "value")))
1929                 {
1930                     return false;
1931                 }
1932                 auto last_token = get_token_string();
1933                 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "expected '#' after type information; last byte: 0x" + last_token, "size")));
1934             }
1935 
1936             return get_ubjson_size_value(result.first);
1937         }
1938 
1939         if (current == '#')
1940         {
1941             return get_ubjson_size_value(result.first);
1942         }
1943 
1944         return true;
1945     }
1946 
1947     /*!
1948     @param prefix  the previously read or set type prefix
1949     @return whether value creation completed
1950     */
get_ubjson_value(const char_int_type prefix)1951     bool get_ubjson_value(const char_int_type prefix)
1952     {
1953         switch (prefix)
1954         {
1955             case std::char_traits<char_type>::eof():  // EOF
1956                 return unexpect_eof(input_format_t::ubjson, "value");
1957 
1958             case 'T':  // true
1959                 return sax->boolean(true);
1960             case 'F':  // false
1961                 return sax->boolean(false);
1962 
1963             case 'Z':  // null
1964                 return sax->null();
1965 
1966             case 'U':
1967             {
1968                 std::uint8_t number{};
1969                 return get_number(input_format_t::ubjson, number) && sax->number_unsigned(number);
1970             }
1971 
1972             case 'i':
1973             {
1974                 std::int8_t number{};
1975                 return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1976             }
1977 
1978             case 'I':
1979             {
1980                 std::int16_t number{};
1981                 return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1982             }
1983 
1984             case 'l':
1985             {
1986                 std::int32_t number{};
1987                 return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1988             }
1989 
1990             case 'L':
1991             {
1992                 std::int64_t number{};
1993                 return get_number(input_format_t::ubjson, number) && sax->number_integer(number);
1994             }
1995 
1996             case 'd':
1997             {
1998                 float number{};
1999                 return get_number(input_format_t::ubjson, number) && sax->number_float(static_cast<number_float_t>(number), "");
2000             }
2001 
2002             case 'D':
2003             {
2004                 double number{};
2005                 return get_number(input_format_t::ubjson, number) && sax->number_float(static_cast<number_float_t>(number), "");
2006             }
2007 
2008             case 'H':
2009             {
2010                 return get_ubjson_high_precision_number();
2011             }
2012 
2013             case 'C':  // char
2014             {
2015                 get();
2016                 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "char")))
2017                 {
2018                     return false;
2019                 }
2020                 if (JSON_HEDLEY_UNLIKELY(current > 127))
2021                 {
2022                     auto last_token = get_token_string();
2023                     return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token, "char")));
2024                 }
2025                 string_t s(1, static_cast<typename string_t::value_type>(current));
2026                 return sax->string(s);
2027             }
2028 
2029             case 'S':  // string
2030             {
2031                 string_t s;
2032                 return get_ubjson_string(s) && sax->string(s);
2033             }
2034 
2035             case '[':  // array
2036                 return get_ubjson_array();
2037 
2038             case '{':  // object
2039                 return get_ubjson_object();
2040 
2041             default: // anything else
2042             {
2043                 auto last_token = get_token_string();
2044                 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "invalid byte: 0x" + last_token, "value")));
2045             }
2046         }
2047     }
2048 
2049     /*!
2050     @return whether array creation completed
2051     */
get_ubjson_array()2052     bool get_ubjson_array()
2053     {
2054         std::pair<std::size_t, char_int_type> size_and_type;
2055         if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
2056         {
2057             return false;
2058         }
2059 
2060         if (size_and_type.first != string_t::npos)
2061         {
2062             if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first)))
2063             {
2064                 return false;
2065             }
2066 
2067             if (size_and_type.second != 0)
2068             {
2069                 if (size_and_type.second != 'N')
2070                 {
2071                     for (std::size_t i = 0; i < size_and_type.first; ++i)
2072                     {
2073                         if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
2074                         {
2075                             return false;
2076                         }
2077                     }
2078                 }
2079             }
2080             else
2081             {
2082                 for (std::size_t i = 0; i < size_and_type.first; ++i)
2083                 {
2084                     if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2085                     {
2086                         return false;
2087                     }
2088                 }
2089             }
2090         }
2091         else
2092         {
2093             if (JSON_HEDLEY_UNLIKELY(!sax->start_array(std::size_t(-1))))
2094             {
2095                 return false;
2096             }
2097 
2098             while (current != ']')
2099             {
2100                 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal(false)))
2101                 {
2102                     return false;
2103                 }
2104                 get_ignore_noop();
2105             }
2106         }
2107 
2108         return sax->end_array();
2109     }
2110 
2111     /*!
2112     @return whether object creation completed
2113     */
get_ubjson_object()2114     bool get_ubjson_object()
2115     {
2116         std::pair<std::size_t, char_int_type> size_and_type;
2117         if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
2118         {
2119             return false;
2120         }
2121 
2122         string_t key;
2123         if (size_and_type.first != string_t::npos)
2124         {
2125             if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first)))
2126             {
2127                 return false;
2128             }
2129 
2130             if (size_and_type.second != 0)
2131             {
2132                 for (std::size_t i = 0; i < size_and_type.first; ++i)
2133                 {
2134                     if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key)))
2135                     {
2136                         return false;
2137                     }
2138                     if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
2139                     {
2140                         return false;
2141                     }
2142                     key.clear();
2143                 }
2144             }
2145             else
2146             {
2147                 for (std::size_t i = 0; i < size_and_type.first; ++i)
2148                 {
2149                     if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key)))
2150                     {
2151                         return false;
2152                     }
2153                     if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2154                     {
2155                         return false;
2156                     }
2157                     key.clear();
2158                 }
2159             }
2160         }
2161         else
2162         {
2163             if (JSON_HEDLEY_UNLIKELY(!sax->start_object(std::size_t(-1))))
2164             {
2165                 return false;
2166             }
2167 
2168             while (current != '}')
2169             {
2170                 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key, false) || !sax->key(key)))
2171                 {
2172                     return false;
2173                 }
2174                 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2175                 {
2176                     return false;
2177                 }
2178                 get_ignore_noop();
2179                 key.clear();
2180             }
2181         }
2182 
2183         return sax->end_object();
2184     }
2185 
2186     // Note, no reader for UBJSON binary types is implemented because they do
2187     // not exist
2188 
get_ubjson_high_precision_number()2189     bool get_ubjson_high_precision_number()
2190     {
2191         // get size of following number string
2192         std::size_t size{};
2193         auto res = get_ubjson_size_value(size);
2194         if (JSON_HEDLEY_UNLIKELY(!res))
2195         {
2196             return res;
2197         }
2198 
2199         // get number string
2200         std::vector<char> number_vector;
2201         for (std::size_t i = 0; i < size; ++i)
2202         {
2203             get();
2204             if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::ubjson, "number")))
2205             {
2206                 return false;
2207             }
2208             number_vector.push_back(static_cast<char>(current));
2209         }
2210 
2211         // parse number string
2212         auto number_ia = detail::input_adapter(std::forward<decltype(number_vector)>(number_vector));
2213         auto number_lexer = detail::lexer<BasicJsonType, decltype(number_ia)>(std::move(number_ia), false);
2214         const auto result_number = number_lexer.scan();
2215         const auto number_string = number_lexer.get_token_string();
2216         const auto result_remainder = number_lexer.scan();
2217 
2218         using token_type = typename detail::lexer_base<BasicJsonType>::token_type;
2219 
2220         if (JSON_HEDLEY_UNLIKELY(result_remainder != token_type::end_of_input))
2221         {
2222             return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + number_lexer.get_token_string(), "high-precision number")));
2223         }
2224 
2225         switch (result_number)
2226         {
2227             case token_type::value_integer:
2228                 return sax->number_integer(number_lexer.get_number_integer());
2229             case token_type::value_unsigned:
2230                 return sax->number_unsigned(number_lexer.get_number_unsigned());
2231             case token_type::value_float:
2232                 return sax->number_float(number_lexer.get_number_float(), std::move(number_string));
2233             default:
2234                 return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read, exception_message(input_format_t::ubjson, "invalid number text: " + number_lexer.get_token_string(), "high-precision number")));
2235         }
2236     }
2237 
2238     ///////////////////////
2239     // Utility functions //
2240     ///////////////////////
2241 
2242     /*!
2243     @brief get next character from the input
2244 
2245     This function provides the interface to the used input adapter. It does
2246     not throw in case the input reached EOF, but returns a -'ve valued
2247     `std::char_traits<char_type>::eof()` in that case.
2248 
2249     @return character read from the input
2250     */
get()2251     char_int_type get()
2252     {
2253         ++chars_read;
2254         return current = ia.get_character();
2255     }
2256 
2257     /*!
2258     @return character read from the input after ignoring all 'N' entries
2259     */
get_ignore_noop()2260     char_int_type get_ignore_noop()
2261     {
2262         do
2263         {
2264             get();
2265         }
2266         while (current == 'N');
2267 
2268         return current;
2269     }
2270 
2271     /*
2272     @brief read a number from the input
2273 
2274     @tparam NumberType the type of the number
2275     @param[in] format   the current format (for diagnostics)
2276     @param[out] result  number of type @a NumberType
2277 
2278     @return whether conversion completed
2279 
2280     @note This function needs to respect the system's endianess, because
2281           bytes in CBOR, MessagePack, and UBJSON are stored in network order
2282           (big endian) and therefore need reordering on little endian systems.
2283     */
2284     template<typename NumberType, bool InputIsLittleEndian = false>
get_number(const input_format_t format,NumberType & result)2285     bool get_number(const input_format_t format, NumberType& result)
2286     {
2287         // step 1: read input into array with system's byte order
2288         std::array<std::uint8_t, sizeof(NumberType)> vec;
2289         for (std::size_t i = 0; i < sizeof(NumberType); ++i)
2290         {
2291             get();
2292             if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number")))
2293             {
2294                 return false;
2295             }
2296 
2297             // reverse byte order prior to conversion if necessary
2298             if (is_little_endian != InputIsLittleEndian)
2299             {
2300                 vec[sizeof(NumberType) - i - 1] = static_cast<std::uint8_t>(current);
2301             }
2302             else
2303             {
2304                 vec[i] = static_cast<std::uint8_t>(current); // LCOV_EXCL_LINE
2305             }
2306         }
2307 
2308         // step 2: convert array into number of type T and return
2309         std::memcpy(&result, vec.data(), sizeof(NumberType));
2310         return true;
2311     }
2312 
2313     /*!
2314     @brief create a string by reading characters from the input
2315 
2316     @tparam NumberType the type of the number
2317     @param[in] format the current format (for diagnostics)
2318     @param[in] len number of characters to read
2319     @param[out] result string created by reading @a len bytes
2320 
2321     @return whether string creation completed
2322 
2323     @note We can not reserve @a len bytes for the result, because @a len
2324           may be too large. Usually, @ref unexpect_eof() detects the end of
2325           the input before we run out of string memory.
2326     */
2327     template<typename NumberType>
get_string(const input_format_t format,const NumberType len,string_t & result)2328     bool get_string(const input_format_t format,
2329                     const NumberType len,
2330                     string_t& result)
2331     {
2332         bool success = true;
2333         for (NumberType i = 0; i < len; i++)
2334         {
2335             get();
2336             if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "string")))
2337             {
2338                 success = false;
2339                 break;
2340             }
2341             result.push_back(static_cast<typename string_t::value_type>(current));
2342         };
2343         return success;
2344     }
2345 
2346     /*!
2347     @brief create a byte array by reading bytes from the input
2348 
2349     @tparam NumberType the type of the number
2350     @param[in] format the current format (for diagnostics)
2351     @param[in] len number of bytes to read
2352     @param[out] result byte array created by reading @a len bytes
2353 
2354     @return whether byte array creation completed
2355 
2356     @note We can not reserve @a len bytes for the result, because @a len
2357           may be too large. Usually, @ref unexpect_eof() detects the end of
2358           the input before we run out of memory.
2359     */
2360     template<typename NumberType>
get_binary(const input_format_t format,const NumberType len,binary_t & result)2361     bool get_binary(const input_format_t format,
2362                     const NumberType len,
2363                     binary_t& result)
2364     {
2365         bool success = true;
2366         for (NumberType i = 0; i < len; i++)
2367         {
2368             get();
2369             if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "binary")))
2370             {
2371                 success = false;
2372                 break;
2373             }
2374             result.push_back(static_cast<std::uint8_t>(current));
2375         }
2376         return success;
2377     }
2378 
2379     /*!
2380     @param[in] format   the current format (for diagnostics)
2381     @param[in] context  further context information (for diagnostics)
2382     @return whether the last read character is not EOF
2383     */
2384     JSON_HEDLEY_NON_NULL(3)
unexpect_eof(const input_format_t format,const char * context) const2385     bool unexpect_eof(const input_format_t format, const char* context) const
2386     {
2387         if (JSON_HEDLEY_UNLIKELY(current == std::char_traits<char_type>::eof()))
2388         {
2389             return sax->parse_error(chars_read, "<end of file>",
2390                                     parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context)));
2391         }
2392         return true;
2393     }
2394 
2395     /*!
2396     @return a string representation of the last read byte
2397     */
get_token_string() const2398     std::string get_token_string() const
2399     {
2400         std::array<char, 3> cr{{}};
2401         (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(current));
2402         return std::string{cr.data()};
2403     }
2404 
2405     /*!
2406     @param[in] format   the current format
2407     @param[in] detail   a detailed error message
2408     @param[in] context  further context information
2409     @return a message string to use in the parse_error exceptions
2410     */
exception_message(const input_format_t format,const std::string & detail,const std::string & context) const2411     std::string exception_message(const input_format_t format,
2412                                   const std::string& detail,
2413                                   const std::string& context) const
2414     {
2415         std::string error_msg = "syntax error while parsing ";
2416 
2417         switch (format)
2418         {
2419             case input_format_t::cbor:
2420                 error_msg += "CBOR";
2421                 break;
2422 
2423             case input_format_t::msgpack:
2424                 error_msg += "MessagePack";
2425                 break;
2426 
2427             case input_format_t::ubjson:
2428                 error_msg += "UBJSON";
2429                 break;
2430 
2431             case input_format_t::bson:
2432                 error_msg += "BSON";
2433                 break;
2434 
2435             default:            // LCOV_EXCL_LINE
2436                 JSON_ASSERT(false);  // LCOV_EXCL_LINE
2437         }
2438 
2439         return error_msg + " " + context + ": " + detail;
2440     }
2441 
2442   private:
2443     /// input adapter
2444     InputAdapterType ia;
2445 
2446     /// the current character
2447     char_int_type current = std::char_traits<char_type>::eof();
2448 
2449     /// the number of characters read
2450     std::size_t chars_read = 0;
2451 
2452     /// whether we can assume little endianess
2453     const bool is_little_endian = little_endianess();
2454 
2455     /// the SAX parser
2456     json_sax_t* sax = nullptr;
2457 };
2458 }  // namespace detail
2459 }  // namespace nlohmann
2460