1 #pragma once
2 
3 #include <algorithm> // generate_n
4 #include <array> // array
5 #include <cassert> // assert
6 #include <cmath> // ldexp
7 #include <cstddef> // size_t
8 #include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t
9 #include <cstdio> // snprintf
10 #include <cstring> // memcpy
11 #include <iterator> // back_inserter
12 #include <limits> // numeric_limits
13 #include <string> // char_traits, string
14 #include <utility> // make_pair, move
15 
16 #include <nlohmann/detail/exceptions.hpp>
17 #include <nlohmann/detail/input/input_adapters.hpp>
18 #include <nlohmann/detail/input/json_sax.hpp>
19 #include <nlohmann/detail/macro_scope.hpp>
20 #include <nlohmann/detail/meta/is_sax.hpp>
21 #include <nlohmann/detail/value_t.hpp>
22 
23 namespace nlohmann
24 {
25 namespace detail
26 {
27 ///////////////////
28 // binary reader //
29 ///////////////////
30 
31 /*!
32 @brief deserialization of CBOR, MessagePack, and UBJSON values
33 */
34 template<typename BasicJsonType, typename SAX = json_sax_dom_parser<BasicJsonType>>
35 class binary_reader
36 {
37     using number_integer_t = typename BasicJsonType::number_integer_t;
38     using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
39     using number_float_t = typename BasicJsonType::number_float_t;
40     using string_t = typename BasicJsonType::string_t;
41     using json_sax_t = SAX;
42 
43   public:
44     /*!
45     @brief create a binary reader
46 
47     @param[in] adapter  input adapter to read from
48     */
binary_reader(input_adapter_t adapter)49     explicit binary_reader(input_adapter_t adapter) : ia(std::move(adapter))
50     {
51         (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
52         assert(ia);
53     }
54 
55     // make class move-only
56     binary_reader(const binary_reader&) = delete;
57     binary_reader(binary_reader&&) = default;
58     binary_reader& operator=(const binary_reader&) = delete;
59     binary_reader& operator=(binary_reader&&) = default;
60     ~binary_reader() = default;
61 
62     /*!
63     @param[in] format  the binary format to parse
64     @param[in] sax_    a SAX event processor
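    @param[in] strict  whether to expect the input to be consumed completely

    @return `true` if parsing succeeded; if @a strict is set and unread
            input remains after the value, the return value of the SAX
            processor's `parse_error` is returned instead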
68     */
sax_parse(const input_format_t format,json_sax_t * sax_,const bool strict=true)69     bool sax_parse(const input_format_t format,
70                    json_sax_t* sax_,
71                    const bool strict = true)
72     {
73         sax = sax_;
74         bool result = false;
75 
76         switch (format)
77         {
78             case input_format_t::bson:
79                 result = parse_bson_internal();
80                 break;
81 
82             case input_format_t::cbor:
83                 result = parse_cbor_internal();
84                 break;
85 
86             case input_format_t::msgpack:
87                 result = parse_msgpack_internal();
88                 break;
89 
90             case input_format_t::ubjson:
91                 result = parse_ubjson_internal();
92                 break;
93 
94             default:            // LCOV_EXCL_LINE
95                 assert(false);  // LCOV_EXCL_LINE
96         }
97 
98         // strict mode: next byte must be EOF
99         if (result and strict)
100         {
101             if (format == input_format_t::ubjson)
102             {
103                 get_ignore_noop();
104             }
105             else
106             {
107                 get();
108             }
109 
110             if (JSON_UNLIKELY(current != std::char_traits<char>::eof()))
111             {
112                 return sax->parse_error(chars_read, get_token_string(),
113                                         parse_error::create(110, chars_read, exception_message(format, "expected end of input; last byte: 0x" + get_token_string(), "value")));
114             }
115         }
116 
117         return result;
118     }
119 
120     /*!
121     @brief determine system byte order
122 
123     @return true if and only if system's byte order is little endian
124 
125     @note from http://stackoverflow.com/a/1001328/266378
126     */
// Not constexpr: inspecting the object representation requires a
// reinterpret_cast, which is never permitted in a constant expression, so
// the function could not be constant-evaluated for any argument anyway.
static bool little_endianess(int num = 1) noexcept
{
    // look at the byte stored at the lowest address of num; with the
    // default argument 1 it holds 1 only if the least significant byte
    // comes first
    return *reinterpret_cast<const char*>(&num) == 1;
}
131 
132   private:
133     //////////
134     // BSON //
135     //////////
136 
137     /*!
138     @brief Reads in a BSON-object and passes it to the SAX-parser.
139     @return whether a valid BSON-value was passed to the SAX parser
140     */
parse_bson_internal()141     bool parse_bson_internal()
142     {
143         std::int32_t document_size;
144         get_number<std::int32_t, true>(input_format_t::bson, document_size);
145 
146         if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1))))
147         {
148             return false;
149         }
150 
151         if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false)))
152         {
153             return false;
154         }
155 
156         return sax->end_object();
157     }
158 
159     /*!
160     @brief Parses a C-style string from the BSON input.
161     @param[in, out] result  A reference to the string variable where the read
162                             string is to be stored.
    @return `true` if the \x00-byte indicating the end of the string was
             encountered before the EOF; `false` indicates an unexpected EOF.
165     */
get_bson_cstr(string_t & result)166     bool get_bson_cstr(string_t& result)
167     {
168         auto out = std::back_inserter(result);
169         while (true)
170         {
171             get();
172             if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "cstring")))
173             {
174                 return false;
175             }
176             if (current == 0x00)
177             {
178                 return true;
179             }
180             *out++ = static_cast<char>(current);
181         }
182 
183         return true;
184     }
185 
186     /*!
187     @brief Parses a zero-terminated string of length @a len from the BSON
188            input.
189     @param[in] len  The length (including the zero-byte at the end) of the
190                     string to be read.
191     @param[in, out] result  A reference to the string variable where the read
192                             string is to be stored.
193     @tparam NumberType The type of the length @a len
194     @pre len >= 1
195     @return `true` if the string was successfully parsed
196     */
197     template<typename NumberType>
get_bson_string(const NumberType len,string_t & result)198     bool get_bson_string(const NumberType len, string_t& result)
199     {
200         if (JSON_UNLIKELY(len < 1))
201         {
202             auto last_token = get_token_string();
203             return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "string length must be at least 1, is " + std::to_string(len), "string")));
204         }
205 
206         return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) and get() != std::char_traits<char>::eof();
207     }
208 
209     /*!
210     @brief Read a BSON document element of the given @a element_type.
211     @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html
212     @param[in] element_type_parse_position The position in the input stream,
213                where the `element_type` was read.
214     @warning Not all BSON element types are supported yet. An unsupported
215              @a element_type will give rise to a parse_error.114:
216              Unsupported BSON record type 0x...
217     @return whether a valid BSON-object/array was passed to the SAX parser
218     */
parse_bson_element_internal(const int element_type,const std::size_t element_type_parse_position)219     bool parse_bson_element_internal(const int element_type,
220                                      const std::size_t element_type_parse_position)
221     {
222         switch (element_type)
223         {
224             case 0x01: // double
225             {
226                 double number;
227                 return get_number<double, true>(input_format_t::bson, number) and sax->number_float(static_cast<number_float_t>(number), "");
228             }
229 
230             case 0x02: // string
231             {
232                 std::int32_t len;
233                 string_t value;
234                 return get_number<std::int32_t, true>(input_format_t::bson, len) and get_bson_string(len, value) and sax->string(value);
235             }
236 
237             case 0x03: // object
238             {
239                 return parse_bson_internal();
240             }
241 
242             case 0x04: // array
243             {
244                 return parse_bson_array();
245             }
246 
247             case 0x08: // boolean
248             {
249                 return sax->boolean(get() != 0);
250             }
251 
252             case 0x0A: // null
253             {
254                 return sax->null();
255             }
256 
257             case 0x10: // int32
258             {
259                 std::int32_t value;
260                 return get_number<std::int32_t, true>(input_format_t::bson, value) and sax->number_integer(value);
261             }
262 
263             case 0x12: // int64
264             {
265                 std::int64_t value;
266                 return get_number<std::int64_t, true>(input_format_t::bson, value) and sax->number_integer(value);
267             }
268 
269             default: // anything else not supported (yet)
270             {
271                 std::array<char, 3> cr{{}};
272                 (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(element_type));
273                 return sax->parse_error(element_type_parse_position, std::string(cr.data()), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr.data())));
274             }
275         }
276     }
277 
278     /*!
279     @brief Read a BSON element list (as specified in the BSON-spec)
280 
281     The same binary layout is used for objects and arrays, hence it must be
282     indicated with the argument @a is_array which one is expected
283     (true --> array, false --> object).
284 
285     @param[in] is_array Determines if the element list being read is to be
286                         treated as an object (@a is_array == false), or as an
287                         array (@a is_array == true).
288     @return whether a valid BSON-object/array was passed to the SAX parser
289     */
parse_bson_element_list(const bool is_array)290     bool parse_bson_element_list(const bool is_array)
291     {
292         string_t key;
293         while (int element_type = get())
294         {
295             if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list")))
296             {
297                 return false;
298             }
299 
300             const std::size_t element_type_parse_position = chars_read;
301             if (JSON_UNLIKELY(not get_bson_cstr(key)))
302             {
303                 return false;
304             }
305 
306             if (not is_array and not sax->key(key))
307             {
308                 return false;
309             }
310 
311             if (JSON_UNLIKELY(not parse_bson_element_internal(element_type, element_type_parse_position)))
312             {
313                 return false;
314             }
315 
316             // get_bson_cstr only appends
317             key.clear();
318         }
319 
320         return true;
321     }
322 
323     /*!
324     @brief Reads an array from the BSON input and passes it to the SAX-parser.
325     @return whether a valid BSON-array was passed to the SAX parser
326     */
parse_bson_array()327     bool parse_bson_array()
328     {
329         std::int32_t document_size;
330         get_number<std::int32_t, true>(input_format_t::bson, document_size);
331 
332         if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1))))
333         {
334             return false;
335         }
336 
337         if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/true)))
338         {
339             return false;
340         }
341 
342         return sax->end_array();
343     }
344 
345     //////////
346     // CBOR //
347     //////////
348 
349     /*!
350     @param[in] get_char  whether a new character should be retrieved from the
351                          input (true, default) or whether the last read
352                          character should be considered instead
353 
354     @return whether a valid CBOR value was passed to the SAX parser
355     */
parse_cbor_internal(const bool get_char=true)356     bool parse_cbor_internal(const bool get_char = true)
357     {
358         switch (get_char ? get() : current)
359         {
360             // EOF
361             case std::char_traits<char>::eof():
362                 return unexpect_eof(input_format_t::cbor, "value");
363 
364             // Integer 0x00..0x17 (0..23)
365             case 0x00:
366             case 0x01:
367             case 0x02:
368             case 0x03:
369             case 0x04:
370             case 0x05:
371             case 0x06:
372             case 0x07:
373             case 0x08:
374             case 0x09:
375             case 0x0A:
376             case 0x0B:
377             case 0x0C:
378             case 0x0D:
379             case 0x0E:
380             case 0x0F:
381             case 0x10:
382             case 0x11:
383             case 0x12:
384             case 0x13:
385             case 0x14:
386             case 0x15:
387             case 0x16:
388             case 0x17:
389                 return sax->number_unsigned(static_cast<number_unsigned_t>(current));
390 
391             case 0x18: // Unsigned integer (one-byte uint8_t follows)
392             {
393                 std::uint8_t number;
394                 return get_number(input_format_t::cbor, number) and sax->number_unsigned(number);
395             }
396 
397             case 0x19: // Unsigned integer (two-byte uint16_t follows)
398             {
399                 std::uint16_t number;
400                 return get_number(input_format_t::cbor, number) and sax->number_unsigned(number);
401             }
402 
403             case 0x1A: // Unsigned integer (four-byte uint32_t follows)
404             {
405                 std::uint32_t number;
406                 return get_number(input_format_t::cbor, number) and sax->number_unsigned(number);
407             }
408 
409             case 0x1B: // Unsigned integer (eight-byte uint64_t follows)
410             {
411                 std::uint64_t number;
412                 return get_number(input_format_t::cbor, number) and sax->number_unsigned(number);
413             }
414 
415             // Negative integer -1-0x00..-1-0x17 (-1..-24)
416             case 0x20:
417             case 0x21:
418             case 0x22:
419             case 0x23:
420             case 0x24:
421             case 0x25:
422             case 0x26:
423             case 0x27:
424             case 0x28:
425             case 0x29:
426             case 0x2A:
427             case 0x2B:
428             case 0x2C:
429             case 0x2D:
430             case 0x2E:
431             case 0x2F:
432             case 0x30:
433             case 0x31:
434             case 0x32:
435             case 0x33:
436             case 0x34:
437             case 0x35:
438             case 0x36:
439             case 0x37:
440                 return sax->number_integer(static_cast<std::int8_t>(0x20 - 1 - current));
441 
442             case 0x38: // Negative integer (one-byte uint8_t follows)
443             {
444                 std::uint8_t number;
445                 return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number);
446             }
447 
448             case 0x39: // Negative integer -1-n (two-byte uint16_t follows)
449             {
450                 std::uint16_t number;
451                 return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number);
452             }
453 
454             case 0x3A: // Negative integer -1-n (four-byte uint32_t follows)
455             {
456                 std::uint32_t number;
457                 return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number);
458             }
459 
460             case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows)
461             {
462                 std::uint64_t number;
463                 return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1)
464                         - static_cast<number_integer_t>(number));
465             }
466 
467             // UTF-8 string (0x00..0x17 bytes follow)
468             case 0x60:
469             case 0x61:
470             case 0x62:
471             case 0x63:
472             case 0x64:
473             case 0x65:
474             case 0x66:
475             case 0x67:
476             case 0x68:
477             case 0x69:
478             case 0x6A:
479             case 0x6B:
480             case 0x6C:
481             case 0x6D:
482             case 0x6E:
483             case 0x6F:
484             case 0x70:
485             case 0x71:
486             case 0x72:
487             case 0x73:
488             case 0x74:
489             case 0x75:
490             case 0x76:
491             case 0x77:
492             case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
493             case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
494             case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
495             case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
496             case 0x7F: // UTF-8 string (indefinite length)
497             {
498                 string_t s;
499                 return get_cbor_string(s) and sax->string(s);
500             }
501 
502             // array (0x00..0x17 data items follow)
503             case 0x80:
504             case 0x81:
505             case 0x82:
506             case 0x83:
507             case 0x84:
508             case 0x85:
509             case 0x86:
510             case 0x87:
511             case 0x88:
512             case 0x89:
513             case 0x8A:
514             case 0x8B:
515             case 0x8C:
516             case 0x8D:
517             case 0x8E:
518             case 0x8F:
519             case 0x90:
520             case 0x91:
521             case 0x92:
522             case 0x93:
523             case 0x94:
524             case 0x95:
525             case 0x96:
526             case 0x97:
527                 return get_cbor_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu));
528 
529             case 0x98: // array (one-byte uint8_t for n follows)
530             {
531                 std::uint8_t len;
532                 return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len));
533             }
534 
535             case 0x99: // array (two-byte uint16_t for n follow)
536             {
537                 std::uint16_t len;
538                 return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len));
539             }
540 
541             case 0x9A: // array (four-byte uint32_t for n follow)
542             {
543                 std::uint32_t len;
544                 return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len));
545             }
546 
547             case 0x9B: // array (eight-byte uint64_t for n follow)
548             {
549                 std::uint64_t len;
550                 return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len));
551             }
552 
553             case 0x9F: // array (indefinite length)
554                 return get_cbor_array(std::size_t(-1));
555 
556             // map (0x00..0x17 pairs of data items follow)
557             case 0xA0:
558             case 0xA1:
559             case 0xA2:
560             case 0xA3:
561             case 0xA4:
562             case 0xA5:
563             case 0xA6:
564             case 0xA7:
565             case 0xA8:
566             case 0xA9:
567             case 0xAA:
568             case 0xAB:
569             case 0xAC:
570             case 0xAD:
571             case 0xAE:
572             case 0xAF:
573             case 0xB0:
574             case 0xB1:
575             case 0xB2:
576             case 0xB3:
577             case 0xB4:
578             case 0xB5:
579             case 0xB6:
580             case 0xB7:
581                 return get_cbor_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu));
582 
583             case 0xB8: // map (one-byte uint8_t for n follows)
584             {
585                 std::uint8_t len;
586                 return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len));
587             }
588 
589             case 0xB9: // map (two-byte uint16_t for n follow)
590             {
591                 std::uint16_t len;
592                 return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len));
593             }
594 
595             case 0xBA: // map (four-byte uint32_t for n follow)
596             {
597                 std::uint32_t len;
598                 return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len));
599             }
600 
601             case 0xBB: // map (eight-byte uint64_t for n follow)
602             {
603                 std::uint64_t len;
604                 return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len));
605             }
606 
607             case 0xBF: // map (indefinite length)
608                 return get_cbor_object(std::size_t(-1));
609 
610             case 0xF4: // false
611                 return sax->boolean(false);
612 
613             case 0xF5: // true
614                 return sax->boolean(true);
615 
616             case 0xF6: // null
617                 return sax->null();
618 
619             case 0xF9: // Half-Precision Float (two-byte IEEE 754)
620             {
621                 const int byte1_raw = get();
622                 if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "number")))
623                 {
624                     return false;
625                 }
626                 const int byte2_raw = get();
627                 if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "number")))
628                 {
629                     return false;
630                 }
631 
632                 const auto byte1 = static_cast<unsigned char>(byte1_raw);
633                 const auto byte2 = static_cast<unsigned char>(byte2_raw);
634 
635                 // code from RFC 7049, Appendix D, Figure 3:
636                 // As half-precision floating-point numbers were only added
637                 // to IEEE 754 in 2008, today's programming platforms often
638                 // still only have limited support for them. It is very
639                 // easy to include at least decoding support for them even
640                 // without such support. An example of a small decoder for
641                 // half-precision floating-point numbers in the C language
642                 // is shown in Fig. 3.
643                 const auto half = static_cast<unsigned int>((byte1 << 8u) + byte2);
644                 const double val = [&half]
645                 {
646                     const int exp = (half >> 10u) & 0x1Fu;
647                     const unsigned int mant = half & 0x3FFu;
648                     assert(0 <= exp and exp <= 32);
649                     assert(0 <= mant and mant <= 1024);
650                     switch (exp)
651                     {
652                         case 0:
653                             return std::ldexp(mant, -24);
654                         case 31:
655                             return (mant == 0)
656                             ? std::numeric_limits<double>::infinity()
657                             : std::numeric_limits<double>::quiet_NaN();
658                         default:
659                             return std::ldexp(mant + 1024, exp - 25);
660                     }
661                 }();
662                 return sax->number_float((half & 0x8000u) != 0
663                                          ? static_cast<number_float_t>(-val)
664                                          : static_cast<number_float_t>(val), "");
665             }
666 
667             case 0xFA: // Single-Precision Float (four-byte IEEE 754)
668             {
669                 float number;
670                 return get_number(input_format_t::cbor, number) and sax->number_float(static_cast<number_float_t>(number), "");
671             }
672 
673             case 0xFB: // Double-Precision Float (eight-byte IEEE 754)
674             {
675                 double number;
676                 return get_number(input_format_t::cbor, number) and sax->number_float(static_cast<number_float_t>(number), "");
677             }
678 
679             default: // anything else (0xFF is handled inside the other types)
680             {
681                 auto last_token = get_token_string();
682                 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value")));
683             }
684         }
685     }
686 
687     /*!
688     @brief reads a CBOR string
689 
690     This function first reads starting bytes to determine the expected
691     string length and then copies this number of bytes into a string.
692     Additionally, CBOR's strings with indefinite lengths are supported.
693 
694     @param[out] result  created string
695 
696     @return whether string creation completed
697     */
get_cbor_string(string_t & result)698     bool get_cbor_string(string_t& result)
699     {
700         if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string")))
701         {
702             return false;
703         }
704 
705         switch (current)
706         {
707             // UTF-8 string (0x00..0x17 bytes follow)
708             case 0x60:
709             case 0x61:
710             case 0x62:
711             case 0x63:
712             case 0x64:
713             case 0x65:
714             case 0x66:
715             case 0x67:
716             case 0x68:
717             case 0x69:
718             case 0x6A:
719             case 0x6B:
720             case 0x6C:
721             case 0x6D:
722             case 0x6E:
723             case 0x6F:
724             case 0x70:
725             case 0x71:
726             case 0x72:
727             case 0x73:
728             case 0x74:
729             case 0x75:
730             case 0x76:
731             case 0x77:
732             {
733                 return get_string(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
734             }
735 
736             case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
737             {
738                 std::uint8_t len;
739                 return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
740             }
741 
742             case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
743             {
744                 std::uint16_t len;
745                 return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
746             }
747 
748             case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
749             {
750                 std::uint32_t len;
751                 return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
752             }
753 
754             case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
755             {
756                 std::uint64_t len;
757                 return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
758             }
759 
760             case 0x7F: // UTF-8 string (indefinite length)
761             {
762                 while (get() != 0xFF)
763                 {
764                     string_t chunk;
765                     if (not get_cbor_string(chunk))
766                     {
767                         return false;
768                     }
769                     result.append(chunk);
770                 }
771                 return true;
772             }
773 
774             default:
775             {
776                 auto last_token = get_token_string();
777                 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string")));
778             }
779         }
780     }
781 
782     /*!
783     @param[in] len  the length of the array or std::size_t(-1) for an
784                     array of indefinite size
785     @return whether array creation completed
786     */
get_cbor_array(const std::size_t len)787     bool get_cbor_array(const std::size_t len)
788     {
789         if (JSON_UNLIKELY(not sax->start_array(len)))
790         {
791             return false;
792         }
793 
794         if (len != std::size_t(-1))
795         {
796             for (std::size_t i = 0; i < len; ++i)
797             {
798                 if (JSON_UNLIKELY(not parse_cbor_internal()))
799                 {
800                     return false;
801                 }
802             }
803         }
804         else
805         {
806             while (get() != 0xFF)
807             {
808                 if (JSON_UNLIKELY(not parse_cbor_internal(false)))
809                 {
810                     return false;
811                 }
812             }
813         }
814 
815         return sax->end_array();
816     }
817 
818     /*!
819     @param[in] len  the length of the object or std::size_t(-1) for an
820                     object of indefinite size
821     @return whether object creation completed
822     */
get_cbor_object(const std::size_t len)823     bool get_cbor_object(const std::size_t len)
824     {
825         if (JSON_UNLIKELY(not sax->start_object(len)))
826         {
827             return false;
828         }
829 
830         string_t key;
831         if (len != std::size_t(-1))
832         {
833             for (std::size_t i = 0; i < len; ++i)
834             {
835                 get();
836                 if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
837                 {
838                     return false;
839                 }
840 
841                 if (JSON_UNLIKELY(not parse_cbor_internal()))
842                 {
843                     return false;
844                 }
845                 key.clear();
846             }
847         }
848         else
849         {
850             while (get() != 0xFF)
851             {
852                 if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
853                 {
854                     return false;
855                 }
856 
857                 if (JSON_UNLIKELY(not parse_cbor_internal()))
858                 {
859                     return false;
860                 }
861                 key.clear();
862             }
863         }
864 
865         return sax->end_object();
866     }
867 
868     /////////////
869     // MsgPack //
870     /////////////
871 
872     /*!
873     @return whether a valid MessagePack value was passed to the SAX parser
874     */
parse_msgpack_internal()875     bool parse_msgpack_internal()
876     {
877         switch (get())
878         {
879             // EOF
880             case std::char_traits<char>::eof():
881                 return unexpect_eof(input_format_t::msgpack, "value");
882 
883             // positive fixint
884             case 0x00:
885             case 0x01:
886             case 0x02:
887             case 0x03:
888             case 0x04:
889             case 0x05:
890             case 0x06:
891             case 0x07:
892             case 0x08:
893             case 0x09:
894             case 0x0A:
895             case 0x0B:
896             case 0x0C:
897             case 0x0D:
898             case 0x0E:
899             case 0x0F:
900             case 0x10:
901             case 0x11:
902             case 0x12:
903             case 0x13:
904             case 0x14:
905             case 0x15:
906             case 0x16:
907             case 0x17:
908             case 0x18:
909             case 0x19:
910             case 0x1A:
911             case 0x1B:
912             case 0x1C:
913             case 0x1D:
914             case 0x1E:
915             case 0x1F:
916             case 0x20:
917             case 0x21:
918             case 0x22:
919             case 0x23:
920             case 0x24:
921             case 0x25:
922             case 0x26:
923             case 0x27:
924             case 0x28:
925             case 0x29:
926             case 0x2A:
927             case 0x2B:
928             case 0x2C:
929             case 0x2D:
930             case 0x2E:
931             case 0x2F:
932             case 0x30:
933             case 0x31:
934             case 0x32:
935             case 0x33:
936             case 0x34:
937             case 0x35:
938             case 0x36:
939             case 0x37:
940             case 0x38:
941             case 0x39:
942             case 0x3A:
943             case 0x3B:
944             case 0x3C:
945             case 0x3D:
946             case 0x3E:
947             case 0x3F:
948             case 0x40:
949             case 0x41:
950             case 0x42:
951             case 0x43:
952             case 0x44:
953             case 0x45:
954             case 0x46:
955             case 0x47:
956             case 0x48:
957             case 0x49:
958             case 0x4A:
959             case 0x4B:
960             case 0x4C:
961             case 0x4D:
962             case 0x4E:
963             case 0x4F:
964             case 0x50:
965             case 0x51:
966             case 0x52:
967             case 0x53:
968             case 0x54:
969             case 0x55:
970             case 0x56:
971             case 0x57:
972             case 0x58:
973             case 0x59:
974             case 0x5A:
975             case 0x5B:
976             case 0x5C:
977             case 0x5D:
978             case 0x5E:
979             case 0x5F:
980             case 0x60:
981             case 0x61:
982             case 0x62:
983             case 0x63:
984             case 0x64:
985             case 0x65:
986             case 0x66:
987             case 0x67:
988             case 0x68:
989             case 0x69:
990             case 0x6A:
991             case 0x6B:
992             case 0x6C:
993             case 0x6D:
994             case 0x6E:
995             case 0x6F:
996             case 0x70:
997             case 0x71:
998             case 0x72:
999             case 0x73:
1000             case 0x74:
1001             case 0x75:
1002             case 0x76:
1003             case 0x77:
1004             case 0x78:
1005             case 0x79:
1006             case 0x7A:
1007             case 0x7B:
1008             case 0x7C:
1009             case 0x7D:
1010             case 0x7E:
1011             case 0x7F:
1012                 return sax->number_unsigned(static_cast<number_unsigned_t>(current));
1013 
1014             // fixmap
1015             case 0x80:
1016             case 0x81:
1017             case 0x82:
1018             case 0x83:
1019             case 0x84:
1020             case 0x85:
1021             case 0x86:
1022             case 0x87:
1023             case 0x88:
1024             case 0x89:
1025             case 0x8A:
1026             case 0x8B:
1027             case 0x8C:
1028             case 0x8D:
1029             case 0x8E:
1030             case 0x8F:
1031                 return get_msgpack_object(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
1032 
1033             // fixarray
1034             case 0x90:
1035             case 0x91:
1036             case 0x92:
1037             case 0x93:
1038             case 0x94:
1039             case 0x95:
1040             case 0x96:
1041             case 0x97:
1042             case 0x98:
1043             case 0x99:
1044             case 0x9A:
1045             case 0x9B:
1046             case 0x9C:
1047             case 0x9D:
1048             case 0x9E:
1049             case 0x9F:
1050                 return get_msgpack_array(static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
1051 
1052             // fixstr
1053             case 0xA0:
1054             case 0xA1:
1055             case 0xA2:
1056             case 0xA3:
1057             case 0xA4:
1058             case 0xA5:
1059             case 0xA6:
1060             case 0xA7:
1061             case 0xA8:
1062             case 0xA9:
1063             case 0xAA:
1064             case 0xAB:
1065             case 0xAC:
1066             case 0xAD:
1067             case 0xAE:
1068             case 0xAF:
1069             case 0xB0:
1070             case 0xB1:
1071             case 0xB2:
1072             case 0xB3:
1073             case 0xB4:
1074             case 0xB5:
1075             case 0xB6:
1076             case 0xB7:
1077             case 0xB8:
1078             case 0xB9:
1079             case 0xBA:
1080             case 0xBB:
1081             case 0xBC:
1082             case 0xBD:
1083             case 0xBE:
1084             case 0xBF:
1085             {
1086                 string_t s;
1087                 return get_msgpack_string(s) and sax->string(s);
1088             }
1089 
1090             case 0xC0: // nil
1091                 return sax->null();
1092 
1093             case 0xC2: // false
1094                 return sax->boolean(false);
1095 
1096             case 0xC3: // true
1097                 return sax->boolean(true);
1098 
1099             case 0xCA: // float 32
1100             {
1101                 float number;
1102                 return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), "");
1103             }
1104 
1105             case 0xCB: // float 64
1106             {
1107                 double number;
1108                 return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), "");
1109             }
1110 
1111             case 0xCC: // uint 8
1112             {
1113                 std::uint8_t number;
1114                 return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
1115             }
1116 
1117             case 0xCD: // uint 16
1118             {
1119                 std::uint16_t number;
1120                 return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
1121             }
1122 
1123             case 0xCE: // uint 32
1124             {
1125                 std::uint32_t number;
1126                 return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
1127             }
1128 
1129             case 0xCF: // uint 64
1130             {
1131                 std::uint64_t number;
1132                 return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
1133             }
1134 
1135             case 0xD0: // int 8
1136             {
1137                 std::int8_t number;
1138                 return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
1139             }
1140 
1141             case 0xD1: // int 16
1142             {
1143                 std::int16_t number;
1144                 return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
1145             }
1146 
1147             case 0xD2: // int 32
1148             {
1149                 std::int32_t number;
1150                 return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
1151             }
1152 
1153             case 0xD3: // int 64
1154             {
1155                 std::int64_t number;
1156                 return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
1157             }
1158 
1159             case 0xD9: // str 8
1160             case 0xDA: // str 16
1161             case 0xDB: // str 32
1162             {
1163                 string_t s;
1164                 return get_msgpack_string(s) and sax->string(s);
1165             }
1166 
1167             case 0xDC: // array 16
1168             {
1169                 std::uint16_t len;
1170                 return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len));
1171             }
1172 
1173             case 0xDD: // array 32
1174             {
1175                 std::uint32_t len;
1176                 return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len));
1177             }
1178 
1179             case 0xDE: // map 16
1180             {
1181                 std::uint16_t len;
1182                 return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len));
1183             }
1184 
1185             case 0xDF: // map 32
1186             {
1187                 std::uint32_t len;
1188                 return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len));
1189             }
1190 
1191             // negative fixint
1192             case 0xE0:
1193             case 0xE1:
1194             case 0xE2:
1195             case 0xE3:
1196             case 0xE4:
1197             case 0xE5:
1198             case 0xE6:
1199             case 0xE7:
1200             case 0xE8:
1201             case 0xE9:
1202             case 0xEA:
1203             case 0xEB:
1204             case 0xEC:
1205             case 0xED:
1206             case 0xEE:
1207             case 0xEF:
1208             case 0xF0:
1209             case 0xF1:
1210             case 0xF2:
1211             case 0xF3:
1212             case 0xF4:
1213             case 0xF5:
1214             case 0xF6:
1215             case 0xF7:
1216             case 0xF8:
1217             case 0xF9:
1218             case 0xFA:
1219             case 0xFB:
1220             case 0xFC:
1221             case 0xFD:
1222             case 0xFE:
1223             case 0xFF:
1224                 return sax->number_integer(static_cast<std::int8_t>(current));
1225 
1226             default: // anything else
1227             {
1228                 auto last_token = get_token_string();
1229                 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value")));
1230             }
1231         }
1232     }
1233 
1234     /*!
1235     @brief reads a MessagePack string
1236 
1237     This function first reads starting bytes to determine the expected
1238     string length and then copies this number of bytes into a string.
1239 
1240     @param[out] result  created string
1241 
1242     @return whether string creation completed
1243     */
get_msgpack_string(string_t & result)1244     bool get_msgpack_string(string_t& result)
1245     {
1246         if (JSON_UNLIKELY(not unexpect_eof(input_format_t::msgpack, "string")))
1247         {
1248             return false;
1249         }
1250 
1251         switch (current)
1252         {
1253             // fixstr
1254             case 0xA0:
1255             case 0xA1:
1256             case 0xA2:
1257             case 0xA3:
1258             case 0xA4:
1259             case 0xA5:
1260             case 0xA6:
1261             case 0xA7:
1262             case 0xA8:
1263             case 0xA9:
1264             case 0xAA:
1265             case 0xAB:
1266             case 0xAC:
1267             case 0xAD:
1268             case 0xAE:
1269             case 0xAF:
1270             case 0xB0:
1271             case 0xB1:
1272             case 0xB2:
1273             case 0xB3:
1274             case 0xB4:
1275             case 0xB5:
1276             case 0xB6:
1277             case 0xB7:
1278             case 0xB8:
1279             case 0xB9:
1280             case 0xBA:
1281             case 0xBB:
1282             case 0xBC:
1283             case 0xBD:
1284             case 0xBE:
1285             case 0xBF:
1286             {
1287                 return get_string(input_format_t::msgpack, static_cast<unsigned int>(current) & 0x1Fu, result);
1288             }
1289 
1290             case 0xD9: // str 8
1291             {
1292                 std::uint8_t len;
1293                 return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result);
1294             }
1295 
1296             case 0xDA: // str 16
1297             {
1298                 std::uint16_t len;
1299                 return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result);
1300             }
1301 
1302             case 0xDB: // str 32
1303             {
1304                 std::uint32_t len;
1305                 return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result);
1306             }
1307 
1308             default:
1309             {
1310                 auto last_token = get_token_string();
1311                 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::msgpack, "expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x" + last_token, "string")));
1312             }
1313         }
1314     }
1315 
1316     /*!
1317     @param[in] len  the length of the array
1318     @return whether array creation completed
1319     */
get_msgpack_array(const std::size_t len)1320     bool get_msgpack_array(const std::size_t len)
1321     {
1322         if (JSON_UNLIKELY(not sax->start_array(len)))
1323         {
1324             return false;
1325         }
1326 
1327         for (std::size_t i = 0; i < len; ++i)
1328         {
1329             if (JSON_UNLIKELY(not parse_msgpack_internal()))
1330             {
1331                 return false;
1332             }
1333         }
1334 
1335         return sax->end_array();
1336     }
1337 
1338     /*!
1339     @param[in] len  the length of the object
1340     @return whether object creation completed
1341     */
get_msgpack_object(const std::size_t len)1342     bool get_msgpack_object(const std::size_t len)
1343     {
1344         if (JSON_UNLIKELY(not sax->start_object(len)))
1345         {
1346             return false;
1347         }
1348 
1349         string_t key;
1350         for (std::size_t i = 0; i < len; ++i)
1351         {
1352             get();
1353             if (JSON_UNLIKELY(not get_msgpack_string(key) or not sax->key(key)))
1354             {
1355                 return false;
1356             }
1357 
1358             if (JSON_UNLIKELY(not parse_msgpack_internal()))
1359             {
1360                 return false;
1361             }
1362             key.clear();
1363         }
1364 
1365         return sax->end_object();
1366     }
1367 
1368     ////////////
1369     // UBJSON //
1370     ////////////
1371 
1372     /*!
1373     @param[in] get_char  whether a new character should be retrieved from the
1374                          input (true, default) or whether the last read
1375                          character should be considered instead
1376 
1377     @return whether a valid UBJSON value was passed to the SAX parser
1378     */
parse_ubjson_internal(const bool get_char=true)1379     bool parse_ubjson_internal(const bool get_char = true)
1380     {
1381         return get_ubjson_value(get_char ? get_ignore_noop() : current);
1382     }
1383 
1384     /*!
1385     @brief reads a UBJSON string
1386 
1387     This function is either called after reading the 'S' byte explicitly
1388     indicating a string, or in case of an object key where the 'S' byte can be
1389     left out.
1390 
1391     @param[out] result   created string
1392     @param[in] get_char  whether a new character should be retrieved from the
1393                          input (true, default) or whether the last read
1394                          character should be considered instead
1395 
1396     @return whether string creation completed
1397     */
get_ubjson_string(string_t & result,const bool get_char=true)1398     bool get_ubjson_string(string_t& result, const bool get_char = true)
1399     {
1400         if (get_char)
1401         {
1402             get();  // TODO(niels): may we ignore N here?
1403         }
1404 
1405         if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "value")))
1406         {
1407             return false;
1408         }
1409 
1410         switch (current)
1411         {
1412             case 'U':
1413             {
1414                 std::uint8_t len;
1415                 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result);
1416             }
1417 
1418             case 'i':
1419             {
1420                 std::int8_t len;
1421                 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result);
1422             }
1423 
1424             case 'I':
1425             {
1426                 std::int16_t len;
1427                 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result);
1428             }
1429 
1430             case 'l':
1431             {
1432                 std::int32_t len;
1433                 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result);
1434             }
1435 
1436             case 'L':
1437             {
1438                 std::int64_t len;
1439                 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result);
1440             }
1441 
1442             default:
1443                 auto last_token = get_token_string();
1444                 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L); last byte: 0x" + last_token, "string")));
1445         }
1446     }
1447 
1448     /*!
1449     @param[out] result  determined size
1450     @return whether size determination completed
1451     */
get_ubjson_size_value(std::size_t & result)1452     bool get_ubjson_size_value(std::size_t& result)
1453     {
1454         switch (get_ignore_noop())
1455         {
1456             case 'U':
1457             {
1458                 std::uint8_t number;
1459                 if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number)))
1460                 {
1461                     return false;
1462                 }
1463                 result = static_cast<std::size_t>(number);
1464                 return true;
1465             }
1466 
1467             case 'i':
1468             {
1469                 std::int8_t number;
1470                 if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number)))
1471                 {
1472                     return false;
1473                 }
1474                 result = static_cast<std::size_t>(number);
1475                 return true;
1476             }
1477 
1478             case 'I':
1479             {
1480                 std::int16_t number;
1481                 if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number)))
1482                 {
1483                     return false;
1484                 }
1485                 result = static_cast<std::size_t>(number);
1486                 return true;
1487             }
1488 
1489             case 'l':
1490             {
1491                 std::int32_t number;
1492                 if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number)))
1493                 {
1494                     return false;
1495                 }
1496                 result = static_cast<std::size_t>(number);
1497                 return true;
1498             }
1499 
1500             case 'L':
1501             {
1502                 std::int64_t number;
1503                 if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number)))
1504                 {
1505                     return false;
1506                 }
1507                 result = static_cast<std::size_t>(number);
1508                 return true;
1509             }
1510 
1511             default:
1512             {
1513                 auto last_token = get_token_string();
1514                 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size")));
1515             }
1516         }
1517     }
1518 
1519     /*!
1520     @brief determine the type and size for a container
1521 
1522     In the optimized UBJSON format, a type and a size can be provided to allow
1523     for a more compact representation.
1524 
1525     @param[out] result  pair of the size and the type
1526 
1527     @return whether pair creation completed
1528     */
get_ubjson_size_type(std::pair<std::size_t,int> & result)1529     bool get_ubjson_size_type(std::pair<std::size_t, int>& result)
1530     {
1531         result.first = string_t::npos; // size
1532         result.second = 0; // type
1533 
1534         get_ignore_noop();
1535 
1536         if (current == '$')
1537         {
1538             result.second = get();  // must not ignore 'N', because 'N' maybe the type
1539             if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "type")))
1540             {
1541                 return false;
1542             }
1543 
1544             get_ignore_noop();
1545             if (JSON_UNLIKELY(current != '#'))
1546             {
1547                 if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "value")))
1548                 {
1549                     return false;
1550                 }
1551                 auto last_token = get_token_string();
1552                 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "expected '#' after type information; last byte: 0x" + last_token, "size")));
1553             }
1554 
1555             return get_ubjson_size_value(result.first);
1556         }
1557 
1558         if (current == '#')
1559         {
1560             return get_ubjson_size_value(result.first);
1561         }
1562 
1563         return true;
1564     }
1565 
1566     /*!
1567     @param prefix  the previously read or set type prefix
1568     @return whether value creation completed
1569     */
get_ubjson_value(const int prefix)1570     bool get_ubjson_value(const int prefix)
1571     {
1572         switch (prefix)
1573         {
1574             case std::char_traits<char>::eof():  // EOF
1575                 return unexpect_eof(input_format_t::ubjson, "value");
1576 
1577             case 'T':  // true
1578                 return sax->boolean(true);
1579             case 'F':  // false
1580                 return sax->boolean(false);
1581 
1582             case 'Z':  // null
1583                 return sax->null();
1584 
1585             case 'U':
1586             {
1587                 std::uint8_t number;
1588                 return get_number(input_format_t::ubjson, number) and sax->number_unsigned(number);
1589             }
1590 
1591             case 'i':
1592             {
1593                 std::int8_t number;
1594                 return get_number(input_format_t::ubjson, number) and sax->number_integer(number);
1595             }
1596 
1597             case 'I':
1598             {
1599                 std::int16_t number;
1600                 return get_number(input_format_t::ubjson, number) and sax->number_integer(number);
1601             }
1602 
1603             case 'l':
1604             {
1605                 std::int32_t number;
1606                 return get_number(input_format_t::ubjson, number) and sax->number_integer(number);
1607             }
1608 
1609             case 'L':
1610             {
1611                 std::int64_t number;
1612                 return get_number(input_format_t::ubjson, number) and sax->number_integer(number);
1613             }
1614 
1615             case 'd':
1616             {
1617                 float number;
1618                 return get_number(input_format_t::ubjson, number) and sax->number_float(static_cast<number_float_t>(number), "");
1619             }
1620 
1621             case 'D':
1622             {
1623                 double number;
1624                 return get_number(input_format_t::ubjson, number) and sax->number_float(static_cast<number_float_t>(number), "");
1625             }
1626 
1627             case 'C':  // char
1628             {
1629                 get();
1630                 if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "char")))
1631                 {
1632                     return false;
1633                 }
1634                 if (JSON_UNLIKELY(current > 127))
1635                 {
1636                     auto last_token = get_token_string();
1637                     return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token, "char")));
1638                 }
1639                 string_t s(1, static_cast<char>(current));
1640                 return sax->string(s);
1641             }
1642 
1643             case 'S':  // string
1644             {
1645                 string_t s;
1646                 return get_ubjson_string(s) and sax->string(s);
1647             }
1648 
1649             case '[':  // array
1650                 return get_ubjson_array();
1651 
1652             case '{':  // object
1653                 return get_ubjson_object();
1654 
1655             default: // anything else
1656             {
1657                 auto last_token = get_token_string();
1658                 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "invalid byte: 0x" + last_token, "value")));
1659             }
1660         }
1661     }
1662 
1663     /*!
1664     @return whether array creation completed
1665     */
get_ubjson_array()1666     bool get_ubjson_array()
1667     {
1668         std::pair<std::size_t, int> size_and_type;
1669         if (JSON_UNLIKELY(not get_ubjson_size_type(size_and_type)))
1670         {
1671             return false;
1672         }
1673 
1674         if (size_and_type.first != string_t::npos)
1675         {
1676             if (JSON_UNLIKELY(not sax->start_array(size_and_type.first)))
1677             {
1678                 return false;
1679             }
1680 
1681             if (size_and_type.second != 0)
1682             {
1683                 if (size_and_type.second != 'N')
1684                 {
1685                     for (std::size_t i = 0; i < size_and_type.first; ++i)
1686                     {
1687                         if (JSON_UNLIKELY(not get_ubjson_value(size_and_type.second)))
1688                         {
1689                             return false;
1690                         }
1691                     }
1692                 }
1693             }
1694             else
1695             {
1696                 for (std::size_t i = 0; i < size_and_type.first; ++i)
1697                 {
1698                     if (JSON_UNLIKELY(not parse_ubjson_internal()))
1699                     {
1700                         return false;
1701                     }
1702                 }
1703             }
1704         }
1705         else
1706         {
1707             if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1))))
1708             {
1709                 return false;
1710             }
1711 
1712             while (current != ']')
1713             {
1714                 if (JSON_UNLIKELY(not parse_ubjson_internal(false)))
1715                 {
1716                     return false;
1717                 }
1718                 get_ignore_noop();
1719             }
1720         }
1721 
1722         return sax->end_array();
1723     }
1724 
1725     /*!
1726     @return whether object creation completed
1727     */
get_ubjson_object()1728     bool get_ubjson_object()
1729     {
1730         std::pair<std::size_t, int> size_and_type;
1731         if (JSON_UNLIKELY(not get_ubjson_size_type(size_and_type)))
1732         {
1733             return false;
1734         }
1735 
1736         string_t key;
1737         if (size_and_type.first != string_t::npos)
1738         {
1739             if (JSON_UNLIKELY(not sax->start_object(size_and_type.first)))
1740             {
1741                 return false;
1742             }
1743 
1744             if (size_and_type.second != 0)
1745             {
1746                 for (std::size_t i = 0; i < size_and_type.first; ++i)
1747                 {
1748                     if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(key)))
1749                     {
1750                         return false;
1751                     }
1752                     if (JSON_UNLIKELY(not get_ubjson_value(size_and_type.second)))
1753                     {
1754                         return false;
1755                     }
1756                     key.clear();
1757                 }
1758             }
1759             else
1760             {
1761                 for (std::size_t i = 0; i < size_and_type.first; ++i)
1762                 {
1763                     if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(key)))
1764                     {
1765                         return false;
1766                     }
1767                     if (JSON_UNLIKELY(not parse_ubjson_internal()))
1768                     {
1769                         return false;
1770                     }
1771                     key.clear();
1772                 }
1773             }
1774         }
1775         else
1776         {
1777             if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1))))
1778             {
1779                 return false;
1780             }
1781 
1782             while (current != '}')
1783             {
1784                 if (JSON_UNLIKELY(not get_ubjson_string(key, false) or not sax->key(key)))
1785                 {
1786                     return false;
1787                 }
1788                 if (JSON_UNLIKELY(not parse_ubjson_internal()))
1789                 {
1790                     return false;
1791                 }
1792                 get_ignore_noop();
1793                 key.clear();
1794             }
1795         }
1796 
1797         return sax->end_object();
1798     }
1799 
1800     ///////////////////////
1801     // Utility functions //
1802     ///////////////////////
1803 
1804     /*!
1805     @brief get next character from the input
1806 
1807     This function provides the interface to the used input adapter. It does
1808     not throw in case the input reached EOF, but returns a -'ve valued
1809     `std::char_traits<char>::eof()` in that case.
1810 
1811     @return character read from the input
1812     */
get()1813     int get()
1814     {
1815         ++chars_read;
1816         return current = ia->get_character();
1817     }
1818 
1819     /*!
1820     @return character read from the input after ignoring all 'N' entries
1821     */
get_ignore_noop()1822     int get_ignore_noop()
1823     {
1824         do
1825         {
1826             get();
1827         }
1828         while (current == 'N');
1829 
1830         return current;
1831     }
1832 
1833     /*
1834     @brief read a number from the input
1835 
1836     @tparam NumberType the type of the number
1837     @param[in] format   the current format (for diagnostics)
1838     @param[out] result  number of type @a NumberType
1839 
1840     @return whether conversion completed
1841 
1842     @note This function needs to respect the system's endianess, because
1843           bytes in CBOR, MessagePack, and UBJSON are stored in network order
1844           (big endian) and therefore need reordering on little endian systems.
1845     */
1846     template<typename NumberType, bool InputIsLittleEndian = false>
get_number(const input_format_t format,NumberType & result)1847     bool get_number(const input_format_t format, NumberType& result)
1848     {
1849         // step 1: read input into array with system's byte order
1850         std::array<std::uint8_t, sizeof(NumberType)> vec;
1851         for (std::size_t i = 0; i < sizeof(NumberType); ++i)
1852         {
1853             get();
1854             if (JSON_UNLIKELY(not unexpect_eof(format, "number")))
1855             {
1856                 return false;
1857             }
1858 
1859             // reverse byte order prior to conversion if necessary
1860             if (is_little_endian != InputIsLittleEndian)
1861             {
1862                 vec[sizeof(NumberType) - i - 1] = static_cast<std::uint8_t>(current);
1863             }
1864             else
1865             {
1866                 vec[i] = static_cast<std::uint8_t>(current); // LCOV_EXCL_LINE
1867             }
1868         }
1869 
1870         // step 2: convert array into number of type T and return
1871         std::memcpy(&result, vec.data(), sizeof(NumberType));
1872         return true;
1873     }
1874 
1875     /*!
1876     @brief create a string by reading characters from the input
1877 
1878     @tparam NumberType the type of the number
1879     @param[in] format the current format (for diagnostics)
1880     @param[in] len number of characters to read
1881     @param[out] result string created by reading @a len bytes
1882 
1883     @return whether string creation completed
1884 
1885     @note We can not reserve @a len bytes for the result, because @a len
1886           may be too large. Usually, @ref unexpect_eof() detects the end of
1887           the input before we run out of string memory.
1888     */
1889     template<typename NumberType>
get_string(const input_format_t format,const NumberType len,string_t & result)1890     bool get_string(const input_format_t format,
1891                     const NumberType len,
1892                     string_t& result)
1893     {
1894         bool success = true;
1895         std::generate_n(std::back_inserter(result), len, [this, &success, &format]()
1896         {
1897             get();
1898             if (JSON_UNLIKELY(not unexpect_eof(format, "string")))
1899             {
1900                 success = false;
1901             }
1902             return static_cast<char>(current);
1903         });
1904         return success;
1905     }
1906 
1907     /*!
1908     @param[in] format   the current format (for diagnostics)
1909     @param[in] context  further context information (for diagnostics)
1910     @return whether the last read character is not EOF
1911     */
unexpect_eof(const input_format_t format,const char * context) const1912     bool unexpect_eof(const input_format_t format, const char* context) const
1913     {
1914         if (JSON_UNLIKELY(current == std::char_traits<char>::eof()))
1915         {
1916             return sax->parse_error(chars_read, "<end of file>",
1917                                     parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context)));
1918         }
1919         return true;
1920     }
1921 
1922     /*!
1923     @return a string representation of the last read byte
1924     */
get_token_string() const1925     std::string get_token_string() const
1926     {
1927         std::array<char, 3> cr{{}};
1928         (std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(current));
1929         return std::string{cr.data()};
1930     }
1931 
1932     /*!
1933     @param[in] format   the current format
1934     @param[in] detail   a detailed error message
1935     @param[in] context  further contect information
1936     @return a message string to use in the parse_error exceptions
1937     */
exception_message(const input_format_t format,const std::string & detail,const std::string & context) const1938     std::string exception_message(const input_format_t format,
1939                                   const std::string& detail,
1940                                   const std::string& context) const
1941     {
1942         std::string error_msg = "syntax error while parsing ";
1943 
1944         switch (format)
1945         {
1946             case input_format_t::cbor:
1947                 error_msg += "CBOR";
1948                 break;
1949 
1950             case input_format_t::msgpack:
1951                 error_msg += "MessagePack";
1952                 break;
1953 
1954             case input_format_t::ubjson:
1955                 error_msg += "UBJSON";
1956                 break;
1957 
1958             case input_format_t::bson:
1959                 error_msg += "BSON";
1960                 break;
1961 
1962             default:            // LCOV_EXCL_LINE
1963                 assert(false);  // LCOV_EXCL_LINE
1964         }
1965 
1966         return error_msg + " " + context + ": " + detail;
1967     }
1968 
1969   private:
1970     /// input adapter
1971     input_adapter_t ia = nullptr;
1972 
1973     /// the current character
1974     int current = std::char_traits<char>::eof();
1975 
1976     /// the number of characters read
1977     std::size_t chars_read = 0;
1978 
    /// whether we can assume little endianness
1980     const bool is_little_endian = little_endianess();
1981 
1982     /// the SAX parser
1983     json_sax_t* sax = nullptr;
1984 };
1985 }  // namespace detail
1986 }  // namespace nlohmann
1987