1 #ifndef JSON_BINARY_INCLUDED
2 #define JSON_BINARY_INCLUDED
3 
4 /* Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License, version 2.0,
8    as published by the Free Software Foundation.
9 
10    This program is also distributed with certain software (including
11    but not limited to OpenSSL) that is licensed under separate terms,
12    as designated in a particular file or component or in included license
13    documentation.  The authors of MySQL hereby grant you an additional
14    permission to link the program and your derivative works with the
15    separately licensed software that they have included with MySQL.
16 
17    This program is distributed in the hope that it will be useful,
18    but WITHOUT ANY WARRANTY; without even the implied warranty of
19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20    GNU General Public License, version 2.0, for more details.
21 
22    You should have received a copy of the GNU General Public License
23    along with this program; if not, write to the Free Software
24    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
25 
26 /**
27   @file
28 
29   This file specifies the interface for serializing JSON values into
30   binary representation, and for reading values back from the binary
31   representation.
32 
33   The binary format is as follows:
34 
  Each JSON value (scalar, object or array) has a one-byte type
  identifier followed by the actual value.
37 
38   If the value is a JSON object, its binary representation will have a
39   header that contains:
40 
41   - the member count
42   - the size of the binary value in bytes
43   - a list of pointers to each key
44   - a list of pointers to each value
45 
46   The actual keys and values will come after the header, in the same
47   order as in the header.
48 
49   Similarly, if the value is a JSON array, the binary representation
50   will have a header with
51 
52   - the element count
53   - the size of the binary value in bytes
54   - a list of pointers to each value
55 
56   followed by the actual values, in the same order as in the header.
57 
58   @verbatim
59   doc ::= type value
60 
61   type ::=
62       0x00 |       // small JSON object
63       0x01 |       // large JSON object
64       0x02 |       // small JSON array
65       0x03 |       // large JSON array
66       0x04 |       // literal (true/false/null)
67       0x05 |       // int16
68       0x06 |       // uint16
69       0x07 |       // int32
70       0x08 |       // uint32
71       0x09 |       // int64
72       0x0a |       // uint64
73       0x0b |       // double
74       0x0c |       // utf8mb4 string
75       0x0f         // custom data (any MySQL data type)
76 
77   value ::=
78       object  |
79       array   |
80       literal |
81       number  |
82       string  |
83       custom-data
84 
85   object ::= element-count size key-entry* value-entry* key* value*
86 
87   array ::= element-count size value-entry* value*
88 
89   // number of members in object or number of elements in array
90   element-count ::=
91       uint16 |  // if used in small JSON object/array
92       uint32    // if used in large JSON object/array
93 
94   // number of bytes in the binary representation of the object or array
95   size ::=
96       uint16 |  // if used in small JSON object/array
97       uint32    // if used in large JSON object/array
98 
99   key-entry ::= key-offset key-length
100 
101   key-offset ::=
102       uint16 |  // if used in small JSON object
103       uint32    // if used in large JSON object
104 
105   key-length ::= uint16    // key length must be less than 64KB
106 
107   value-entry ::= type offset-or-inlined-value
108 
109   // This field holds either the offset to where the value is stored,
110   // or the value itself if it is small enough to be inlined (that is,
111   // if it is a JSON literal or a small enough [u]int).
112   offset-or-inlined-value ::=
113       uint16 |   // if used in small JSON object/array
114       uint32     // if used in large JSON object/array
115 
116   key ::= utf8mb4-data
117 
  literal ::=
      0x00 |   // JSON null literal
      0x01 |   // JSON true literal
      0x02     // JSON false literal
122 
123   number ::=  ....  // little-endian format for [u]int(16|32|64), whereas
124                     // double is stored in a platform-independent, eight-byte
125                     // format using float8store()
126 
127   string ::= data-length utf8mb4-data
128 
129   custom-data ::= custom-type data-length binary-data
130 
131   custom-type ::= uint8   // type identifier that matches the
132                           // internal enum_field_types enum
133 
134   data-length ::= uint8*  // If the high bit of a byte is 1, the length
135                           // field is continued in the next byte,
136                           // otherwise it is the last byte of the length
137                           // field. So we need 1 byte to represent
138                           // lengths up to 127, 2 bytes to represent
139                           // lengths up to 16383, and so on...
140   @endverbatim
141 */
142 
143 #include <stddef.h>
144 #include <string>
145 
146 #include "field_types.h"  // enum_field_types
147 #include "my_dbug.h"      // DBUG_ASSERT
148 #include "my_inttypes.h"
149 
150 class Field_json;
151 class Json_dom;
152 class Json_wrapper;
153 class String;
154 class THD;
155 
156 namespace json_binary {
157 
#ifdef MYSQL_SERVER
/**
  Serialize the JSON document represented by dom to binary format in
  the destination string, replacing any content already in the
  destination string.

  Only declared when MYSQL_SERVER is defined. The documentation block is
  kept inside the guard so that it stays attached to this declaration and
  cannot be picked up by a later declaration when the guard is inactive.

  @param[in]     thd   THD handle
  @param[in]     dom   the input DOM tree
  @param[in,out] dest  the destination string
  @retval false on success
  @retval true if an error occurred
*/
bool serialize(const THD *thd, const Json_dom *dom, String *dest);
#endif
172 
173 /**
174   Class used for reading JSON values that are stored in the binary
175   format. Values are parsed lazily, so that only the parts of the
176   value that are interesting to the caller, are read. Array elements
177   can be looked up in constant time using the element() function.
178   Object members can be looked up in O(log n) time using the lookup()
179   function.
180 */
181 class Value {
182  public:
183   enum enum_type : uint8 {
184     OBJECT,
185     ARRAY,
186     STRING,
187     INT,
188     UINT,
189     DOUBLE,
190     LITERAL_NULL,
191     LITERAL_TRUE,
192     LITERAL_FALSE,
193     OPAQUE,
194     ERROR /* Not really a type. Used to signal that an
195              error was detected. */
196   };
197 
198   /**
199     Does this value, and all of its members, represent a valid JSON
200     value?
201   */
202   bool is_valid() const;
type()203   enum_type type() const { return m_type; }
204   /// Does this value use the large storage format?
large_format()205   bool large_format() const { return m_large; }
206 
207   /**
208     Get a pointer to the beginning of the STRING or OPAQUE data
209     represented by this instance.
210   */
get_data()211   const char *get_data() const {
212     DBUG_ASSERT(m_type == STRING || m_type == OPAQUE);
213     return m_data;
214   }
215 
216   /**
217     Get the length in bytes of the STRING or OPAQUE value represented by
218     this instance.
219   */
get_data_length()220   uint32 get_data_length() const {
221     DBUG_ASSERT(m_type == STRING || m_type == OPAQUE);
222     return m_length;
223   }
224 
225   /** Get the value of an INT. */
get_int64()226   int64 get_int64() const {
227     DBUG_ASSERT(m_type == INT);
228     return m_int_value;
229   }
230 
231   /** Get the value of a UINT. */
get_uint64()232   uint64 get_uint64() const {
233     DBUG_ASSERT(m_type == UINT);
234     return static_cast<uint64>(m_int_value);
235   }
236 
237   /** Get the value of a DOUBLE. */
get_double()238   double get_double() const {
239     DBUG_ASSERT(m_type == DOUBLE);
240     return m_double_value;
241   }
242 
243   /**
244     Get the number of elements in an array, or the number of members in
245     an object.
246   */
element_count()247   uint32 element_count() const {
248     DBUG_ASSERT(m_type == ARRAY || m_type == OBJECT);
249     return m_element_count;
250   }
251 
252   /**
253     Get the MySQL field type of an opaque value. Identifies the type of
254     the value stored in the data portion of an opaque value.
255   */
field_type()256   enum_field_types field_type() const {
257     DBUG_ASSERT(m_type == OPAQUE);
258     return m_field_type;
259   }
260 
261   Value element(size_t pos) const;
262   Value key(size_t pos) const;
263   Value lookup(const char *key, size_t length) const;
lookup(const std::string & key)264   Value lookup(const std::string &key) const {
265     return lookup(key.c_str(), key.length());
266   }
267   size_t lookup_index(const char *key, size_t length) const;
lookup_index(const std::string & key)268   size_t lookup_index(const std::string &key) const {
269     return lookup_index(key.c_str(), key.length());
270   }
271   bool is_backed_by(const String *str) const;
272   bool raw_binary(const THD *thd, String *buf) const;
273   bool get_free_space(const THD *thd, size_t *space) const;
274   bool has_space(size_t pos, size_t needed, size_t *offset) const;
275   bool update_in_shadow(const Field_json *field, size_t pos,
276                         Json_wrapper *new_value, size_t data_offset,
277                         size_t data_length, const char *original,
278                         char *destination, bool *changed) const;
279   bool remove_in_shadow(const Field_json *field, size_t pos,
280                         const char *original, char *destination) const;
281 
282   /** Constructor for values that represent literals or errors. */
Value(enum_type t)283   explicit Value(enum_type t) : m_data(nullptr), m_type(t) {
284     DBUG_ASSERT(t == LITERAL_NULL || t == LITERAL_TRUE || t == LITERAL_FALSE ||
285                 t == ERROR);
286   }
287 
288   /** Constructor for values that represent ints or uints. */
Value(enum_type t,int64 val)289   explicit Value(enum_type t, int64 val) : m_int_value(val), m_type(t) {
290     DBUG_ASSERT(t == INT || t == UINT);
291   }
292 
293   /** Constructor for values that represent doubles. */
Value(double val)294   explicit Value(double val) : m_double_value(val), m_type(DOUBLE) {}
295 
296   /** Constructor for values that represent strings. */
Value(const char * data,uint32 len)297   Value(const char *data, uint32 len)
298       : m_data(data), m_length(len), m_type(STRING) {}
299 
300   /**
301     Constructor for values that represent arrays or objects.
302 
303     @param t type
304     @param data pointer to the start of the binary representation
305     @param bytes the number of bytes in the binary representation of the value
306     @param element_count the number of elements or members in the value
307     @param large true if the value should be stored in the large
308     storage format with 4 byte offsets instead of 2 byte offsets
309   */
Value(enum_type t,const char * data,uint32 bytes,uint32 element_count,bool large)310   Value(enum_type t, const char *data, uint32 bytes, uint32 element_count,
311         bool large)
312       : m_data(data),
313         m_element_count(element_count),
314         m_length(bytes),
315         m_type(t),
316         m_large(large) {
317     DBUG_ASSERT(t == ARRAY || t == OBJECT);
318   }
319 
320   /** Constructor for values that represent opaque data. */
Value(enum_field_types ft,const char * data,uint32 len)321   Value(enum_field_types ft, const char *data, uint32 len)
322       : m_data(data), m_length(len), m_field_type(ft), m_type(OPAQUE) {}
323 
324   /** Empty constructor. Produces a value that represents an error condition. */
Value()325   Value() : Value(ERROR) {}
326 
327   /** Is this value an array? */
is_array()328   bool is_array() const { return m_type == ARRAY; }
329 
330   /** Is this value an object? */
is_object()331   bool is_object() const { return m_type == OBJECT; }
332 
333   /**
334     Compare two Values
335     @note This function is limited to scalars only, for objects/arrays it
336     asserts. The main purpose is to separate old/new scalar values for updates
337     on multi-valued indexes.
338     @returns
339       -1  this < val
340        0  this == val
341        1  this > val
342   */
343   int eq(const Value &val) const;
344 
345  private:
346   /*
347     Instances use only one of m_data, m_int_value and m_double_value,
348     so keep them in a union to save space in memory.
349   */
350   union {
351     /**
352       Pointer to the start of the binary representation of the value. Only
353       used by STRING, OPAQUE, OBJECT and ARRAY.
354 
355       The memory pointed to by this member is not owned by this Value
356       object. Callers that create Value objects must make sure that the
357       memory is not freed as long as the Value object is alive.
358     */
359     const char *m_data;
360     /** The value if the type is INT or UINT. */
361     int64 m_int_value;
362     /** The value if the type is DOUBLE. */
363     double m_double_value;
364   };
365 
366   /**
367     Element count for arrays and objects. Unused for other types.
368   */
369   uint32 m_element_count;
370 
371   /**
372     The full length (in bytes) of the binary representation of an array or
373     object, or the length of a string or opaque value. Unused for other types.
374   */
375   uint32 m_length;
376 
377   /**
378     The MySQL field type of the value, in case the type of the value is
379     OPAQUE. Otherwise, it is unused.
380   */
381   enum_field_types m_field_type;
382 
383   /** The JSON type of the value. */
384   enum_type m_type;
385 
386   /**
387     True if an array or an object uses the large storage format with 4
388     byte offsets instead of 2 byte offsets.
389   */
390   bool m_large;
391 
392   size_t key_entry_offset(size_t pos) const;
393   size_t value_entry_offset(size_t pos) const;
394   bool first_value_offset(size_t *offset) const;
395   bool element_offsets(size_t pos, size_t *start, size_t *end,
396                        bool *inlined) const;
397 };
398 
399 /**
400   Parse a JSON binary document.
401 
402   @param[in] data  a pointer to the binary data
403   @param[in] len   the size of the binary document in bytes
404   @return an object that allows access to the contents of the document
405 */
406 Value parse_binary(const char *data, size_t len);
407 
#ifdef MYSQL_SERVER
/**
  How much space is needed for a JSON value when it is stored in the binary
  format.

  Only declared when MYSQL_SERVER is defined. The documentation block is
  kept inside the guard so that it stays attached to this declaration and
  cannot be picked up by a later declaration when the guard is inactive.

  @param[in]  thd     THD handle
  @param[in]  value   the JSON value to add to a document
  @param[in]  large   true if the large storage format is used
  @param[out] needed  gets set to the amount of bytes needed to store
                      the value
  @retval false if successful
  @retval true if an error occurred while calculating the needed space
*/
bool space_needed(const THD *thd, const Json_wrapper *value, bool large,
                  size_t *needed);
#endif
424 
425 /**
426   Apply a function to every value in a JSON document. That is, apply
427   the function to the root node of the JSON document, to all its
428   children, grandchildren and so on.
429 
430   @param  value the root of the JSON document
431   @param  func  the function to apply
432   @retval true  if the processing was stopped
433   @retval false if the processing was completed
434 
435   @tparam Func a functor type that takes a #json_binary::Value
436   parameter and returns a `bool` which is `true` if the processing
437   should stop or `false` if the processing should continue with the
438   next node
439 */
440 template <typename Func>
for_each_node(const Value & value,const Func & func)441 bool for_each_node(const Value &value, const Func &func) {
442   if (func(value)) return true;
443 
444   if (value.is_array() || value.is_object())
445     for (size_t i = 0, size = value.element_count(); i < size; ++i)
446       if (for_each_node(value.element(i), func)) return true;
447 
448   return false;
449 }
450 }  // namespace json_binary
451 
452 #endif /* JSON_BINARY_INCLUDED */
453