1 #ifndef JSON_BINARY_INCLUDED
2 #define JSON_BINARY_INCLUDED
3
/* Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License, version 2.0,
   as published by the Free Software Foundation.

   This program is also distributed with certain software (including
   but not limited to OpenSSL) that is licensed under separate terms,
   as designated in a particular file or component or in included license
   documentation.  The authors of MySQL hereby grant you an additional
   permission to link the program and your derivative works with the
   separately licensed software that they have included with MySQL.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License, version 2.0, for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
25
/**
  @file

  This file specifies the interface for serializing JSON values into
  binary representation, and for reading values back from the binary
  representation.

  The binary format is as follows:

  Each JSON value (scalar, object or array) has a one byte type
  identifier followed by the actual value.

  If the value is a JSON object, its binary representation will have a
  header that contains:

  - the member count
  - the size of the binary value in bytes
  - a list of pointers to each key
  - a list of pointers to each value

  The actual keys and values will come after the header, in the same
  order as in the header.

  Similarly, if the value is a JSON array, the binary representation
  will have a header with

  - the element count
  - the size of the binary value in bytes
  - a list of pointers to each value

  followed by the actual values, in the same order as in the header.

  @verbatim
  doc ::= type value

  type ::=
      0x00 |       // small JSON object
      0x01 |       // large JSON object
      0x02 |       // small JSON array
      0x03 |       // large JSON array
      0x04 |       // literal (true/false/null)
      0x05 |       // int16
      0x06 |       // uint16
      0x07 |       // int32
      0x08 |       // uint32
      0x09 |       // int64
      0x0a |       // uint64
      0x0b |       // double
      0x0c |       // utf8mb4 string
      0x0f         // custom data (any MySQL data type)

  value ::=
      object  |
      array   |
      literal |
      number  |
      string  |
      custom-data

  object ::= element-count size key-entry* value-entry* key* value*

  array ::= element-count size value-entry* value*

  // number of members in object or number of elements in array
  element-count ::=
      uint16 |  // if used in small JSON object/array
      uint32    // if used in large JSON object/array

  // number of bytes in the binary representation of the object or array
  size ::=
      uint16 |  // if used in small JSON object/array
      uint32    // if used in large JSON object/array

  key-entry ::= key-offset key-length

  key-offset ::=
      uint16 |  // if used in small JSON object
      uint32    // if used in large JSON object

  key-length ::= uint16    // key length must be less than 64KB

  value-entry ::= type offset-or-inlined-value

  // This field holds either the offset to where the value is stored,
  // or the value itself if it is small enough to be inlined (that is,
  // if it is a JSON literal or a small enough [u]int).
  offset-or-inlined-value ::=
      uint16 |   // if used in small JSON object/array
      uint32     // if used in large JSON object/array

  key ::= utf8mb4-data

  literal ::=
      0x00 |   // JSON null literal
      0x01 |   // JSON true literal
      0x02     // JSON false literal

  number ::=  ....  // little-endian format for [u]int(16|32|64), whereas
                    // double is stored in a platform-independent, eight-byte
                    // format using float8store()

  string ::= data-length utf8mb4-data

  custom-data ::= custom-type data-length binary-data

  custom-type ::= uint8   // type identifier that matches the
                          // internal enum_field_types enum

  data-length ::= uint8*  // If the high bit of a byte is 1, the length
                          // field is continued in the next byte,
                          // otherwise it is the last byte of the length
                          // field. So we need 1 byte to represent
                          // lengths up to 127, 2 bytes to represent
                          // lengths up to 16383, and so on...
  @endverbatim
*/
142
143 #include <stddef.h>
144 #include <string>
145
146 #include "field_types.h" // enum_field_types
147 #include "my_dbug.h" // DBUG_ASSERT
148 #include "my_inttypes.h"
149
150 class Field_json;
151 class Json_dom;
152 class Json_wrapper;
153 class String;
154 class THD;
155
156 namespace json_binary {
157
/**
  Serialize the JSON document represented by dom to binary format in
  the destination string, replacing any content already in the
  destination string.

  Only declared when MYSQL_SERVER is defined.

  @param[in]     thd   THD handle
  @param[in]     dom   the input DOM tree
  @param[in,out] dest  the destination string
  @retval false on success
  @retval true if an error occurred
*/
#ifdef MYSQL_SERVER
bool serialize(const THD *thd, const Json_dom *dom, String *dest);
#endif
172
173 /**
174 Class used for reading JSON values that are stored in the binary
175 format. Values are parsed lazily, so that only the parts of the
176 value that are interesting to the caller, are read. Array elements
177 can be looked up in constant time using the element() function.
178 Object members can be looked up in O(log n) time using the lookup()
179 function.
180 */
181 class Value {
182 public:
183 enum enum_type : uint8 {
184 OBJECT,
185 ARRAY,
186 STRING,
187 INT,
188 UINT,
189 DOUBLE,
190 LITERAL_NULL,
191 LITERAL_TRUE,
192 LITERAL_FALSE,
193 OPAQUE,
194 ERROR /* Not really a type. Used to signal that an
195 error was detected. */
196 };
197
198 /**
199 Does this value, and all of its members, represent a valid JSON
200 value?
201 */
202 bool is_valid() const;
type()203 enum_type type() const { return m_type; }
204 /// Does this value use the large storage format?
large_format()205 bool large_format() const { return m_large; }
206
207 /**
208 Get a pointer to the beginning of the STRING or OPAQUE data
209 represented by this instance.
210 */
get_data()211 const char *get_data() const {
212 DBUG_ASSERT(m_type == STRING || m_type == OPAQUE);
213 return m_data;
214 }
215
216 /**
217 Get the length in bytes of the STRING or OPAQUE value represented by
218 this instance.
219 */
get_data_length()220 uint32 get_data_length() const {
221 DBUG_ASSERT(m_type == STRING || m_type == OPAQUE);
222 return m_length;
223 }
224
225 /** Get the value of an INT. */
get_int64()226 int64 get_int64() const {
227 DBUG_ASSERT(m_type == INT);
228 return m_int_value;
229 }
230
231 /** Get the value of a UINT. */
get_uint64()232 uint64 get_uint64() const {
233 DBUG_ASSERT(m_type == UINT);
234 return static_cast<uint64>(m_int_value);
235 }
236
237 /** Get the value of a DOUBLE. */
get_double()238 double get_double() const {
239 DBUG_ASSERT(m_type == DOUBLE);
240 return m_double_value;
241 }
242
243 /**
244 Get the number of elements in an array, or the number of members in
245 an object.
246 */
element_count()247 uint32 element_count() const {
248 DBUG_ASSERT(m_type == ARRAY || m_type == OBJECT);
249 return m_element_count;
250 }
251
252 /**
253 Get the MySQL field type of an opaque value. Identifies the type of
254 the value stored in the data portion of an opaque value.
255 */
field_type()256 enum_field_types field_type() const {
257 DBUG_ASSERT(m_type == OPAQUE);
258 return m_field_type;
259 }
260
261 Value element(size_t pos) const;
262 Value key(size_t pos) const;
263 Value lookup(const char *key, size_t length) const;
lookup(const std::string & key)264 Value lookup(const std::string &key) const {
265 return lookup(key.c_str(), key.length());
266 }
267 size_t lookup_index(const char *key, size_t length) const;
lookup_index(const std::string & key)268 size_t lookup_index(const std::string &key) const {
269 return lookup_index(key.c_str(), key.length());
270 }
271 bool is_backed_by(const String *str) const;
272 bool raw_binary(const THD *thd, String *buf) const;
273 bool get_free_space(const THD *thd, size_t *space) const;
274 bool has_space(size_t pos, size_t needed, size_t *offset) const;
275 bool update_in_shadow(const Field_json *field, size_t pos,
276 Json_wrapper *new_value, size_t data_offset,
277 size_t data_length, const char *original,
278 char *destination, bool *changed) const;
279 bool remove_in_shadow(const Field_json *field, size_t pos,
280 const char *original, char *destination) const;
281
282 /** Constructor for values that represent literals or errors. */
Value(enum_type t)283 explicit Value(enum_type t) : m_data(nullptr), m_type(t) {
284 DBUG_ASSERT(t == LITERAL_NULL || t == LITERAL_TRUE || t == LITERAL_FALSE ||
285 t == ERROR);
286 }
287
288 /** Constructor for values that represent ints or uints. */
Value(enum_type t,int64 val)289 explicit Value(enum_type t, int64 val) : m_int_value(val), m_type(t) {
290 DBUG_ASSERT(t == INT || t == UINT);
291 }
292
293 /** Constructor for values that represent doubles. */
Value(double val)294 explicit Value(double val) : m_double_value(val), m_type(DOUBLE) {}
295
296 /** Constructor for values that represent strings. */
Value(const char * data,uint32 len)297 Value(const char *data, uint32 len)
298 : m_data(data), m_length(len), m_type(STRING) {}
299
300 /**
301 Constructor for values that represent arrays or objects.
302
303 @param t type
304 @param data pointer to the start of the binary representation
305 @param bytes the number of bytes in the binary representation of the value
306 @param element_count the number of elements or members in the value
307 @param large true if the value should be stored in the large
308 storage format with 4 byte offsets instead of 2 byte offsets
309 */
Value(enum_type t,const char * data,uint32 bytes,uint32 element_count,bool large)310 Value(enum_type t, const char *data, uint32 bytes, uint32 element_count,
311 bool large)
312 : m_data(data),
313 m_element_count(element_count),
314 m_length(bytes),
315 m_type(t),
316 m_large(large) {
317 DBUG_ASSERT(t == ARRAY || t == OBJECT);
318 }
319
320 /** Constructor for values that represent opaque data. */
Value(enum_field_types ft,const char * data,uint32 len)321 Value(enum_field_types ft, const char *data, uint32 len)
322 : m_data(data), m_length(len), m_field_type(ft), m_type(OPAQUE) {}
323
324 /** Empty constructor. Produces a value that represents an error condition. */
Value()325 Value() : Value(ERROR) {}
326
327 /** Is this value an array? */
is_array()328 bool is_array() const { return m_type == ARRAY; }
329
330 /** Is this value an object? */
is_object()331 bool is_object() const { return m_type == OBJECT; }
332
333 /**
334 Compare two Values
335 @note This function is limited to scalars only, for objects/arrays it
336 asserts. The main purpose is to separate old/new scalar values for updates
337 on multi-valued indexes.
338 @returns
339 -1 this < val
340 0 this == val
341 1 this > val
342 */
343 int eq(const Value &val) const;
344
345 private:
346 /*
347 Instances use only one of m_data, m_int_value and m_double_value,
348 so keep them in a union to save space in memory.
349 */
350 union {
351 /**
352 Pointer to the start of the binary representation of the value. Only
353 used by STRING, OPAQUE, OBJECT and ARRAY.
354
355 The memory pointed to by this member is not owned by this Value
356 object. Callers that create Value objects must make sure that the
357 memory is not freed as long as the Value object is alive.
358 */
359 const char *m_data;
360 /** The value if the type is INT or UINT. */
361 int64 m_int_value;
362 /** The value if the type is DOUBLE. */
363 double m_double_value;
364 };
365
366 /**
367 Element count for arrays and objects. Unused for other types.
368 */
369 uint32 m_element_count;
370
371 /**
372 The full length (in bytes) of the binary representation of an array or
373 object, or the length of a string or opaque value. Unused for other types.
374 */
375 uint32 m_length;
376
377 /**
378 The MySQL field type of the value, in case the type of the value is
379 OPAQUE. Otherwise, it is unused.
380 */
381 enum_field_types m_field_type;
382
383 /** The JSON type of the value. */
384 enum_type m_type;
385
386 /**
387 True if an array or an object uses the large storage format with 4
388 byte offsets instead of 2 byte offsets.
389 */
390 bool m_large;
391
392 size_t key_entry_offset(size_t pos) const;
393 size_t value_entry_offset(size_t pos) const;
394 bool first_value_offset(size_t *offset) const;
395 bool element_offsets(size_t pos, size_t *start, size_t *end,
396 bool *inlined) const;
397 };
398
399 /**
400 Parse a JSON binary document.
401
402 @param[in] data a pointer to the binary data
403 @param[in] len the size of the binary document in bytes
404 @return an object that allows access to the contents of the document
405 */
406 Value parse_binary(const char *data, size_t len);
407
/**
  How much space is needed for a JSON value when it is stored in the binary
  format.

  Only declared when MYSQL_SERVER is defined.

  @param[in]  thd     THD handle
  @param[in]  value   the JSON value to add to a document
  @param[in]  large   true if the large storage format is used
  @param[out] needed  gets set to the amount of bytes needed to store
                      the value
  @retval false if successful
  @retval true if an error occurred while calculating the needed space
*/
#ifdef MYSQL_SERVER
bool space_needed(const THD *thd, const Json_wrapper *value, bool large,
                  size_t *needed);
#endif
424
425 /**
426 Apply a function to every value in a JSON document. That is, apply
427 the function to the root node of the JSON document, to all its
428 children, grandchildren and so on.
429
430 @param value the root of the JSON document
431 @param func the function to apply
432 @retval true if the processing was stopped
433 @retval false if the processing was completed
434
435 @tparam Func a functor type that takes a #json_binary::Value
436 parameter and returns a `bool` which is `true` if the processing
437 should stop or `false` if the processing should continue with the
438 next node
439 */
440 template <typename Func>
for_each_node(const Value & value,const Func & func)441 bool for_each_node(const Value &value, const Func &func) {
442 if (func(value)) return true;
443
444 if (value.is_array() || value.is_object())
445 for (size_t i = 0, size = value.element_count(); i < size; ++i)
446 if (for_each_node(value.element(i), func)) return true;
447
448 return false;
449 }
450 } // namespace json_binary
451
452 #endif /* JSON_BINARY_INCLUDED */
453