1 /* Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 #include "sql/json_binary.h"
24
25 #include <string.h>
26 #include <algorithm> // std::min
27 #include <map>
28 #include <memory>
29 #include <string>
30 #include <utility>
31
32 #include "m_ctype.h"
33 #include "my_byteorder.h"
34 #include "my_dbug.h"
35 #include "my_sys.h"
36 #include "mysqld_error.h"
37 #ifdef MYSQL_SERVER
38 #include "sql/check_stack.h"
39 #endif
40 #include "sql/field.h" // Field_json
41 #include "sql/json_dom.h" // Json_dom
42 #include "sql/json_syntax_check.h"
43 #include "sql/sql_class.h" // THD
44 #include "sql/sql_const.h"
45 #include "sql/system_variables.h"
46 #include "sql/table.h" // TABLE::add_binary_diff()
47 #include "sql_string.h"
48 #include "template_utils.h" // down_cast
49
namespace {

/*
  Type identifiers for the binary JSON format. The type byte precedes a
  serialized value: it is the first byte of a top-level document, or it
  is stored in the value entry of the enclosing array or object for
  nested values.
*/
constexpr char JSONB_TYPE_SMALL_OBJECT = 0x0;
constexpr char JSONB_TYPE_LARGE_OBJECT = 0x1;
constexpr char JSONB_TYPE_SMALL_ARRAY = 0x2;
constexpr char JSONB_TYPE_LARGE_ARRAY = 0x3;
constexpr char JSONB_TYPE_LITERAL = 0x4;
constexpr char JSONB_TYPE_INT16 = 0x5;
constexpr char JSONB_TYPE_UINT16 = 0x6;
constexpr char JSONB_TYPE_INT32 = 0x7;
constexpr char JSONB_TYPE_UINT32 = 0x8;
constexpr char JSONB_TYPE_INT64 = 0x9;
constexpr char JSONB_TYPE_UINT64 = 0xA;
constexpr char JSONB_TYPE_DOUBLE = 0xB;
constexpr char JSONB_TYPE_STRING = 0xC;
constexpr char JSONB_TYPE_OPAQUE = 0xF;

// The possible payload values of a JSONB_TYPE_LITERAL value.
constexpr char JSONB_NULL_LITERAL = 0x0;
constexpr char JSONB_TRUE_LITERAL = 0x1;
constexpr char JSONB_FALSE_LITERAL = 0x2;

/*
  The size of offset or size fields in the small and the large storage
  format for JSON objects and JSON arrays.
*/
constexpr uint8 SMALL_OFFSET_SIZE = 2;
constexpr uint8 LARGE_OFFSET_SIZE = 4;

/*
  The size of key entries for objects when using the small storage
  format or the large storage format. In the small format it is 4
  bytes (2 bytes for key length and 2 bytes for key offset). In the
  large format it is 6 (2 bytes for length, 4 bytes for offset).
*/
constexpr uint8 KEY_ENTRY_SIZE_SMALL = 2 + SMALL_OFFSET_SIZE;
constexpr uint8 KEY_ENTRY_SIZE_LARGE = 2 + LARGE_OFFSET_SIZE;

/*
  The size of value entries for objects or arrays. When using the
  small storage format, the entry size is 3 (1 byte for type, 2 bytes
  for offset). When using the large storage format, it is 5 (1 byte
  for type, 4 bytes for offset).
*/
constexpr uint8 VALUE_ENTRY_SIZE_SMALL = 1 + SMALL_OFFSET_SIZE;
constexpr uint8 VALUE_ENTRY_SIZE_LARGE = 1 + LARGE_OFFSET_SIZE;

}  // namespace
97
98 namespace json_binary {
99
/// Status codes for JSON serialization, returned by serialize_json_value()
/// and its helpers.
enum enum_serialization_result {
  /**
    Success. The JSON value was successfully serialized.
  */
  OK,
  /**
    The JSON value was too big to be serialized. If this status code
    is returned, and the small storage format is in use, the caller
    should retry the serialization with the large storage format. If
    this status code is returned, and the large format is in use,
    my_error() will already have been called.
  */
  VALUE_TOO_BIG,
  /**
    Some other error occurred. my_error() will have been called with
    more specific information about the failure.
  */
  FAILURE
};
120
// Forward declarations for functions that are used before they are defined.
#ifdef MYSQL_SERVER
static enum_serialization_result serialize_json_value(
    const THD *thd, const Json_dom *dom, size_t type_pos, String *dest,
    size_t depth, bool small_parent);
static void write_offset_or_size(char *dest, size_t offset_or_size, bool large);
#endif  // ifdef MYSQL_SERVER
static uint8 offset_size(bool large);
128
129 #ifdef MYSQL_SERVER
serialize(const THD * thd,const Json_dom * dom,String * dest)130 bool serialize(const THD *thd, const Json_dom *dom, String *dest) {
131 // Reset the destination buffer.
132 dest->length(0);
133 dest->set_charset(&my_charset_bin);
134
135 // Reserve space (one byte) for the type identifier.
136 if (dest->append('\0')) return true; /* purecov: inspected */
137 return serialize_json_value(thd, dom, 0, dest, 0, false) != OK;
138 }
139
140 /**
141 Reserve space for the given amount of extra bytes at the end of a
142 String buffer. If the String needs to allocate more memory, it will
143 grow by at least 50%, to avoid frequent reallocations.
144 */
reserve(String * buffer,size_t bytes_needed)145 static bool reserve(String *buffer, size_t bytes_needed) {
146 return buffer->reserve(bytes_needed, buffer->length() / 2);
147 }
148
149 /** Encode a 16-bit int at the end of the destination string. */
append_int16(String * dest,int16 value)150 static bool append_int16(String *dest, int16 value) {
151 if (reserve(dest, sizeof(value))) return true; /* purecov: inspected */
152 int2store(dest->ptr() + dest->length(), value);
153 dest->length(dest->length() + sizeof(value));
154 return false;
155 }
156
157 /** Encode a 32-bit int at the end of the destination string. */
append_int32(String * dest,int32 value)158 static bool append_int32(String *dest, int32 value) {
159 if (reserve(dest, sizeof(value))) return true; /* purecov: inspected */
160 int4store(dest->ptr() + dest->length(), value);
161 dest->length(dest->length() + sizeof(value));
162 return false;
163 }
164
165 /** Encode a 64-bit int at the end of the destination string. */
append_int64(String * dest,int64 value)166 static bool append_int64(String *dest, int64 value) {
167 if (reserve(dest, sizeof(value))) return true; /* purecov: inspected */
168 int8store(dest->ptr() + dest->length(), value);
169 dest->length(dest->length() + sizeof(value));
170 return false;
171 }
172
173 /**
174 Append an offset or a size to a String.
175
176 @param dest the destination String
177 @param offset_or_size the offset or size to append
178 @param large if true, use the large storage format (4 bytes);
179 otherwise, use the small storage format (2 bytes)
180 @return false if successfully appended, true otherwise
181 */
append_offset_or_size(String * dest,size_t offset_or_size,bool large)182 static bool append_offset_or_size(String *dest, size_t offset_or_size,
183 bool large) {
184 if (large)
185 return append_int32(dest, static_cast<int32>(offset_or_size));
186 else
187 return append_int16(dest, static_cast<int16>(offset_or_size));
188 }
189
190 /**
191 Insert an offset or a size at the specified position in a String. It
192 is assumed that the String has already allocated enough space to
193 hold the value.
194
195 @param dest the destination String
196 @param pos the position in the String
197 @param offset_or_size the offset or size to append
198 @param large if true, use the large storage format (4 bytes);
199 otherwise, use the small storage format (2 bytes)
200 */
insert_offset_or_size(String * dest,size_t pos,size_t offset_or_size,bool large)201 static void insert_offset_or_size(String *dest, size_t pos,
202 size_t offset_or_size, bool large) {
203 DBUG_ASSERT(pos + offset_size(large) <= dest->alloced_length());
204 write_offset_or_size(dest->ptr() + pos, offset_or_size, large);
205 }
206
207 /**
208 Write an offset or a size to a char array. The char array is assumed to be
209 large enough to hold an offset or size value.
210
211 @param dest the array to write to
212 @param offset_or_size the offset or size to write
213 @param large if true, use the large storage format
214 */
write_offset_or_size(char * dest,size_t offset_or_size,bool large)215 static void write_offset_or_size(char *dest, size_t offset_or_size,
216 bool large) {
217 if (large)
218 int4store(dest, static_cast<uint32>(offset_or_size));
219 else
220 int2store(dest, static_cast<uint16>(offset_or_size));
221 }
222
223 /**
224 Check if the size of a document exceeds the maximum JSON binary size
225 (4 GB, aka UINT_MAX32). Raise an error if it is too big.
226
227 @param size the size of the document
228 @return true if the document is too big, false otherwise
229 */
check_document_size(size_t size)230 static bool check_document_size(size_t size) {
231 if (size > UINT_MAX32) {
232 /* purecov: begin inspected */
233 my_error(ER_JSON_VALUE_TOO_BIG, MYF(0));
234 return true;
235 /* purecov: end */
236 }
237 return false;
238 }
239
/**
  Append a length to a String. The number of bytes used to store the length
  uses a variable number of bytes depending on how large the length is. If the
  highest bit in a byte is 1, then the length is continued on the next byte.
  The least significant bits are stored in the first byte.

  @param dest the destination String
  @param length the length to write
  @return false on success, true on error
*/
static bool append_variable_length(String *dest, size_t length) {
  do {
    // Filter out the seven least significant bits of length.
    uchar ch = (length & 0x7F);

    /*
      Right-shift length to drop the seven least significant bits. If there
      is more data in length, set the high bit of the byte we're writing
      to the String.
    */
    length >>= 7;
    if (length != 0) ch |= 0x80;

    if (dest->append(ch)) return true; /* purecov: inspected */
  } while (length != 0);

  // NOTE(review): length is always zero when the loop exits, so this
  // effectively checks only dest->length() against the 4 GB limit.
  if (check_document_size(dest->length() + length))
    return true; /* purecov: inspected */

  // Successfully appended the length.
  return false;
}
272 #endif // ifdef MYSQL_SERVER
273
274 /**
275 Read a variable length written by append_variable_length().
276
277 @param[in] data the buffer to read from
278 @param[in] data_length the maximum number of bytes to read from data
279 @param[out] length the length that was read
280 @param[out] num the number of bytes needed to represent the length
281 @return false on success, true if the variable length field is ill-formed
282 */
read_variable_length(const char * data,size_t data_length,uint32 * length,uint8 * num)283 static bool read_variable_length(const char *data, size_t data_length,
284 uint32 *length, uint8 *num) {
285 /*
286 It takes five bytes to represent UINT_MAX32, which is the largest
287 supported length, so don't look any further.
288 */
289 const size_t max_bytes = std::min(data_length, static_cast<size_t>(5));
290
291 size_t len = 0;
292 for (size_t i = 0; i < max_bytes; i++) {
293 // Get the next 7 bits of the length.
294 len |= (data[i] & 0x7f) << (7 * i);
295 if ((data[i] & 0x80) == 0) {
296 // The length shouldn't exceed 32 bits.
297 if (len > UINT_MAX32) return true; /* purecov: inspected */
298
299 // This was the last byte. Return successfully.
300 *num = static_cast<uint8>(i + 1);
301 *length = static_cast<uint32>(len);
302 return false;
303 }
304 }
305
306 // No more available bytes. Return true to signal error.
307 return true; /* purecov: inspected */
308 }
309
310 /**
311 Check if the specified offset or size is too big to store in the
312 binary JSON format.
313
314 If the small storage format is used, the caller is expected to retry
315 serialization in the large storage format, so no error is generated
316 if the offset or size is too big. If the large storage format is
317 used, an error will be generated if the offset or size is too big.
318
319 @param offset_or_size the offset or size to check
320 @param large if true, we are using the large storage format
321 for JSON arrays and objects, which allows offsets and sizes that
322 fit in a uint32; otherwise, we are using the small storage format,
323 which allow offsets and sizes that fit in a uint16.
324 @return true if offset_or_size is too big for the format, false
325 otherwise
326 */
327 #ifdef MYSQL_SERVER
is_too_big_for_json(size_t offset_or_size,bool large)328 static bool is_too_big_for_json(size_t offset_or_size, bool large) {
329 if (offset_or_size > UINT_MAX16) {
330 if (!large) return true;
331 return check_document_size(offset_or_size);
332 }
333
334 return false;
335 }
336
/**
  Append all the key entries of a JSON object to a destination string.
  The key entries are just a series of offset/length pairs that point
  to where the actual key names are stored.

  @param[in]  object the JSON object
  @param[out] dest   the destination string
  @param[in]  offset the offset of the first key
  @param[in]  large  if true, the large storage format will be used
  @return serialization status
*/
static enum_serialization_result append_key_entries(const Json_object *object,
                                                    String *dest, size_t offset,
                                                    bool large) {
#ifndef DBUG_OFF
  // Only used in debug builds, to verify the key ordering invariant below.
  const std::string *prev_key = nullptr;
#endif

  // Add the key entries.
  for (Json_object::const_iterator it = object->begin(); it != object->end();
       ++it) {
    const std::string *key = &it->first;
    size_t len = key->length();

#ifndef DBUG_OFF
    // Check that the DOM returns the keys in the correct order: sorted on
    // increasing length first, then lexicographically for equal lengths.
    if (prev_key) {
      DBUG_ASSERT(prev_key->length() <= len);
      if (len == prev_key->length())
        DBUG_ASSERT(memcmp(prev_key->data(), key->data(), len) < 0);
    }
    prev_key = key;
#endif

    // We only have two bytes for the key size. Check if the key is too big.
    if (len > UINT_MAX16) {
      my_error(ER_JSON_KEY_TOO_BIG, MYF(0));
      return FAILURE;
    }

    if (is_too_big_for_json(offset, large))
      return VALUE_TOO_BIG; /* purecov: inspected */

    // Each key entry is the key's offset followed by its 16-bit length.
    if (append_offset_or_size(dest, offset, large) ||
        append_int16(dest, static_cast<int16>(len)))
      return FAILURE; /* purecov: inspected */
    offset += len;
  }

  return OK;
}
388 #endif // ifdef MYSQL_SERVER
389
390 /**
391 Will a value of the specified type be inlined?
392 @param type the type to check
393 @param large true if the large storage format is used
394 @return true if the value will be inlined
395 */
inlined_type(uint8 type,bool large)396 static bool inlined_type(uint8 type, bool large) {
397 switch (type) {
398 case JSONB_TYPE_LITERAL:
399 case JSONB_TYPE_INT16:
400 case JSONB_TYPE_UINT16:
401 return true;
402 case JSONB_TYPE_INT32:
403 case JSONB_TYPE_UINT32:
404 return large;
405 default:
406 return false;
407 }
408 }
409
410 /**
411 Get the size of an offset value.
412 @param large true if the large storage format is used
413 @return the size of an offset
414 */
offset_size(bool large)415 static uint8 offset_size(bool large) {
416 return large ? LARGE_OFFSET_SIZE : SMALL_OFFSET_SIZE;
417 }
418
419 /**
420 Get the size of a key entry.
421 @param large true if the large storage format is used
422 @return the size of a key entry
423 */
key_entry_size(bool large)424 static uint8 key_entry_size(bool large) {
425 return large ? KEY_ENTRY_SIZE_LARGE : KEY_ENTRY_SIZE_SMALL;
426 }
427
428 /**
429 Get the size of a value entry.
430 @param large true if the large storage format is used
431 @return the size of a value entry
432 */
value_entry_size(bool large)433 static uint8 value_entry_size(bool large) {
434 return large ? VALUE_ENTRY_SIZE_LARGE : VALUE_ENTRY_SIZE_SMALL;
435 }
436
437 /**
438 Attempt to inline a value in its value entry at the beginning of an
439 object or an array. This function assumes that the destination
440 string has already allocated enough space to hold the inlined value.
441
442 @param[in] value the JSON value
443 @param[out] dest the destination string
444 @param[in] pos the offset where the value should be inlined
445 @param[in] large true if the large storage format is used
446 @return true if the value was inlined, false if it was not
447 */
448 #ifdef MYSQL_SERVER
static bool attempt_inline_value(const Json_dom *value, String *dest,
                                 size_t pos, bool large) {
  int32 inlined_val;
  char inlined_type;
  switch (value->json_type()) {
    case enum_json_type::J_NULL:
      inlined_val = JSONB_NULL_LITERAL;
      inlined_type = JSONB_TYPE_LITERAL;
      break;
    case enum_json_type::J_BOOLEAN:
      inlined_val = down_cast<const Json_boolean *>(value)->value()
                        ? JSONB_TRUE_LITERAL
                        : JSONB_FALSE_LITERAL;
      inlined_type = JSONB_TYPE_LITERAL;
      break;
    case enum_json_type::J_INT: {
      const Json_int *i = down_cast<const Json_int *>(value);
      // 16-bit ints can be inlined in both formats; 32-bit ints only in
      // the large format, whose offset field is four bytes wide.
      if (!i->is_16bit() && !(large && i->is_32bit()))
        return false;  // cannot inline this value
      inlined_val = static_cast<int32>(i->value());
      inlined_type = i->is_16bit() ? JSONB_TYPE_INT16 : JSONB_TYPE_INT32;
      break;
    }
    case enum_json_type::J_UINT: {
      const Json_uint *i = down_cast<const Json_uint *>(value);
      // Same inlining rules as for signed ints above.
      if (!i->is_16bit() && !(large && i->is_32bit()))
        return false;  // cannot inline this value
      inlined_val = static_cast<int32>(i->value());
      inlined_type = i->is_16bit() ? JSONB_TYPE_UINT16 : JSONB_TYPE_UINT32;
      break;
    }
    default:
      return false;  // cannot inline value of this type
  }

  // Write the type byte of the value entry, and store the value itself in
  // the space where the offset normally lives.
  (*dest)[pos] = inlined_type;
  insert_offset_or_size(dest, pos + 1, inlined_val, large);
  return true;
}
488
/**
  Serialize a JSON array at the end of the destination string.

  @param thd THD handle
  @param array the JSON array to serialize
  @param dest the destination string
  @param large if true, the large storage format will be used
  @param depth the current nesting level
  @return serialization status
*/
static enum_serialization_result serialize_json_array(const THD *thd,
                                                      const Json_array *array,
                                                      String *dest, bool large,
                                                      size_t depth) {
  // Serialization recurses via serialize_json_value(), so guard the stack.
  if (check_stack_overrun(thd, STACK_MIN_SIZE, nullptr))
    return FAILURE; /* purecov: inspected */

  // All offsets stored in the array are relative to this position.
  const size_t start_pos = dest->length();
  const size_t size = array->size();

  if (check_json_depth(++depth)) {
    return FAILURE;
  }

  if (is_too_big_for_json(size, large)) return VALUE_TOO_BIG;

  // First write the number of elements in the array.
  if (append_offset_or_size(dest, size, large))
    return FAILURE; /* purecov: inspected */

  // Reserve space for the size of the array in bytes. To be filled in later.
  const size_t size_pos = dest->length();
  if (append_offset_or_size(dest, 0, large))
    return FAILURE; /* purecov: inspected */

  size_t entry_pos = dest->length();

  // Reserve space for the value entries at the beginning of the array.
  const auto entry_size = value_entry_size(large);
  if (dest->fill(dest->length() + size * entry_size, 0))
    return FAILURE; /* purecov: inspected */

  for (const auto &child : *array) {
    const Json_dom *elt = child.get();
    // Small scalars are inlined directly into their value entry; all other
    // values are appended after the entries and referenced by offset.
    if (!attempt_inline_value(elt, dest, entry_pos, large)) {
      size_t offset = dest->length() - start_pos;
      if (is_too_big_for_json(offset, large)) return VALUE_TOO_BIG;
      insert_offset_or_size(dest, entry_pos + 1, offset, large);
      auto res = serialize_json_value(thd, elt, entry_pos, dest, depth, !large);
      if (res != OK) return res;
    }
    entry_pos += entry_size;
  }

  // Finally, write the size of the array in bytes into the slot reserved
  // above.
  size_t bytes = dest->length() - start_pos;
  if (is_too_big_for_json(bytes, large))
    return VALUE_TOO_BIG; /* purecov: inspected */
  insert_offset_or_size(dest, size_pos, bytes, large);

  return OK;
}
551
552 /**
553 Serialize a JSON object at the end of the destination string.
554
555 @param thd THD handle
556 @param object the JSON object to serialize
557 @param dest the destination string
558 @param large if true, the large storage format will be used
559 @param depth the current nesting level
560 @return serialization status
561 */
serialize_json_object(const THD * thd,const Json_object * object,String * dest,bool large,size_t depth)562 static enum_serialization_result serialize_json_object(
563 const THD *thd, const Json_object *object, String *dest, bool large,
564 size_t depth) {
565 if (check_stack_overrun(thd, STACK_MIN_SIZE, nullptr))
566 return FAILURE; /* purecov: inspected */
567
568 const size_t start_pos = dest->length();
569 const size_t size = object->cardinality();
570
571 if (check_json_depth(++depth)) {
572 return FAILURE;
573 }
574
575 if (is_too_big_for_json(size, large))
576 return VALUE_TOO_BIG; /* purecov: inspected */
577
578 // First write the number of members in the object.
579 if (append_offset_or_size(dest, size, large))
580 return FAILURE; /* purecov: inspected */
581
582 // Reserve space for the size of the object in bytes. To be filled in later.
583 const size_t size_pos = dest->length();
584 if (append_offset_or_size(dest, 0, large))
585 return FAILURE; /* purecov: inspected */
586
587 const auto key_entry_size = json_binary::key_entry_size(large);
588 const auto value_entry_size = json_binary::value_entry_size(large);
589
590 /*
591 Calculate the offset of the first key relative to the start of the
592 object. The first key comes right after the value entries.
593 */
594 const size_t first_key_offset =
595 dest->length() + size * (key_entry_size + value_entry_size) - start_pos;
596
597 // Append all the key entries.
598 enum_serialization_result res =
599 append_key_entries(object, dest, first_key_offset, large);
600 if (res != OK) return res;
601
602 const size_t start_of_value_entries = dest->length();
603
604 // Reserve space for the value entries. Will be filled in later.
605 dest->fill(dest->length() + size * value_entry_size, 0);
606
607 // Add the actual keys.
608 for (const auto &member : *object) {
609 if (dest->append(member.first.c_str(), member.first.length()))
610 return FAILURE; /* purecov: inspected */
611 }
612
613 // Add the values, and update the value entries accordingly.
614 size_t entry_pos = start_of_value_entries;
615 for (const auto &member : *object) {
616 const Json_dom *child = member.second.get();
617 if (!attempt_inline_value(child, dest, entry_pos, large)) {
618 size_t offset = dest->length() - start_pos;
619 if (is_too_big_for_json(offset, large)) return VALUE_TOO_BIG;
620 insert_offset_or_size(dest, entry_pos + 1, offset, large);
621 res = serialize_json_value(thd, child, entry_pos, dest, depth, !large);
622 if (res != OK) return res;
623 }
624 entry_pos += value_entry_size;
625 }
626
627 // Finally, write the size of the object in bytes.
628 size_t bytes = dest->length() - start_pos;
629 if (is_too_big_for_json(bytes, large)) return VALUE_TOO_BIG;
630 insert_offset_or_size(dest, size_pos, bytes, large);
631
632 return OK;
633 }
634
635 /**
636 Serialize a JSON opaque value at the end of the destination string.
637 @param[in] opaque the JSON opaque value
638 @param[in] type_pos where to write the type specifier
639 @param[out] dest the destination string
640 @return serialization status
641 */
serialize_opaque(const Json_opaque * opaque,size_t type_pos,String * dest)642 static enum_serialization_result serialize_opaque(const Json_opaque *opaque,
643 size_t type_pos,
644 String *dest) {
645 DBUG_ASSERT(type_pos < dest->length());
646 if (dest->append(static_cast<char>(opaque->type())) ||
647 append_variable_length(dest, opaque->size()) ||
648 dest->append(opaque->value(), opaque->size()))
649 return FAILURE; /* purecov: inspected */
650 (*dest)[type_pos] = JSONB_TYPE_OPAQUE;
651 return OK;
652 }
653
654 /**
655 Serialize a DECIMAL value at the end of the destination string.
656 @param[in] jd the DECIMAL value
657 @param[in] type_pos where to write the type specifier
658 @param[out] dest the destination string
659 @return serialization status
660 */
serialize_decimal(const Json_decimal * jd,size_t type_pos,String * dest)661 static enum_serialization_result serialize_decimal(const Json_decimal *jd,
662 size_t type_pos,
663 String *dest) {
664 // Store DECIMALs as opaque values.
665 const int bin_size = jd->binary_size();
666 char buf[Json_decimal::MAX_BINARY_SIZE];
667 if (jd->get_binary(buf)) return FAILURE; /* purecov: inspected */
668 Json_opaque o(MYSQL_TYPE_NEWDECIMAL, buf, bin_size);
669 return serialize_opaque(&o, type_pos, dest);
670 }
671
672 /**
673 Serialize a DATETIME value at the end of the destination string.
674 @param[in] jdt the DATETIME value
675 @param[in] type_pos where to write the type specifier
676 @param[out] dest the destination string
677 @return serialization status
678 */
serialize_datetime(const Json_datetime * jdt,size_t type_pos,String * dest)679 static enum_serialization_result serialize_datetime(const Json_datetime *jdt,
680 size_t type_pos,
681 String *dest) {
682 // Store datetime as opaque values.
683 char buf[Json_datetime::PACKED_SIZE];
684 jdt->to_packed(buf);
685 Json_opaque o(jdt->field_type(), buf, sizeof(buf));
686 return serialize_opaque(&o, type_pos, dest);
687 }
688
/**
  Serialize a JSON value at the end of the destination string.

  Also go back and update the type specifier for the value to specify
  the correct type. For top-level documents, the type specifier is
  located in the byte right in front of the value. For documents that
  are nested within other documents, the type specifier is located in
  the value entry portion at the beginning of the parent document.

  @param thd THD handle
  @param dom the JSON value to serialize
  @param type_pos the position of the type specifier to update
  @param dest the destination string
  @param depth the current nesting level
  @param small_parent
  tells if @a dom is contained in an array or object
  which is stored in the small storage format
  @return serialization status
*/
static enum_serialization_result serialize_json_value(
    const THD *thd, const Json_dom *dom, size_t type_pos, String *dest,
    size_t depth, bool small_parent) {
  const size_t start_pos = dest->length();
  DBUG_ASSERT(type_pos < start_pos);

  enum_serialization_result result;

  switch (dom->json_type()) {
    case enum_json_type::J_ARRAY: {
      const Json_array *array = down_cast<const Json_array *>(dom);
      // Optimistically try the compact small format first.
      (*dest)[type_pos] = JSONB_TYPE_SMALL_ARRAY;
      result = serialize_json_array(thd, array, dest, false, depth);
      /*
        If the array was too large to fit in the small storage format,
        reset the destination buffer and retry with the large storage
        format.

        Possible future optimization: Analyze size up front and pick the
        correct format on the first attempt, so that we don't have to
        redo parts of the serialization.
      */
      if (result == VALUE_TOO_BIG) {
        // If the parent uses the small storage format, it needs to grow too.
        if (small_parent) return VALUE_TOO_BIG;
        dest->length(start_pos);
        (*dest)[type_pos] = JSONB_TYPE_LARGE_ARRAY;
        result = serialize_json_array(thd, array, dest, true, depth);
      }
      break;
    }
    case enum_json_type::J_OBJECT: {
      const Json_object *object = down_cast<const Json_object *>(dom);
      // Optimistically try the compact small format first.
      (*dest)[type_pos] = JSONB_TYPE_SMALL_OBJECT;
      result = serialize_json_object(thd, object, dest, false, depth);
      /*
        If the object was too large to fit in the small storage format,
        reset the destination buffer and retry with the large storage
        format.

        Possible future optimization: Analyze size up front and pick the
        correct format on the first attempt, so that we don't have to
        redo parts of the serialization.
      */
      if (result == VALUE_TOO_BIG) {
        // If the parent uses the small storage format, it needs to grow too.
        if (small_parent) return VALUE_TOO_BIG;
        dest->length(start_pos);
        (*dest)[type_pos] = JSONB_TYPE_LARGE_OBJECT;
        result = serialize_json_object(thd, object, dest, true, depth);
      }
      break;
    }
    case enum_json_type::J_STRING: {
      const Json_string *jstr = down_cast<const Json_string *>(dom);
      size_t size = jstr->size();
      // Strings are stored as a variable-length size field followed by
      // the string bytes.
      if (append_variable_length(dest, size) ||
          dest->append(jstr->value().c_str(), size))
        return FAILURE; /* purecov: inspected */
      (*dest)[type_pos] = JSONB_TYPE_STRING;
      result = OK;
      break;
    }
    case enum_json_type::J_INT: {
      // Signed ints use the smallest of the 16/32/64-bit encodings that
      // can hold the value.
      const Json_int *i = down_cast<const Json_int *>(dom);
      longlong val = i->value();
      if (i->is_16bit()) {
        if (append_int16(dest, static_cast<int16>(val)))
          return FAILURE; /* purecov: inspected */
        (*dest)[type_pos] = JSONB_TYPE_INT16;
      } else if (i->is_32bit()) {
        if (append_int32(dest, static_cast<int32>(val)))
          return FAILURE; /* purecov: inspected */
        (*dest)[type_pos] = JSONB_TYPE_INT32;
      } else {
        if (append_int64(dest, val)) return FAILURE; /* purecov: inspected */
        (*dest)[type_pos] = JSONB_TYPE_INT64;
      }
      result = OK;
      break;
    }
    case enum_json_type::J_UINT: {
      // Unsigned ints use the smallest of the 16/32/64-bit encodings that
      // can hold the value.
      const Json_uint *i = down_cast<const Json_uint *>(dom);
      ulonglong val = i->value();
      if (i->is_16bit()) {
        if (append_int16(dest, static_cast<int16>(val)))
          return FAILURE; /* purecov: inspected */
        (*dest)[type_pos] = JSONB_TYPE_UINT16;
      } else if (i->is_32bit()) {
        if (append_int32(dest, static_cast<int32>(val)))
          return FAILURE; /* purecov: inspected */
        (*dest)[type_pos] = JSONB_TYPE_UINT32;
      } else {
        if (append_int64(dest, val)) return FAILURE; /* purecov: inspected */
        (*dest)[type_pos] = JSONB_TYPE_UINT64;
      }
      result = OK;
      break;
    }
    case enum_json_type::J_DOUBLE: {
      // Store the double in a platform-independent eight-byte format.
      const Json_double *d = down_cast<const Json_double *>(dom);
      if (reserve(dest, 8)) return FAILURE; /* purecov: inspected */
      float8store(dest->ptr() + dest->length(), d->value());
      dest->length(dest->length() + 8);
      (*dest)[type_pos] = JSONB_TYPE_DOUBLE;
      result = OK;
      break;
    }
    case enum_json_type::J_NULL:
      if (dest->append(JSONB_NULL_LITERAL))
        return FAILURE; /* purecov: inspected */
      (*dest)[type_pos] = JSONB_TYPE_LITERAL;
      result = OK;
      break;
    case enum_json_type::J_BOOLEAN: {
      char c = (down_cast<const Json_boolean *>(dom)->value())
                   ? JSONB_TRUE_LITERAL
                   : JSONB_FALSE_LITERAL;
      if (dest->append(c)) return FAILURE; /* purecov: inspected */
      (*dest)[type_pos] = JSONB_TYPE_LITERAL;
      result = OK;
      break;
    }
    case enum_json_type::J_OPAQUE:
      result =
          serialize_opaque(down_cast<const Json_opaque *>(dom), type_pos, dest);
      break;
    case enum_json_type::J_DECIMAL:
      result = serialize_decimal(down_cast<const Json_decimal *>(dom), type_pos,
                                 dest);
      break;
    case enum_json_type::J_DATETIME:
    case enum_json_type::J_DATE:
    case enum_json_type::J_TIME:
    case enum_json_type::J_TIMESTAMP:
      result = serialize_datetime(down_cast<const Json_datetime *>(dom),
                                  type_pos, dest);
      break;
    default:
      /* purecov: begin deadcode */
      DBUG_ASSERT(false);
      my_error(ER_INTERNAL_ERROR, MYF(0), "JSON serialization failed");
      return FAILURE;
      /* purecov: end */
  }

  // Reject documents that have grown beyond max_allowed_packet.
  if (result == OK && dest->length() > thd->variables.max_allowed_packet) {
    my_error(ER_WARN_ALLOWED_PACKET_OVERFLOWED, MYF(0),
             "json_binary::serialize", thd->variables.max_allowed_packet);
    return FAILURE;
  }

  return result;
}
863 #endif // ifdef MYSQL_SERVER
864
is_valid() const865 bool Value::is_valid() const {
866 switch (m_type) {
867 case ERROR:
868 return false;
869 case ARRAY:
870 // Check that all the array elements are valid.
871 for (size_t i = 0; i < element_count(); i++)
872 if (!element(i).is_valid()) return false; /* purecov: inspected */
873 return true;
874 case OBJECT: {
875 /*
876 Check that all keys and values are valid, and that the keys come
877 in the correct order.
878 */
879 const char *prev_key = nullptr;
880 size_t prev_key_len = 0;
881 for (size_t i = 0; i < element_count(); i++) {
882 Value k = key(i);
883 if (!k.is_valid() || !element(i).is_valid())
884 return false; /* purecov: inspected */
885 const char *curr_key = k.get_data();
886 size_t curr_key_len = k.get_data_length();
887 if (i > 0) {
888 if (prev_key_len > curr_key_len)
889 return false; /* purecov: inspected */
890 if (prev_key_len == curr_key_len &&
891 (memcmp(prev_key, curr_key, curr_key_len) >= 0))
892 return false; /* purecov: inspected */
893 }
894 prev_key = curr_key;
895 prev_key_len = curr_key_len;
896 }
897 return true;
898 }
899 default:
900 // This is a valid scalar value.
901 return true;
902 }
903 }
904
905 /**
906 Create a Value object that represents an error condition.
907 */
err()908 static Value err() { return Value(Value::ERROR); }
909
910 /**
911 Parse a JSON scalar value.
912
913 @param type the binary type of the scalar
914 @param data pointer to the start of the binary representation of the scalar
915 @param len the maximum number of bytes to read from data
916 @return an object that represents the scalar value
917 */
parse_scalar(uint8 type,const char * data,size_t len)918 static Value parse_scalar(uint8 type, const char *data, size_t len) {
919 switch (type) {
920 case JSONB_TYPE_LITERAL:
921 if (len < 1) return err(); /* purecov: inspected */
922 switch (static_cast<uint8>(*data)) {
923 case JSONB_NULL_LITERAL:
924 return Value(Value::LITERAL_NULL);
925 case JSONB_TRUE_LITERAL:
926 return Value(Value::LITERAL_TRUE);
927 case JSONB_FALSE_LITERAL:
928 return Value(Value::LITERAL_FALSE);
929 default:
930 return err(); /* purecov: inspected */
931 }
932 case JSONB_TYPE_INT16:
933 if (len < 2) return err(); /* purecov: inspected */
934 return Value(Value::INT, sint2korr(data));
935 case JSONB_TYPE_INT32:
936 if (len < 4) return err(); /* purecov: inspected */
937 return Value(Value::INT, sint4korr(data));
938 case JSONB_TYPE_INT64:
939 if (len < 8) return err(); /* purecov: inspected */
940 return Value(Value::INT, sint8korr(data));
941 case JSONB_TYPE_UINT16:
942 if (len < 2) return err(); /* purecov: inspected */
943 return Value(Value::UINT, uint2korr(data));
944 case JSONB_TYPE_UINT32:
945 if (len < 4) return err(); /* purecov: inspected */
946 return Value(Value::UINT, uint4korr(data));
947 case JSONB_TYPE_UINT64:
948 if (len < 8) return err(); /* purecov: inspected */
949 return Value(Value::UINT, uint8korr(data));
950 case JSONB_TYPE_DOUBLE: {
951 if (len < 8) return err(); /* purecov: inspected */
952 return Value(float8get(data));
953 }
954 case JSONB_TYPE_STRING: {
955 uint32 str_len;
956 uint8 n;
957 if (read_variable_length(data, len, &str_len, &n))
958 return err(); /* purecov: inspected */
959 if (len < n + str_len) return err(); /* purecov: inspected */
960 return Value(data + n, str_len);
961 }
962 case JSONB_TYPE_OPAQUE: {
963 /*
964 There should always be at least one byte, which tells the field
965 type of the opaque value.
966 */
967 if (len < 1) return err(); /* purecov: inspected */
968
969 // The type is encoded as a uint8 that maps to an enum_field_types.
970 uint8 type_byte = static_cast<uint8>(*data);
971 enum_field_types field_type = static_cast<enum_field_types>(type_byte);
972
973 // Then there's the length of the value.
974 uint32 val_len;
975 uint8 n;
976 if (read_variable_length(data + 1, len - 1, &val_len, &n))
977 return err(); /* purecov: inspected */
978 if (len < 1 + n + val_len) return err(); /* purecov: inspected */
979 return Value(field_type, data + 1 + n, val_len);
980 }
981 default:
982 // Not a valid scalar type.
983 return err();
984 }
985 }
986
987 /**
988 Read an offset or size field from a buffer. The offset could be either
989 a two byte unsigned integer or a four byte unsigned integer.
990
991 @param data the buffer to read from
992 @param large tells if the large or small storage format is used; true
993 means read four bytes, false means read two bytes
994 */
read_offset_or_size(const char * data,bool large)995 static uint32 read_offset_or_size(const char *data, bool large) {
996 return large ? uint4korr(data) : uint2korr(data);
997 }
998
999 /**
1000 Parse a JSON array or object.
1001
1002 @param t type (either ARRAY or OBJECT)
1003 @param data pointer to the start of the array or object
1004 @param len the maximum number of bytes to read from data
1005 @param large if true, the array or object is stored using the large
1006 storage format; otherwise, it is stored using the small
1007 storage format
1008 @return an object that allows access to the array or object
1009 */
parse_array_or_object(Value::enum_type t,const char * data,size_t len,bool large)1010 static Value parse_array_or_object(Value::enum_type t, const char *data,
1011 size_t len, bool large) {
1012 DBUG_ASSERT(t == Value::ARRAY || t == Value::OBJECT);
1013
1014 /*
1015 Make sure the document is long enough to contain the two length fields
1016 (both number of elements or members, and number of bytes).
1017 */
1018 const auto offset_size = json_binary::offset_size(large);
1019 if (len < 2 * offset_size) return err();
1020 const uint32 element_count = read_offset_or_size(data, large);
1021 const uint32 bytes = read_offset_or_size(data + offset_size, large);
1022
1023 // The value can't have more bytes than what's available in the data buffer.
1024 if (bytes > len) return err();
1025
1026 /*
1027 Calculate the size of the header. It consists of:
1028 - two length fields
1029 - if it is a JSON object, key entries with pointers to where the keys
1030 are stored
1031 - value entries with pointers to where the actual values are stored
1032 */
1033 size_t header_size = 2 * offset_size;
1034 if (t == Value::OBJECT) header_size += element_count * key_entry_size(large);
1035 header_size += element_count * value_entry_size(large);
1036
1037 // The header should not be larger than the full size of the value.
1038 if (header_size > bytes) return err(); /* purecov: inspected */
1039
1040 return Value(t, data, bytes, element_count, large);
1041 }
1042
1043 /**
1044 Parse a JSON value within a larger JSON document.
1045
1046 @param type the binary type of the value to parse
1047 @param data pointer to the start of the binary representation of the value
1048 @param len the maximum number of bytes to read from data
1049 @return an object that allows access to the value
1050 */
parse_value(uint8 type,const char * data,size_t len)1051 static Value parse_value(uint8 type, const char *data, size_t len) {
1052 switch (type) {
1053 case JSONB_TYPE_SMALL_OBJECT:
1054 return parse_array_or_object(Value::OBJECT, data, len, false);
1055 case JSONB_TYPE_LARGE_OBJECT:
1056 return parse_array_or_object(Value::OBJECT, data, len, true);
1057 case JSONB_TYPE_SMALL_ARRAY:
1058 return parse_array_or_object(Value::ARRAY, data, len, false);
1059 case JSONB_TYPE_LARGE_ARRAY:
1060 return parse_array_or_object(Value::ARRAY, data, len, true);
1061 default:
1062 return parse_scalar(type, data, len);
1063 }
1064 }
1065
parse_binary(const char * data,size_t len)1066 Value parse_binary(const char *data, size_t len) {
1067 DBUG_TRACE;
1068 /*
1069 Each document should start with a one-byte type specifier, so an
1070 empty document is invalid according to the format specification.
1071 Empty documents may appear due to inserts using the IGNORE keyword
1072 or with non-strict SQL mode, which will insert an empty string if
1073 the value NULL is inserted into a NOT NULL column. We choose to
1074 interpret empty values as the JSON null literal.
1075 */
1076 if (len == 0) return Value(Value::LITERAL_NULL);
1077
1078 Value ret = parse_value(data[0], data + 1, len - 1);
1079 return ret;
1080 }
1081
1082 /**
1083 Get the element at the specified position of a JSON array or a JSON
1084 object. When called on a JSON object, it returns the value
1085 associated with the key returned by key(pos).
1086
1087 @param pos the index of the element
1088 @return a value representing the specified element, or a value where
1089 type() returns ERROR if pos does not point to an element
1090 */
Value Value::element(size_t pos) const {
  DBUG_ASSERT(m_type == ARRAY || m_type == OBJECT);

  // Out-of-range positions yield an ERROR value rather than crashing.
  if (pos >= m_element_count) return err();

  const auto entry_size = value_entry_size(m_large);
  const auto entry_offset = value_entry_offset(pos);

  // The first byte of the value entry is the type of the element.
  uint8 type = m_data[entry_offset];

  /*
    Check if this is an inlined scalar value. If so, return it.
    The scalar will be inlined just after the byte that identifies the
    type, so it's found on entry_offset + 1.
  */
  if (inlined_type(type, m_large))
    return parse_scalar(type, m_data + entry_offset + 1, entry_size - 1);

  /*
    Otherwise, it's a non-inlined value, and the offset to where the value
    is stored, can be found right after the type byte in the entry.
  */
  uint32 value_offset = read_offset_or_size(m_data + entry_offset + 1, m_large);

  /*
    Reject offsets that point past the end of the document, or backwards
    into the header (before the end of this value entry).
  */
  if (m_length < value_offset || value_offset < entry_offset + entry_size)
    return err(); /* purecov: inspected */

  return parse_value(type, m_data + value_offset, m_length - value_offset);
}
1120
1121 /**
1122 Get the key of the member stored at the specified position in a JSON
1123 object.
1124
1125 @param pos the index of the member
1126 @return the key of the specified member, or a value where type()
1127 returns ERROR if pos does not point to a member
1128 */
Value Value::key(size_t pos) const {
  DBUG_ASSERT(m_type == OBJECT);

  // Out-of-range positions yield an ERROR value rather than crashing.
  if (pos >= m_element_count) return err();

  const auto offset_size = json_binary::offset_size(m_large);
  const auto key_entry_size = json_binary::key_entry_size(m_large);
  const auto value_entry_size = json_binary::value_entry_size(m_large);

  // The key entries are located after two length fields of size offset_size.
  const size_t entry_offset = key_entry_offset(pos);

  // The offset of the key is the first part of the key entry.
  const uint32 key_offset = read_offset_or_size(m_data + entry_offset, m_large);

  // The length of the key is the second part of the entry, always two bytes.
  const uint16 key_length = uint2korr(m_data + entry_offset + offset_size);

  /*
    The key must start somewhere after the last value entry, and it must
    end before the end of the m_data buffer. The first operand computes
    the end of the header as seen from this key entry: the remaining
    (m_element_count - pos) key entries plus all the value entries.
  */
  if ((key_offset < entry_offset + (m_element_count - pos) * key_entry_size +
                        m_element_count * value_entry_size) ||
      (m_length < key_offset + key_length))
    return err(); /* purecov: inspected */

  return Value(m_data + key_offset, key_length);
}
1158
1159 /**
1160 Get the value associated with the specified key in a JSON object.
1161
1162 @param[in] key the key to look up
1163 @param[in] length the length of the key
1164 @return the value associated with the key, if there is one. otherwise,
1165 returns ERROR
1166 */
lookup(const char * key,size_t length) const1167 Value Value::lookup(const char *key, size_t length) const {
1168 size_t index = lookup_index(key, length);
1169 if (index == element_count()) return err();
1170 return element(index);
1171 }
1172
1173 /**
1174 Get the index of the element with the specified key in a JSON object.
1175
1176 @param[in] key the key to look up
1177 @param[in] length the length of the key
1178 @return the index if the key is found, or `element_count()` if the
1179 key is not found
1180 */
lookup_index(const char * key,size_t length) const1181 size_t Value::lookup_index(const char *key, size_t length) const {
1182 DBUG_ASSERT(m_type == OBJECT);
1183
1184 const auto offset_size = json_binary::offset_size(m_large);
1185 const auto entry_size = key_entry_size(m_large);
1186
1187 const size_t first_entry_offset = key_entry_offset(0);
1188
1189 size_t lo = 0U; // lower bound for binary search (inclusive)
1190 size_t hi = m_element_count; // upper bound for binary search (exclusive)
1191
1192 while (lo < hi) {
1193 // Find the entry in the middle of the search interval.
1194 size_t idx = (lo + hi) / 2;
1195 size_t entry_offset = first_entry_offset + idx * entry_size;
1196
1197 // Keys are ordered on length, so check length first.
1198 size_t key_len = uint2korr(m_data + entry_offset + offset_size);
1199 if (length > key_len) {
1200 lo = idx + 1;
1201 } else if (length < key_len) {
1202 hi = idx;
1203 } else {
1204 // The keys had the same length, so compare their contents.
1205 size_t key_offset = read_offset_or_size(m_data + entry_offset, m_large);
1206
1207 int cmp = memcmp(key, m_data + key_offset, key_len);
1208 if (cmp > 0)
1209 lo = idx + 1;
1210 else if (cmp < 0)
1211 hi = idx;
1212 else
1213 return idx;
1214 }
1215 }
1216
1217 return m_element_count; // not found
1218 }
1219
1220 /**
1221 Is this binary value pointing to data that is contained in the specified
1222 string.
1223
1224 @param str a string with binary data
1225 @retval true if the string contains data pointed to from this object
1226 @retval false otherwise
1227 */
is_backed_by(const String * str) const1228 bool Value::is_backed_by(const String *str) const {
1229 /*
1230 The m_data member is only valid for objects, arrays, strings and opaque
1231 values. Other types have copied the necessary data into the Value object
1232 and do not depend on data in any String object.
1233 */
1234 switch (m_type) {
1235 case OBJECT:
1236 case ARRAY:
1237 case STRING:
1238 case OPAQUE:
1239 return m_data >= str->ptr() && m_data < str->ptr() + str->length();
1240 default:
1241 return false;
1242 }
1243 }
1244
1245 /**
1246 Copy the binary representation of this value into a buffer,
1247 replacing the contents of the receiving buffer.
1248
1249 @param thd THD handle
1250 @param buf the receiving buffer
1251 @return false on success, true otherwise
1252 */
1253 #ifdef MYSQL_SERVER
bool Value::raw_binary(const THD *thd, String *buf) const {
  // It's not safe to overwrite ourselves.
  DBUG_ASSERT(!is_backed_by(buf));

  // Reset the buffer.
  buf->length(0);
  buf->set_charset(&my_charset_bin);

  switch (m_type) {
    case OBJECT:
    case ARRAY: {
      // Pick the type byte matching both the container type and the
      // storage format (small/large), then copy the raw bytes verbatim.
      char tp = m_large ? (m_type == OBJECT ? JSONB_TYPE_LARGE_OBJECT
                                            : JSONB_TYPE_LARGE_ARRAY)
                        : (m_type == OBJECT ? JSONB_TYPE_SMALL_OBJECT
                                            : JSONB_TYPE_SMALL_ARRAY);
      return buf->append(tp) || buf->append(m_data, m_length);
    }
    case STRING:
      // Type byte, variable-length length field, then the string contents.
      return buf->append(JSONB_TYPE_STRING) ||
             append_variable_length(buf, m_length) ||
             buf->append(m_data, m_length);
    case INT: {
      // Scalars are re-serialized via a temporary DOM object so the
      // smallest possible representation is chosen.
      Json_int i(get_int64());
      return serialize(thd, &i, buf) != OK;
    }
    case UINT: {
      Json_uint i(get_uint64());
      return serialize(thd, &i, buf) != OK;
    }
    case DOUBLE: {
      Json_double d(get_double());
      return serialize(thd, &d, buf) != OK;
    }
    case LITERAL_NULL: {
      Json_null n;
      return serialize(thd, &n, buf) != OK;
    }
    case LITERAL_TRUE:
    case LITERAL_FALSE: {
      Json_boolean b(m_type == LITERAL_TRUE);
      return serialize(thd, &b, buf) != OK;
    }
    case OPAQUE:
      // Type byte, the wrapped field type, the length, then the payload.
      return buf->append(JSONB_TYPE_OPAQUE) || buf->append(field_type()) ||
             append_variable_length(buf, m_length) ||
             buf->append(m_data, m_length);
    case ERROR:
      break; /* purecov: inspected */
  }

  /* purecov: begin deadcode */
  DBUG_ASSERT(false);
  return true;
  /* purecov: end */
}
1309 #endif // ifdef MYSQL_SERVER
1310
1311 /**
1312 Find the start offset and the end offset of the specified element.
1313 @param[in] pos which element to check
1314 @param[out] start the start offset of the value
1315 @param[out] end the end offset of the value (exclusive)
1316 @param[out] inlined set to true if the specified element is inlined
1317 @return true if the offsets cannot be determined, false if successful
1318 */
bool Value::element_offsets(size_t pos, size_t *start, size_t *end,
                            bool *inlined) const {
  DBUG_ASSERT(m_type == ARRAY || m_type == OBJECT);
  DBUG_ASSERT(pos < m_element_count);

  // Make sure the whole value entry lies within the document.
  const char *entry = m_data + value_entry_offset(pos);
  if (entry + value_entry_size(m_large) > m_data + m_length)
    return true; /* purecov: inspected */

  // Inlined values live in the entry itself; they occupy no space in
  // the value section, so report an empty [0, 0) range.
  if (inlined_type(*entry, m_large)) {
    *start = 0;
    *end = 0;
    *inlined = true;
    return false;
  }

  // Non-inlined: the entry holds the offset of the value.
  const size_t val_pos = read_offset_or_size(entry + 1, m_large);
  if (val_pos >= m_length) return true;

  size_t val_end = 0;
  switch (entry[0]) {
    // Fixed-size scalars: the end offset follows from the type alone.
    case JSONB_TYPE_INT32:
    case JSONB_TYPE_UINT32:
      val_end = val_pos + 4;
      break;
    case JSONB_TYPE_INT64:
    case JSONB_TYPE_UINT64:
    case JSONB_TYPE_DOUBLE:
      val_end = val_pos + 8;
      break;
    // Variable-size values: parse the element to find its extent.
    case JSONB_TYPE_STRING:
    case JSONB_TYPE_OPAQUE:
    case JSONB_TYPE_SMALL_OBJECT:
    case JSONB_TYPE_LARGE_OBJECT:
    case JSONB_TYPE_SMALL_ARRAY:
    case JSONB_TYPE_LARGE_ARRAY: {
      Value v = element(pos);
      if (v.type() == ERROR) return true;
      // End offset = where the parsed value's data starts relative to
      // this container, plus its length.
      val_end = (v.m_data - this->m_data) + v.m_length;
    } break;
    default:
      // Unknown type byte: offsets cannot be determined.
      return true;
  }

  *start = val_pos;
  *end = val_end;
  *inlined = false;
  return false;
}
1368
1369 /**
1370 Find the lowest possible offset where a value can be located inside this
1371 array or object.
1372
1373 @param[out] offset the lowest offset where a value can be located
1374 @return false on success, true on error
1375 */
first_value_offset(size_t * offset) const1376 bool Value::first_value_offset(size_t *offset) const {
1377 DBUG_ASSERT(m_type == ARRAY || m_type == OBJECT);
1378
1379 /*
1380 Find the lowest offset where a value could be stored. Arrays can
1381 store them right after the last value entry. Objects can store
1382 them right after the last key.
1383 */
1384 if (m_type == ARRAY || m_element_count == 0) {
1385 *offset = value_entry_offset(m_element_count);
1386 return false;
1387 }
1388
1389 Value key = this->key(m_element_count - 1);
1390 if (key.type() == ERROR) return true;
1391
1392 *offset = key.get_data() + key.get_data_length() - m_data;
1393 return false;
1394 }
1395
1396 /**
1397 Does this array or object have enough space to replace the value at
1398 the given position with another value of a given size?
1399
1400 @param[in] pos the position in the array or object
1401 @param[in] needed the number of bytes needed for the new value
1402 @param[out] offset if true is returned, this value is set to an
1403 offset relative to the start of the array or
1404 object, which tells where the replacement value
1405 should be stored
1406 @return true if there is enough space, false otherwise
1407 */
bool Value::has_space(size_t pos, size_t needed, size_t *offset) const {
  DBUG_ASSERT(m_type == ARRAY || m_type == OBJECT);
  DBUG_ASSERT(pos < m_element_count);

  /*
    Find the lowest offset where a value could be stored. Arrays can
    store them right after the last value entry. Objects can store
    them right after the last key.
  */
  size_t first_value_offset;
  if (this->first_value_offset(&first_value_offset)) return false;

  /*
    No need to check further if we need more space than the total
    space available in the array or object.
  */
  if (needed > m_length - first_value_offset) return false;

  // Locate the current value of the element to be replaced.
  size_t val_start;
  size_t val_end;
  bool inlined;
  if (element_offsets(pos, &val_start, &val_end, &inlined)) return false;

  if (!inlined && val_end - val_start >= needed) {
    // Found enough space at the position where the original value was located.
    *offset = val_start;
    return true;
  }

  /*
    Need more space. Look for free space after the original value.
    There's potential free space after the end of the original value
    and up to the start of the next non-inlined value.
  */
  const auto entry_size = value_entry_size(m_large);
  size_t i = pos + 1;
  // Scan the following value entries for the first non-inlined value;
  // its start offset bounds the free space after the original value.
  for (auto entry = m_data + value_entry_offset(pos); i < m_element_count;
       ++i) {
    entry += entry_size;
    // TODO Give up after N iterations?
    if (inlined_type(*entry, m_large)) continue;
    val_end = read_offset_or_size(entry + 1, m_large);
    if (val_end > m_length) return false;
    break;
  }

  if (i == m_element_count) {
    /*
      There are no non-inlined values behind the one we are updating,
      so we can use the rest of the space allocated for the array or
      object.
    */
    val_end = m_length;
  }

  // Retry with the extended end offset.
  if (!inlined && val_end - val_start >= needed) {
    *offset = val_start;
    return true;
  }

  /*
    Still not enough space. See if there's free space we can use in
    front of the original value. We can use space after the end of the
    first non-inlined value we find.
  */
  if (needed > val_end - first_value_offset) return false;
  // Scan backwards for the closest preceding non-inlined value; the
  // space after its end is free.
  for (i = pos; i > 0; --i) {
    size_t elt_start;
    size_t elt_end;
    bool elt_inlined;
    if (element_offsets(i - 1, &elt_start, &elt_end, &elt_inlined))
      return false;
    if (elt_inlined) continue;
    val_start = elt_end;
    break;
  }

  if (i == 0) {
    /*
      There are no non-inlined values ahead of the value we are
      updating, so we can start right after the value entries.
    */
    val_start = first_value_offset;
  }

  // Final sanity checks on the candidate [val_start, val_end) range.
  if (val_start >= first_value_offset && val_end <= m_length &&
      val_start <= val_end && val_end - val_start >= needed) {
    *offset = val_start;
    return true;
  }

  return false;
}
1501
1502 /**
1503 Get the offset of the key entry that describes the key of the member at a
1504 given position in this object.
1505
1506 @param pos the position of the member
1507 @return the offset of the key entry, relative to the start of the object
1508 */
key_entry_offset(size_t pos) const1509 inline size_t Value::key_entry_offset(size_t pos) const {
1510 DBUG_ASSERT(m_type == OBJECT);
1511 // The first key entry is located right after the two length fields.
1512 return 2 * offset_size(m_large) + key_entry_size(m_large) * pos;
1513 }
1514
1515 /**
1516 Get the offset of the value entry that describes the element at a
1517 given position in this array or object.
1518
1519 @param pos the position of the element
1520 @return the offset of the entry, relative to the start of the array or object
1521 */
value_entry_offset(size_t pos) const1522 inline size_t Value::value_entry_offset(size_t pos) const {
1523 DBUG_ASSERT(m_type == ARRAY || m_type == OBJECT);
1524 /*
1525 Value entries come after the two length fields if it's an array, or
1526 after the two length fields and all the key entries if it's an object.
1527 */
1528 size_t first_entry_offset = 2 * offset_size(m_large);
1529 if (m_type == OBJECT)
1530 first_entry_offset += m_element_count * key_entry_size(m_large);
1531
1532 return first_entry_offset + value_entry_size(m_large) * pos;
1533 }
1534
1535 #ifdef MYSQL_SERVER
space_needed(const THD * thd,const Json_wrapper * value,bool large,size_t * needed)1536 bool space_needed(const THD *thd, const Json_wrapper *value, bool large,
1537 size_t *needed) {
1538 if (value->type() == enum_json_type::J_ERROR) {
1539 my_error(ER_INVALID_JSON_BINARY_DATA, MYF(0));
1540 return true;
1541 }
1542
1543 // Serialize the value to a temporary buffer to find out how big it is.
1544 StringBuffer<STRING_BUFFER_USUAL_SIZE> buf;
1545 if (value->to_binary(thd, &buf)) return true; /* purecov: inspected */
1546
1547 DBUG_ASSERT(buf.length() > 1);
1548
1549 // If the value can be inlined in the value entry, it doesn't need any space.
1550 if (inlined_type(buf[0], large)) {
1551 *needed = 0;
1552 return false;
1553 }
1554
1555 /*
1556 The first byte in the buffer is the type identifier. We're only
1557 interested in the size of the data portion, so exclude the type byte
1558 from the returned size.
1559 */
1560 *needed = buf.length() - 1;
1561 return false;
1562 }
1563
1564 /**
1565 Update a value in an array or object. The updated value is written to a
1566 shadow copy. The original array or object is left unchanged, unless the
1567 shadow copy is actually a pointer to the array backing this Value object. It
1568 is assumed that the shadow copy is at least as big as the original document,
1569 and that there is enough space at the given position to hold the new value.
1570
1571 Typically, if a document is modified multiple times in a single update
1572 statement, the first invocation of update_in_shadow() will have a Value
1573 object that points into the binary data in the Field, and write to a separate
1574 destination buffer. Subsequent updates of the document will have a Value
1575 object that points to the partially updated value in the destination buffer,
1576 and write the new modifications to the same buffer.
1577
1578 All changes made to the binary value are recorded as binary diffs using
1579 TABLE::add_binary_diff().
1580
1581 @param field the column that is updated
1582 @param pos the element to update
1583 @param new_value the new value of the element
1584 @param data_offset where to write the value (offset relative to the
1585 beginning of the array or object, obtained with
1586 #has_space) or zero if the value can be inlined
1587 @param data_length the length of the new value in bytes or zero if
1588 the value can be inlined
1589 @param original pointer to the start of the JSON document
1590 @param destination pointer to the shadow copy of the JSON document
1591 (it could be the same as @a original, in which case the
1592 original document will be modified)
1593 @param[out] changed gets set to true if a change was made to the document,
1594 or to false if this operation was a no-op
1595 @return false on success, true if an error occurred
1596
1597 @par Example of partial update
1598
1599 Given the JSON document [ "abc", "def" ], which is serialized like this in a
1600 JSON column:
1601
1602 0x02 - type: small JSON array
1603 0x02 - number of elements (low byte)
1604 0x00 - number of elements (high byte)
1605 0x12 - number of bytes (low byte)
1606 0x00 - number of bytes (high byte)
1607 0x0C - type of element 0 (string)
1608 0x0A - offset of element 0 (low byte)
1609 0x00 - offset of element 0 (high byte)
1610 0x0C - type of element 1 (string)
1611 0x0E - offset of element 1 (low byte)
1612 0x00 - offset of element 1 (high byte)
1613 0x03 - length of element 0
1614 'a'
1615 'b' - content of element 0
1616 'c'
1617 0x03 - length of element 1
1618 'd'
1619 'e' - content of element 1
1620 'f'
1621
1622 Let's change element 0 from "abc" to "XY" using the following statement:
1623
1624 UPDATE t SET j = JSON_SET(j, '$[0]', 'XY')
1625
1626 Since we're replacing one string with a shorter one, we can just overwrite
1627 the length byte with the new length, and the beginning of the original string
1628 data. Since the original string "abc" is longer than the new string "XY",
1629 we'll have a free byte at the end of the string. This byte is left as is
1630 ('c'). The resulting binary representation looks like this:
1631
1632 0x02 - type: small JSON array
1633 0x02 - number of elements (low byte)
1634 0x00 - number of elements (high byte)
1635 0x12 - number of bytes (low byte)
1636 0x00 - number of bytes (high byte)
1637 0x0C - type of element 0 (string)
1638 0x0A - offset of element 0 (low byte)
1639 0x00 - offset of element 0 (high byte)
1640 0x0C - type of element 1 (string)
1641 0x0E - offset of element 1 (low byte)
1642 0x00 - offset of element 1 (high byte)
1643 CHANGED 0x02 - length of element 0
1644 CHANGED 'X'
1645 CHANGED 'Y' - content of element 0
1646 (free) 'c'
1647 0x03 - length of element 1
1648 'd'
1649 'e' - content of element 1
1650 'f'
1651
1652 This change will be represented as one binary diff that covers the three
1653 changed bytes.
1654
1655 Let's now change element 1 from "def" to "XYZW":
1656
1657 UPDATE t SET j = JSON_SET(j, '$[1]', 'XYZW')
1658
1659 Since the new string is one byte longer than the original string, we cannot
1660 simply overwrite the old one. But we can reuse the free byte from the
1661 previous update, which is immediately preceding the original value.
1662
1663 To make use of this, we need to change the offset of element 1 to point to
1664 the free byte. Then we can overwrite the free byte and the original string
1665 data with the new length and string contents. Resulting binary
1666 representation:
1667
1668 0x02 - type: small JSON array
1669 0x02 - number of elements (low byte)
1670 0x00 - number of elements (high byte)
1671 0x12 - number of bytes (low byte)
1672 0x00 - number of bytes (high byte)
1673 0x0C - type of element 0 (string)
1674 0x0A - offset of element 0 (low byte)
1675 0x00 - offset of element 0 (high byte)
1676 0x0C - type of element 1 (string)
1677 CHANGED 0x0D - offset of element 1 (low byte)
1678 0x00 - offset of element 1 (high byte)
1679 0x02 - length of element 0
1680 'X' - content of element 0
1681 'Y' - content of element 0
1682 CHANGED 0x04 - length of element 1
1683 CHANGED 'X'
1684 CHANGED 'Y'
1685 CHANGED 'Z' - content of element 1
1686 CHANGED 'W'
1687
1688 This change will be represented as two binary diffs. One diff for changing
1689 the offset, and one for changing the contents of the string.
1690
1691 Then let's replace the string in element 1 with a small number:
1692
1693 UPDATE t SET j = JSON_SET(j, '$[1]', 456)
1694
1695 This will change the type of element 1 from string to int16. Such small
1696 numbers are inlined in the value entry, where we normally store the offset of
1697 the value. The offset section of the value entry is therefore changed to hold
1698 the number 456. The length and contents of the original value ("XYZW") are
1699 not touched, but they are now unused and free to be reused. Resulting binary
1700 representation:
1701
1702 0x02 - type: small JSON array
1703 0x02 - number of elements (low byte)
1704 0x00 - number of elements (high byte)
1705 0x12 - number of bytes (low byte)
1706 0x00 - number of bytes (high byte)
1707 0x0C - type of element 0 (string)
1708 0x0A - offset of element 0 (low byte)
1709 0x00 - offset of element 0 (high byte)
1710 CHANGED 0x05 - type of element 1 (int16)
1711 CHANGED 0xC8 - value of element 1 (low byte)
1712 CHANGED 0x01 - value of element 1 (high byte)
1713 0x02 - length of element 0
1714 'X' - content of element 0
1715 'Y' - content of element 0
1716 (free) 0x04 - length of element 1
1717 (free) 'X'
1718 (free) 'Y'
1719 (free) 'Z' - content of element 1
1720 (free) 'W'
1721
1722 The change is represented as one binary diff that changes the value entry
1723 (type and inlined value).
1724 */
bool Value::update_in_shadow(const Field_json *field, size_t pos,
                             Json_wrapper *new_value, size_t data_offset,
                             size_t data_length, const char *original,
                             char *destination, bool *changed) const {
  DBUG_ASSERT(m_type == ARRAY || m_type == OBJECT);

  // A data_length of zero means the new value is small enough to be stored
  // inline in the value entry itself; no space in the data section is used.
  const bool inlined = (data_length == 0);

  // Assume no changes. Update the flag when the document is actually changed.
  *changed = false;

  /*
    Create a buffer large enough to hold the new value entry. (Plus one since
    some String functions insist on adding a terminating '\0'.)
  */
  StringBuffer<VALUE_ENTRY_SIZE_LARGE + 1> new_entry;

  if (inlined) {
    new_entry.length(value_entry_size(m_large));
    Json_dom *dom = new_value->to_dom(field->table->in_use);
    if (dom == nullptr) return true; /* purecov: inspected */
    attempt_inline_value(dom, &new_entry, 0, m_large);
  } else {
    new_entry.append('\0');  // type, to be filled in later
    append_offset_or_size(&new_entry, data_offset, m_large);

    // Translate the offset within this Value's data area into an offset from
    // the start of the whole binary document, and find the corresponding
    // position in the shadow copy.
    const char *value = m_data + data_offset;
    const size_t value_offset = value - original;
    char *value_dest = destination + value_offset;

    StringBuffer<STRING_BUFFER_USUAL_SIZE> buffer;
    if (new_value->to_binary(field->table->in_use, &buffer))
      return true; /* purecov: inspected */

    DBUG_ASSERT(buffer.length() > 1);

    // The first byte is the type byte, which should be in the value entry.
    new_entry[0] = buffer[0];

    /*
      Create another diff for the changed data, but only if the new data is
      actually different from the old data.
    */
    const size_t length = buffer.length() - 1;
    DBUG_ASSERT(length == data_length);
    if (memcmp(value_dest, buffer.ptr() + 1, length) != 0) {
      memcpy(value_dest, buffer.ptr() + 1, length);
      if (field->table->add_binary_diff(field, value_offset, length))
        return true; /* purecov: inspected */
      *changed = true;
    }
  }

  DBUG_ASSERT(new_entry.length() == value_entry_size(m_large));

  /*
    Type and offset will often be unchanged. Don't create a change
    record unless they have actually changed.
  */
  const char *const entry = m_data + value_entry_offset(pos);
  if (memcmp(entry, new_entry.ptr(), new_entry.length()) != 0) {
    const size_t entry_offset = entry - original;
    memcpy(destination + entry_offset, new_entry.ptr(), new_entry.length());
    if (field->table->add_binary_diff(field, entry_offset, new_entry.length()))
      return true; /* purecov: inspected */
    *changed = true;
  }

  return false;
}
1795
1796 /**
1797 Remove a value from an array or object. The updated JSON document is written
1798 to a shadow copy. The original document is left unchanged, unless the shadow
1799 copy is actually a pointer to the array backing this Value object. It is
1800 assumed that the shadow copy is at least as big as the original document, and
1801 that there is enough space at the given position to hold the new value.
1802
1803 Typically, if a document is modified multiple times in a single update
1804 statement, the first invocation of remove_in_shadow() will have a Value
1805 object that points into the binary data in the Field, and write to a separate
1806 destination buffer. Subsequent updates of the document will have a Value
1807 object that points to the partially updated value in the destination buffer,
1808 and write the new modifications to the same buffer.
1809
1810 All changes made to the binary value are recorded as binary diffs using
1811 TABLE::add_binary_diff().
1812
1813 @param field the column that is updated
1814 @param pos the element to remove
1815 @param original pointer to the start of the JSON document
1816 @param destination pointer to the shadow copy of the JSON document
1817 (it could be the same as @a original, in which case the
1818 original document will be modified)
1819 @return false on success, true if an error occurred
1820
1821 @par Example of partial update
1822
1823 Take the JSON document { "a": "x", "b": "y", "c": "z" }, whose serialized
1824 representation looks like the following:
1825
1826 0x00 - type: JSONB_TYPE_SMALL_OBJECT
1827 0x03 - number of elements (low byte)
1828 0x00 - number of elements (high byte)
1829 0x22 - number of bytes (low byte)
1830 0x00 - number of bytes (high byte)
    0x19 - offset of key "a" (low byte)
    0x00 - offset of key "a" (high byte)
    0x01 - length of key "a" (low byte)
    0x00 - length of key "a" (high byte)
    0x1a - offset of key "b" (low byte)
    0x00 - offset of key "b" (high byte)
    0x01 - length of key "b" (low byte)
    0x00 - length of key "b" (high byte)
    0x1b - offset of key "c" (low byte)
    0x00 - offset of key "c" (high byte)
    0x01 - length of key "c" (low byte)
    0x00 - length of key "c" (high byte)
    0x0c - type of value "a": JSONB_TYPE_STRING
    0x1c - offset of value "a" (low byte)
    0x00 - offset of value "a" (high byte)
    0x0c - type of value "b": JSONB_TYPE_STRING
    0x1e - offset of value "b" (low byte)
    0x00 - offset of value "b" (high byte)
    0x0c - type of value "c": JSONB_TYPE_STRING
    0x20 - offset of value "c" (low byte)
    0x00 - offset of value "c" (high byte)
1852 0x61 - first key ('a')
1853 0x62 - second key ('b')
1854 0x63 - third key ('c')
1855 0x01 - length of value "a"
1856 0x78 - contents of value "a" ('x')
1857 0x01 - length of value "b"
1858 0x79 - contents of value "b" ('y')
1859 0x01 - length of value "c"
1860 0x7a - contents of value "c" ('z')
1861
1862 We remove the member with name 'b' from the document, using a statement such
1863 as:
1864
1865 UPDATE t SET j = JSON_REMOVE(j, '$.b')
1866
1867 This function will then remove the element by moving the key entries and
1868 value entries that follow the removed member so that they overwrite the
1869 existing entries, and the element count is decremented.
1870
1871 The resulting binary document will look like this:
1872
1873 0x00 - type: JSONB_TYPE_SMALL_OBJECT
1874 CHANGED 0x02 - number of elements (low byte)
1875 0x00 - number of elements (high byte)
1876 0x22 - number of bytes (low byte)
1877 0x00 - number of bytes (high byte)
            0x19 - offset of key "a" (low byte)
            0x00 - offset of key "a" (high byte)
            0x01 - length of key "a" (low byte)
            0x00 - length of key "a" (high byte)
    CHANGED 0x1b - offset of key "c" (low byte)
    CHANGED 0x00 - offset of key "c" (high byte)
    CHANGED 0x01 - length of key "c" (low byte)
    CHANGED 0x00 - length of key "c" (high byte)
    CHANGED 0x0c - type of value "a": JSONB_TYPE_STRING
    CHANGED 0x1c - offset of value "a" (low byte)
    CHANGED 0x00 - offset of value "a" (high byte)
    CHANGED 0x0c - type of value "c": JSONB_TYPE_STRING
    CHANGED 0x20 - offset of value "c" (low byte)
    CHANGED 0x00 - offset of value "c" (high byte)
1892 (free) 0x00
1893 (free) 0x0c
1894 (free) 0x1e
1895 (free) 0x00
1896 (free) 0x0c
1897 (free) 0x20
1898 (free) 0x00
1899 0x61 - first key ('a')
1900 (free) 0x62
1901 0x63 - third key ('c')
1902 0x01 - length of value "a"
1903 0x78 - contents of value "a" ('x')
1904 (free) 0x01
1905 (free) 0x79
1906 0x01 - length of value "c"
1907 0x7a - contents of value "c" ('z')
1908
1909 Two binary diffs will be created. One diff changes the element count, and one
1910 diff changes the key and value entries.
1911 */
bool Value::remove_in_shadow(const Field_json *field, size_t pos,
                             const char *original, char *destination) const {
  DBUG_ASSERT(m_type == ARRAY || m_type == OBJECT);

  const char *value_entry = m_data + value_entry_offset(pos);
  const char *next_value_entry = value_entry + value_entry_size(m_large);

  /*
    If it's an object, we first remove the key entry by shifting all subsequent
    key entries to the left, and also all value entries up to the one that's
    being removed.
  */
  if (m_type == OBJECT) {
    const char *key_entry = m_data + key_entry_offset(pos);
    const char *next_key_entry = key_entry + key_entry_size(m_large);
    // Move everything between the following key entry and the removed value
    // entry (the remaining key entries plus the value entries that precede
    // pos) left by one key entry size. memmove is required since source and
    // destination ranges may overlap.
    size_t len = value_entry - next_key_entry;
    memmove(destination + (key_entry - original), next_key_entry, len);
    if (field->table->add_binary_diff(field, key_entry - original, len))
      return true; /* purecov: inspected */

    /*
      Adjust the destination of the value entry to account for the removed key
      entry.
    */
    value_entry -= key_entry_size(m_large);
  }

  /*
    Next, remove the value entry by shifting all subsequent value entries to
    the left.
  */
  const char *value_entry_end = m_data + value_entry_offset(m_element_count);
  size_t len = value_entry_end - next_value_entry;
  memmove(destination + (value_entry - original), next_value_entry, len);
  if (field->table->add_binary_diff(field, value_entry - original, len))
    return true; /* purecov: inspected */

  /*
    Finally, update the element count. The removed element's key and value
    bytes in the data section are left in place as unused (free) space.
  */
  write_offset_or_size(destination + (m_data - original), m_element_count - 1,
                       m_large);
  return field->table->add_binary_diff(field, m_data - original,
                                       offset_size(m_large));
}
1957
1958 /**
1959 Get the amount of unused space in the binary representation of this value.
1960
1961 @param thd THD handle
1962 @param[out] space the amount of free space
1963 @return false on success, true on error
1964 */
bool Value::get_free_space(const THD *thd, size_t *space) const {
  *space = 0;

  switch (m_type) {
    case ARRAY:
    case OBJECT:
      break;
    default:
      // Scalars don't have any holes, so return immediately.
      return false;
  }

  if (m_type == OBJECT) {
    // The first key should come right after the last value entry.
    const char *next_key = m_data + value_entry_offset(m_element_count);

    // Sum up all unused space between keys.
    for (size_t i = 0; i < m_element_count; ++i) {
      Value key = this->key(i);
      if (key.type() == ERROR) {
        my_error(ER_INVALID_JSON_BINARY_DATA, MYF(0));
        return true;
      }
      // Any gap between where the previous key ended and where this key
      // starts is unused space.
      *space += key.get_data() - next_key;
      next_key = key.get_data() + key.get_data_length();
    }
  }

  size_t next_value_offset;
  if (first_value_offset(&next_value_offset)) {
    my_error(ER_INVALID_JSON_BINARY_DATA, MYF(0));
    return true;
  }

  // Find the "holes" between and inside each element in the array or object.
  for (size_t i = 0; i < m_element_count; ++i) {
    size_t elt_start;
    size_t elt_end;
    bool inlined;
    if (element_offsets(i, &elt_start, &elt_end, &inlined)) {
      my_error(ER_INVALID_JSON_BINARY_DATA, MYF(0));
      return true;
    }

    // Inlined values live in the value entry, not the data section, so they
    // cannot contribute any free space.
    if (inlined) continue;

    // Sanity check: elements must not overlap and must stay inside the
    // enclosing value. Out-of-bounds offsets mean corrupt binary data.
    if (elt_start < next_value_offset || elt_end > m_length) {
      my_error(ER_INVALID_JSON_BINARY_DATA, MYF(0));
      return true;
    }

    *space += elt_start - next_value_offset;
    next_value_offset = elt_end;

    Value elt = element(i);
    switch (elt.type()) {
      case ARRAY:
      case OBJECT: {
        // Recursively process nested arrays or objects.
        if (check_stack_overrun(thd, STACK_MIN_SIZE, nullptr))
          return true; /* purecov: inspected */
        size_t elt_space;
        if (elt.get_free_space(thd, &elt_space)) return true;
        *space += elt_space;
        break;
      }
      case ERROR:
        /* purecov: begin inspected */
        my_error(ER_INVALID_JSON_BINARY_DATA, MYF(0));
        return true;
        /* purecov: end */
      default:
        break;
    }
  }

  // Account for any trailing unused space after the last element.
  *space += m_length - next_value_offset;
  return false;
}
2044
2045 /**
2046 Check whether two binary JSON scalars are equal. This function is used by
2047 multi-valued index updating code. Unlike JSON comparator implemented in
2048 server, this code doesn't treat numeric types as the same, e.g. int 1 and
2049 uint 1 won't be treated as equal. This is fine as the mv index updating code
2050 compares old and new values of the same typed array field, i.e. all values
2051 being compared have the same type.
2052
2053 Since MV index doesn't support indexing of arrays/objects in arrays, these
2054 two aren't supported and cause assert.
2055 */
2056
eq(const Value & val) const2057 int Value::eq(const Value &val) const {
2058 DBUG_ASSERT(is_valid() && val.is_valid());
2059
2060 if (type() != val.type()) {
2061 return type() < val.type() ? -1 : 1;
2062 }
2063 switch (m_type) {
2064 case OBJECT:
2065 case ARRAY:
2066 DBUG_ASSERT(0);
2067 return -1;
2068 case OPAQUE:
2069 if (m_field_type != val.m_field_type)
2070 return m_field_type < val.m_field_type ? -1 : 1;
2071 /* Fall through */
2072 case STRING: {
2073 uint cmp_length = std::min(get_data_length(), val.get_data_length());
2074 int res;
2075 if (!(res = memcmp(get_data(), val.get_data(), cmp_length)))
2076 return (get_data_length() < val.get_data_length())
2077 ? -1
2078 : ((get_data_length() == val.get_data_length()) ? 0 : 1);
2079 return res;
2080 }
2081 case INT:
2082 case UINT:
2083 return (m_int_value == val.m_int_value)
2084 ? 0
2085 : ((m_int_value < val.m_int_value) ? -1 : 1);
2086 case DOUBLE:
2087 return (m_double_value == val.m_double_value)
2088 ? 0
2089 : ((m_double_value < val.m_double_value) ? -1 : 1);
2090 case LITERAL_NULL:
2091 case LITERAL_TRUE:
2092 case LITERAL_FALSE:
2093 return 0;
2094 default:
2095 DBUG_ASSERT(0); // Shouldn't happen
2096 break;
2097 }
2098 return -1;
2099 }
2100 #endif // ifdef MYSQL_SERVER
2101
2102 } // end namespace json_binary
2103