1 /* Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 #include "sql/json_binary.h"
24 
25 #include <string.h>
26 #include <algorithm>  // std::min
27 #include <map>
28 #include <memory>
29 #include <string>
30 #include <utility>
31 
32 #include "m_ctype.h"
33 #include "my_byteorder.h"
34 #include "my_dbug.h"
35 #include "my_sys.h"
36 #include "mysqld_error.h"
37 #ifdef MYSQL_SERVER
38 #include "sql/check_stack.h"
39 #endif
40 #include "sql/field.h"     // Field_json
41 #include "sql/json_dom.h"  // Json_dom
42 #include "sql/json_syntax_check.h"
43 #include "sql/sql_class.h"  // THD
44 #include "sql/sql_const.h"
45 #include "sql/system_variables.h"
46 #include "sql/table.h"  // TABLE::add_binary_diff()
47 #include "sql_string.h"
48 #include "template_utils.h"  // down_cast
49 
50 namespace {
51 
52 constexpr char JSONB_TYPE_SMALL_OBJECT = 0x0;
53 constexpr char JSONB_TYPE_LARGE_OBJECT = 0x1;
54 constexpr char JSONB_TYPE_SMALL_ARRAY = 0x2;
55 constexpr char JSONB_TYPE_LARGE_ARRAY = 0x3;
56 constexpr char JSONB_TYPE_LITERAL = 0x4;
57 constexpr char JSONB_TYPE_INT16 = 0x5;
58 constexpr char JSONB_TYPE_UINT16 = 0x6;
59 constexpr char JSONB_TYPE_INT32 = 0x7;
60 constexpr char JSONB_TYPE_UINT32 = 0x8;
61 constexpr char JSONB_TYPE_INT64 = 0x9;
62 constexpr char JSONB_TYPE_UINT64 = 0xA;
63 constexpr char JSONB_TYPE_DOUBLE = 0xB;
64 constexpr char JSONB_TYPE_STRING = 0xC;
65 constexpr char JSONB_TYPE_OPAQUE = 0xF;
66 
67 constexpr char JSONB_NULL_LITERAL = 0x0;
68 constexpr char JSONB_TRUE_LITERAL = 0x1;
69 constexpr char JSONB_FALSE_LITERAL = 0x2;
70 
71 /*
72   The size of offset or size fields in the small and the large storage
73   format for JSON objects and JSON arrays.
74 */
75 constexpr uint8 SMALL_OFFSET_SIZE = 2;
76 constexpr uint8 LARGE_OFFSET_SIZE = 4;
77 
78 /*
79   The size of key entries for objects when using the small storage
  format or the large storage format. In the small format it is 4
  bytes (a 2-byte key offset followed by a 2-byte key length). In the
  large format it is 6 bytes (a 4-byte offset followed by a 2-byte length).
83 */
84 constexpr uint8 KEY_ENTRY_SIZE_SMALL = 2 + SMALL_OFFSET_SIZE;
85 constexpr uint8 KEY_ENTRY_SIZE_LARGE = 2 + LARGE_OFFSET_SIZE;
86 
87 /*
88   The size of value entries for objects or arrays. When using the
89   small storage format, the entry size is 3 (1 byte for type, 2 bytes
90   for offset). When using the large storage format, it is 5 (1 byte
91   for type, 4 bytes for offset).
92 */
93 constexpr uint8 VALUE_ENTRY_SIZE_SMALL = 1 + SMALL_OFFSET_SIZE;
94 constexpr uint8 VALUE_ENTRY_SIZE_LARGE = 1 + LARGE_OFFSET_SIZE;
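
/*
  For illustration, a small-format object value is laid out roughly as

    element-count (2 bytes) | byte-size (2 bytes) |
    key entries (4 bytes each) | value entries (3 bytes each) |
    key bytes | non-inlined values

  Arrays use the same layout without the key entries and key bytes. The
  large format uses 4-byte counts, sizes and offsets instead of 2-byte
  ones.
*/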
95 
96 }  // namespace
97 
98 namespace json_binary {
99 
100 /// Status codes for JSON serialization.
101 enum enum_serialization_result {
102   /**
103     Success. The JSON value was successfully serialized.
104   */
105   OK,
106   /**
107     The JSON value was too big to be serialized. If this status code
108     is returned, and the small storage format is in use, the caller
109     should retry the serialization with the large storage format. If
110     this status code is returned, and the large format is in use,
111     my_error() will already have been called.
112   */
113   VALUE_TOO_BIG,
114   /**
115     Some other error occurred. my_error() will have been called with
116     more specific information about the failure.
117   */
118   FAILURE
119 };
120 
121 #ifdef MYSQL_SERVER
122 static enum_serialization_result serialize_json_value(
123     const THD *thd, const Json_dom *dom, size_t type_pos, String *dest,
124     size_t depth, bool small_parent);
125 static void write_offset_or_size(char *dest, size_t offset_or_size, bool large);
126 #endif  // ifdef MYSQL_SERVER
127 static uint8 offset_size(bool large);
128 
129 #ifdef MYSQL_SERVER
bool serialize(const THD *thd, const Json_dom *dom, String *dest) {
131   // Reset the destination buffer.
132   dest->length(0);
133   dest->set_charset(&my_charset_bin);
134 
135   // Reserve space (one byte) for the type identifier.
136   if (dest->append('\0')) return true; /* purecov: inspected */
137   return serialize_json_value(thd, dom, 0, dest, 0, false) != OK;
138 }
139 
140 /**
  Reserve space for the given number of extra bytes at the end of a
142   String buffer. If the String needs to allocate more memory, it will
143   grow by at least 50%, to avoid frequent reallocations.
144 */
static bool reserve(String *buffer, size_t bytes_needed) {
146   return buffer->reserve(bytes_needed, buffer->length() / 2);
147 }
148 
149 /** Encode a 16-bit int at the end of the destination string. */
static bool append_int16(String *dest, int16 value) {
151   if (reserve(dest, sizeof(value))) return true; /* purecov: inspected */
152   int2store(dest->ptr() + dest->length(), value);
153   dest->length(dest->length() + sizeof(value));
154   return false;
155 }
156 
157 /** Encode a 32-bit int at the end of the destination string. */
static bool append_int32(String *dest, int32 value) {
159   if (reserve(dest, sizeof(value))) return true; /* purecov: inspected */
160   int4store(dest->ptr() + dest->length(), value);
161   dest->length(dest->length() + sizeof(value));
162   return false;
163 }
164 
165 /** Encode a 64-bit int at the end of the destination string. */
static bool append_int64(String *dest, int64 value) {
167   if (reserve(dest, sizeof(value))) return true; /* purecov: inspected */
168   int8store(dest->ptr() + dest->length(), value);
169   dest->length(dest->length() + sizeof(value));
170   return false;
171 }
172 
173 /**
174   Append an offset or a size to a String.
175 
176   @param dest  the destination String
177   @param offset_or_size  the offset or size to append
178   @param large  if true, use the large storage format (4 bytes);
179                 otherwise, use the small storage format (2 bytes)
180   @return false if successfully appended, true otherwise
181 */
static bool append_offset_or_size(String *dest, size_t offset_or_size,
183                                   bool large) {
184   if (large)
185     return append_int32(dest, static_cast<int32>(offset_or_size));
186   else
187     return append_int16(dest, static_cast<int16>(offset_or_size));
188 }
189 
190 /**
191   Insert an offset or a size at the specified position in a String. It
192   is assumed that the String has already allocated enough space to
193   hold the value.
194 
195   @param dest  the destination String
196   @param pos   the position in the String
  @param offset_or_size  the offset or size to insert
198   @param large  if true, use the large storage format (4 bytes);
199                 otherwise, use the small storage format (2 bytes)
200 */
static void insert_offset_or_size(String *dest, size_t pos,
202                                   size_t offset_or_size, bool large) {
203   DBUG_ASSERT(pos + offset_size(large) <= dest->alloced_length());
204   write_offset_or_size(dest->ptr() + pos, offset_or_size, large);
205 }
206 
207 /**
208   Write an offset or a size to a char array. The char array is assumed to be
209   large enough to hold an offset or size value.
210 
211   @param dest            the array to write to
212   @param offset_or_size  the offset or size to write
213   @param large           if true, use the large storage format
214 */
static void write_offset_or_size(char *dest, size_t offset_or_size,
216                                  bool large) {
217   if (large)
218     int4store(dest, static_cast<uint32>(offset_or_size));
219   else
220     int2store(dest, static_cast<uint16>(offset_or_size));
221 }
222 
223 /**
224   Check if the size of a document exceeds the maximum JSON binary size
225   (4 GB, aka UINT_MAX32). Raise an error if it is too big.
226 
227   @param size  the size of the document
228   @return true if the document is too big, false otherwise
229 */
static bool check_document_size(size_t size) {
231   if (size > UINT_MAX32) {
232     /* purecov: begin inspected */
233     my_error(ER_JSON_VALUE_TOO_BIG, MYF(0));
234     return true;
235     /* purecov: end */
236   }
237   return false;
238 }
239 
240 /**
  Append a length to a String. The length is stored in a variable number
  of bytes, depending on how large it is. If the highest bit in a byte is
  1, the length continues in the next byte. The least significant bits
  are stored in the first byte.
245 
246   @param  dest   the destination String
247   @param  length the length to write
248   @return false on success, true on error
249 */
static bool append_variable_length(String *dest, size_t length) {
251   do {
252     // Filter out the seven least significant bits of length.
253     uchar ch = (length & 0x7F);
254 
255     /*
256       Right-shift length to drop the seven least significant bits. If there
257       is more data in length, set the high bit of the byte we're writing
258       to the String.
259     */
260     length >>= 7;
261     if (length != 0) ch |= 0x80;
262 
263     if (dest->append(ch)) return true; /* purecov: inspected */
264   } while (length != 0);
265 
266   if (check_document_size(dest->length() + length))
267     return true; /* purecov: inspected */
268 
269   // Successfully appended the length.
270   return false;
271 }
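
/*
  As a worked example, the length 1000 (0x3E8) is encoded in two bytes:
  the low seven bits (0x68) with the continuation bit set give 0xE8, and
  the remaining bits give 0x07, so the encoding is E8 07.
*/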
272 #endif  // ifdef MYSQL_SERVER
273 
274 /**
275   Read a variable length written by append_variable_length().
276 
277   @param[in] data  the buffer to read from
278   @param[in] data_length  the maximum number of bytes to read from data
279   @param[out] length  the length that was read
280   @param[out] num  the number of bytes needed to represent the length
281   @return  false on success, true if the variable length field is ill-formed
282 */
static bool read_variable_length(const char *data, size_t data_length,
284                                  uint32 *length, uint8 *num) {
285   /*
286     It takes five bytes to represent UINT_MAX32, which is the largest
287     supported length, so don't look any further.
288   */
289   const size_t max_bytes = std::min(data_length, static_cast<size_t>(5));
290 
291   size_t len = 0;
292   for (size_t i = 0; i < max_bytes; i++) {
293     // Get the next 7 bits of the length.
294     len |= (data[i] & 0x7f) << (7 * i);
295     if ((data[i] & 0x80) == 0) {
296       // The length shouldn't exceed 32 bits.
297       if (len > UINT_MAX32) return true; /* purecov: inspected */
298 
299       // This was the last byte. Return successfully.
300       *num = static_cast<uint8>(i + 1);
301       *length = static_cast<uint32>(len);
302       return false;
303     }
304   }
305 
306   // No more available bytes. Return true to signal error.
307   return true; /* purecov: inspected */
308 }
309 
310 /**
311   Check if the specified offset or size is too big to store in the
312   binary JSON format.
313 
314   If the small storage format is used, the caller is expected to retry
315   serialization in the large storage format, so no error is generated
316   if the offset or size is too big. If the large storage format is
317   used, an error will be generated if the offset or size is too big.
318 
319   @param offset_or_size  the offset or size to check
320   @param large    if true, we are using the large storage format
321     for JSON arrays and objects, which allows offsets and sizes that
322     fit in a uint32; otherwise, we are using the small storage format,
    which allows offsets and sizes that fit in a uint16.
324   @return true if offset_or_size is too big for the format, false
325     otherwise
326 */
327 #ifdef MYSQL_SERVER
static bool is_too_big_for_json(size_t offset_or_size, bool large) {
329   if (offset_or_size > UINT_MAX16) {
330     if (!large) return true;
331     return check_document_size(offset_or_size);
332   }
333 
334   return false;
335 }
336 
337 /**
338   Append all the key entries of a JSON object to a destination string.
339   The key entries are just a series of offset/length pairs that point
340   to where the actual key names are stored.
341 
342   @param[in]  object  the JSON object
343   @param[out] dest    the destination string
344   @param[in]  offset  the offset of the first key
345   @param[in]  large   if true, the large storage format will be used
346   @return serialization status
347 */
static enum_serialization_result append_key_entries(const Json_object *object,
349                                                     String *dest, size_t offset,
350                                                     bool large) {
351 #ifndef DBUG_OFF
352   const std::string *prev_key = nullptr;
353 #endif
354 
355   // Add the key entries.
356   for (Json_object::const_iterator it = object->begin(); it != object->end();
357        ++it) {
358     const std::string *key = &it->first;
359     size_t len = key->length();
360 
361 #ifndef DBUG_OFF
362     // Check that the DOM returns the keys in the correct order.
363     if (prev_key) {
364       DBUG_ASSERT(prev_key->length() <= len);
365       if (len == prev_key->length())
366         DBUG_ASSERT(memcmp(prev_key->data(), key->data(), len) < 0);
367     }
368     prev_key = key;
369 #endif
370 
371     // We only have two bytes for the key size. Check if the key is too big.
372     if (len > UINT_MAX16) {
373       my_error(ER_JSON_KEY_TOO_BIG, MYF(0));
374       return FAILURE;
375     }
376 
377     if (is_too_big_for_json(offset, large))
378       return VALUE_TOO_BIG; /* purecov: inspected */
379 
380     if (append_offset_or_size(dest, offset, large) ||
381         append_int16(dest, static_cast<int16>(len)))
382       return FAILURE; /* purecov: inspected */
383     offset += len;
384   }
385 
386   return OK;
387 }
388 #endif  // ifdef MYSQL_SERVER
389 
390 /**
391   Will a value of the specified type be inlined?
392   @param type  the type to check
393   @param large true if the large storage format is used
394   @return true if the value will be inlined
395 */
static bool inlined_type(uint8 type, bool large) {
397   switch (type) {
398     case JSONB_TYPE_LITERAL:
399     case JSONB_TYPE_INT16:
400     case JSONB_TYPE_UINT16:
401       return true;
402     case JSONB_TYPE_INT32:
403     case JSONB_TYPE_UINT32:
404       return large;
405     default:
406       return false;
407   }
408 }
409 
410 /**
411   Get the size of an offset value.
412   @param large true if the large storage format is used
413   @return the size of an offset
414 */
static uint8 offset_size(bool large) {
416   return large ? LARGE_OFFSET_SIZE : SMALL_OFFSET_SIZE;
417 }
418 
419 /**
420   Get the size of a key entry.
421   @param large true if the large storage format is used
422   @return the size of a key entry
423 */
static uint8 key_entry_size(bool large) {
425   return large ? KEY_ENTRY_SIZE_LARGE : KEY_ENTRY_SIZE_SMALL;
426 }
427 
428 /**
429   Get the size of a value entry.
430   @param large true if the large storage format is used
431   @return the size of a value entry
432 */
static uint8 value_entry_size(bool large) {
434   return large ? VALUE_ENTRY_SIZE_LARGE : VALUE_ENTRY_SIZE_SMALL;
435 }
436 
437 /**
438   Attempt to inline a value in its value entry at the beginning of an
439   object or an array. This function assumes that the destination
440   string has already allocated enough space to hold the inlined value.
441 
442   @param[in] value the JSON value
443   @param[out] dest the destination string
444   @param[in] pos   the offset where the value should be inlined
445   @param[in] large true if the large storage format is used
446   @return true if the value was inlined, false if it was not
447 */
448 #ifdef MYSQL_SERVER
static bool attempt_inline_value(const Json_dom *value, String *dest,
450                                  size_t pos, bool large) {
451   int32 inlined_val;
452   char inlined_type;
453   switch (value->json_type()) {
454     case enum_json_type::J_NULL:
455       inlined_val = JSONB_NULL_LITERAL;
456       inlined_type = JSONB_TYPE_LITERAL;
457       break;
458     case enum_json_type::J_BOOLEAN:
459       inlined_val = down_cast<const Json_boolean *>(value)->value()
460                         ? JSONB_TRUE_LITERAL
461                         : JSONB_FALSE_LITERAL;
462       inlined_type = JSONB_TYPE_LITERAL;
463       break;
464     case enum_json_type::J_INT: {
465       const Json_int *i = down_cast<const Json_int *>(value);
466       if (!i->is_16bit() && !(large && i->is_32bit()))
467         return false;  // cannot inline this value
468       inlined_val = static_cast<int32>(i->value());
469       inlined_type = i->is_16bit() ? JSONB_TYPE_INT16 : JSONB_TYPE_INT32;
470       break;
471     }
472     case enum_json_type::J_UINT: {
473       const Json_uint *i = down_cast<const Json_uint *>(value);
474       if (!i->is_16bit() && !(large && i->is_32bit()))
475         return false;  // cannot inline this value
476       inlined_val = static_cast<int32>(i->value());
477       inlined_type = i->is_16bit() ? JSONB_TYPE_UINT16 : JSONB_TYPE_UINT32;
478       break;
479     }
480     default:
481       return false;  // cannot inline value of this type
482   }
483 
484   (*dest)[pos] = inlined_type;
485   insert_offset_or_size(dest, pos + 1, inlined_val, large);
486   return true;
487 }
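
/*
  For example, in the small storage format the integer 7 is inlined
  directly into its three-byte value entry as 05 07 00 (the type byte
  JSONB_TYPE_INT16 followed by the little-endian value), so nothing is
  appended to the document for it.
*/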
488 
489 /**
490   Serialize a JSON array at the end of the destination string.
491 
492   @param thd    THD handle
493   @param array  the JSON array to serialize
494   @param dest   the destination string
495   @param large  if true, the large storage format will be used
496   @param depth  the current nesting level
497   @return serialization status
498 */
static enum_serialization_result serialize_json_array(const THD *thd,
500                                                       const Json_array *array,
501                                                       String *dest, bool large,
502                                                       size_t depth) {
503   if (check_stack_overrun(thd, STACK_MIN_SIZE, nullptr))
504     return FAILURE; /* purecov: inspected */
505 
506   const size_t start_pos = dest->length();
507   const size_t size = array->size();
508 
509   if (check_json_depth(++depth)) {
510     return FAILURE;
511   }
512 
513   if (is_too_big_for_json(size, large)) return VALUE_TOO_BIG;
514 
515   // First write the number of elements in the array.
516   if (append_offset_or_size(dest, size, large))
517     return FAILURE; /* purecov: inspected */
518 
519   // Reserve space for the size of the array in bytes. To be filled in later.
520   const size_t size_pos = dest->length();
521   if (append_offset_or_size(dest, 0, large))
522     return FAILURE; /* purecov: inspected */
523 
524   size_t entry_pos = dest->length();
525 
526   // Reserve space for the value entries at the beginning of the array.
527   const auto entry_size = value_entry_size(large);
528   if (dest->fill(dest->length() + size * entry_size, 0))
529     return FAILURE; /* purecov: inspected */
530 
531   for (const auto &child : *array) {
532     const Json_dom *elt = child.get();
533     if (!attempt_inline_value(elt, dest, entry_pos, large)) {
534       size_t offset = dest->length() - start_pos;
535       if (is_too_big_for_json(offset, large)) return VALUE_TOO_BIG;
536       insert_offset_or_size(dest, entry_pos + 1, offset, large);
537       auto res = serialize_json_value(thd, elt, entry_pos, dest, depth, !large);
538       if (res != OK) return res;
539     }
540     entry_pos += entry_size;
541   }
542 
  // Finally, write the size of the array in bytes.
544   size_t bytes = dest->length() - start_pos;
545   if (is_too_big_for_json(bytes, large))
546     return VALUE_TOO_BIG; /* purecov: inspected */
547   insert_offset_or_size(dest, size_pos, bytes, large);
548 
549   return OK;
550 }
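
/*
  As a sketch of the result, the array [1, "ab"] serialized in the small
  format comes out roughly as (type byte followed by the array value):

    02           JSONB_TYPE_SMALL_ARRAY
    02 00        element count = 2
    0D 00        size of the array value = 13 bytes
    05 01 00     value entry 0: inlined INT16 with value 1
    0C 0A 00     value entry 1: STRING stored at offset 10
    02 61 62     variable-length size 2, followed by "ab"
*/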
551 
552 /**
553   Serialize a JSON object at the end of the destination string.
554 
555   @param thd    THD handle
556   @param object the JSON object to serialize
557   @param dest   the destination string
558   @param large  if true, the large storage format will be used
559   @param depth  the current nesting level
560   @return serialization status
561 */
static enum_serialization_result serialize_json_object(
563     const THD *thd, const Json_object *object, String *dest, bool large,
564     size_t depth) {
565   if (check_stack_overrun(thd, STACK_MIN_SIZE, nullptr))
566     return FAILURE; /* purecov: inspected */
567 
568   const size_t start_pos = dest->length();
569   const size_t size = object->cardinality();
570 
571   if (check_json_depth(++depth)) {
572     return FAILURE;
573   }
574 
575   if (is_too_big_for_json(size, large))
576     return VALUE_TOO_BIG; /* purecov: inspected */
577 
578   // First write the number of members in the object.
579   if (append_offset_or_size(dest, size, large))
580     return FAILURE; /* purecov: inspected */
581 
582   // Reserve space for the size of the object in bytes. To be filled in later.
583   const size_t size_pos = dest->length();
584   if (append_offset_or_size(dest, 0, large))
585     return FAILURE; /* purecov: inspected */
586 
587   const auto key_entry_size = json_binary::key_entry_size(large);
588   const auto value_entry_size = json_binary::value_entry_size(large);
589 
590   /*
591     Calculate the offset of the first key relative to the start of the
592     object. The first key comes right after the value entries.
593   */
594   const size_t first_key_offset =
595       dest->length() + size * (key_entry_size + value_entry_size) - start_pos;
596 
597   // Append all the key entries.
598   enum_serialization_result res =
599       append_key_entries(object, dest, first_key_offset, large);
600   if (res != OK) return res;
601 
602   const size_t start_of_value_entries = dest->length();
603 
604   // Reserve space for the value entries. Will be filled in later.
605   dest->fill(dest->length() + size * value_entry_size, 0);
606 
607   // Add the actual keys.
608   for (const auto &member : *object) {
609     if (dest->append(member.first.c_str(), member.first.length()))
610       return FAILURE; /* purecov: inspected */
611   }
612 
613   // Add the values, and update the value entries accordingly.
614   size_t entry_pos = start_of_value_entries;
615   for (const auto &member : *object) {
616     const Json_dom *child = member.second.get();
617     if (!attempt_inline_value(child, dest, entry_pos, large)) {
618       size_t offset = dest->length() - start_pos;
619       if (is_too_big_for_json(offset, large)) return VALUE_TOO_BIG;
620       insert_offset_or_size(dest, entry_pos + 1, offset, large);
621       res = serialize_json_value(thd, child, entry_pos, dest, depth, !large);
622       if (res != OK) return res;
623     }
624     entry_pos += value_entry_size;
625   }
626 
627   // Finally, write the size of the object in bytes.
628   size_t bytes = dest->length() - start_pos;
629   if (is_too_big_for_json(bytes, large)) return VALUE_TOO_BIG;
630   insert_offset_or_size(dest, size_pos, bytes, large);
631 
632   return OK;
633 }
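
/*
  As a sketch of the result, the object {"a": 1} serialized in the small
  format comes out roughly as (type byte followed by the object value):

    00           JSONB_TYPE_SMALL_OBJECT
    01 00        member count = 1
    0C 00        size of the object value = 12 bytes
    0B 00 01 00  key entry: key at offset 11, key length 1
    05 01 00     value entry: inlined INT16 with value 1
    61           the key "a"
*/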
634 
635 /**
636   Serialize a JSON opaque value at the end of the destination string.
637   @param[in]  opaque    the JSON opaque value
638   @param[in]  type_pos  where to write the type specifier
639   @param[out] dest      the destination string
640   @return serialization status
641 */
static enum_serialization_result serialize_opaque(const Json_opaque *opaque,
643                                                   size_t type_pos,
644                                                   String *dest) {
645   DBUG_ASSERT(type_pos < dest->length());
646   if (dest->append(static_cast<char>(opaque->type())) ||
647       append_variable_length(dest, opaque->size()) ||
648       dest->append(opaque->value(), opaque->size()))
649     return FAILURE; /* purecov: inspected */
650   (*dest)[type_pos] = JSONB_TYPE_OPAQUE;
651   return OK;
652 }
653 
654 /**
655   Serialize a DECIMAL value at the end of the destination string.
656   @param[in]  jd        the DECIMAL value
657   @param[in]  type_pos  where to write the type specifier
658   @param[out] dest      the destination string
659   @return serialization status
660 */
static enum_serialization_result serialize_decimal(const Json_decimal *jd,
662                                                    size_t type_pos,
663                                                    String *dest) {
664   // Store DECIMALs as opaque values.
665   const int bin_size = jd->binary_size();
666   char buf[Json_decimal::MAX_BINARY_SIZE];
667   if (jd->get_binary(buf)) return FAILURE; /* purecov: inspected */
668   Json_opaque o(MYSQL_TYPE_NEWDECIMAL, buf, bin_size);
669   return serialize_opaque(&o, type_pos, dest);
670 }
671 
672 /**
673   Serialize a DATETIME value at the end of the destination string.
674   @param[in]  jdt       the DATETIME value
675   @param[in]  type_pos  where to write the type specifier
676   @param[out] dest      the destination string
677   @return serialization status
678 */
static enum_serialization_result serialize_datetime(const Json_datetime *jdt,
680                                                     size_t type_pos,
681                                                     String *dest) {
682   // Store datetime as opaque values.
683   char buf[Json_datetime::PACKED_SIZE];
684   jdt->to_packed(buf);
685   Json_opaque o(jdt->field_type(), buf, sizeof(buf));
686   return serialize_opaque(&o, type_pos, dest);
687 }
688 
689 /**
690   Serialize a JSON value at the end of the destination string.
691 
692   Also go back and update the type specifier for the value to specify
693   the correct type. For top-level documents, the type specifier is
694   located in the byte right in front of the value. For documents that
695   are nested within other documents, the type specifier is located in
696   the value entry portion at the beginning of the parent document.
697 
698   @param thd       THD handle
699   @param dom       the JSON value to serialize
700   @param type_pos  the position of the type specifier to update
701   @param dest      the destination string
702   @param depth     the current nesting level
703   @param small_parent
704                    tells if @a dom is contained in an array or object
705                    which is stored in the small storage format
706   @return          serialization status
707 */
static enum_serialization_result serialize_json_value(
709     const THD *thd, const Json_dom *dom, size_t type_pos, String *dest,
710     size_t depth, bool small_parent) {
711   const size_t start_pos = dest->length();
712   DBUG_ASSERT(type_pos < start_pos);
713 
714   enum_serialization_result result;
715 
716   switch (dom->json_type()) {
717     case enum_json_type::J_ARRAY: {
718       const Json_array *array = down_cast<const Json_array *>(dom);
719       (*dest)[type_pos] = JSONB_TYPE_SMALL_ARRAY;
720       result = serialize_json_array(thd, array, dest, false, depth);
721       /*
722         If the array was too large to fit in the small storage format,
723         reset the destination buffer and retry with the large storage
724         format.
725 
726         Possible future optimization: Analyze size up front and pick the
727         correct format on the first attempt, so that we don't have to
728         redo parts of the serialization.
729       */
730       if (result == VALUE_TOO_BIG) {
731         // If the parent uses the small storage format, it needs to grow too.
732         if (small_parent) return VALUE_TOO_BIG;
733         dest->length(start_pos);
734         (*dest)[type_pos] = JSONB_TYPE_LARGE_ARRAY;
735         result = serialize_json_array(thd, array, dest, true, depth);
736       }
737       break;
738     }
739     case enum_json_type::J_OBJECT: {
740       const Json_object *object = down_cast<const Json_object *>(dom);
741       (*dest)[type_pos] = JSONB_TYPE_SMALL_OBJECT;
742       result = serialize_json_object(thd, object, dest, false, depth);
743       /*
744         If the object was too large to fit in the small storage format,
745         reset the destination buffer and retry with the large storage
746         format.
747 
748         Possible future optimization: Analyze size up front and pick the
749         correct format on the first attempt, so that we don't have to
750         redo parts of the serialization.
751       */
752       if (result == VALUE_TOO_BIG) {
753         // If the parent uses the small storage format, it needs to grow too.
754         if (small_parent) return VALUE_TOO_BIG;
755         dest->length(start_pos);
756         (*dest)[type_pos] = JSONB_TYPE_LARGE_OBJECT;
757         result = serialize_json_object(thd, object, dest, true, depth);
758       }
759       break;
760     }
761     case enum_json_type::J_STRING: {
762       const Json_string *jstr = down_cast<const Json_string *>(dom);
763       size_t size = jstr->size();
764       if (append_variable_length(dest, size) ||
765           dest->append(jstr->value().c_str(), size))
766         return FAILURE; /* purecov: inspected */
767       (*dest)[type_pos] = JSONB_TYPE_STRING;
768       result = OK;
769       break;
770     }
771     case enum_json_type::J_INT: {
772       const Json_int *i = down_cast<const Json_int *>(dom);
773       longlong val = i->value();
774       if (i->is_16bit()) {
775         if (append_int16(dest, static_cast<int16>(val)))
776           return FAILURE; /* purecov: inspected */
777         (*dest)[type_pos] = JSONB_TYPE_INT16;
778       } else if (i->is_32bit()) {
779         if (append_int32(dest, static_cast<int32>(val)))
780           return FAILURE; /* purecov: inspected */
781         (*dest)[type_pos] = JSONB_TYPE_INT32;
782       } else {
783         if (append_int64(dest, val)) return FAILURE; /* purecov: inspected */
784         (*dest)[type_pos] = JSONB_TYPE_INT64;
785       }
786       result = OK;
787       break;
788     }
789     case enum_json_type::J_UINT: {
790       const Json_uint *i = down_cast<const Json_uint *>(dom);
791       ulonglong val = i->value();
792       if (i->is_16bit()) {
793         if (append_int16(dest, static_cast<int16>(val)))
794           return FAILURE; /* purecov: inspected */
795         (*dest)[type_pos] = JSONB_TYPE_UINT16;
796       } else if (i->is_32bit()) {
797         if (append_int32(dest, static_cast<int32>(val)))
798           return FAILURE; /* purecov: inspected */
799         (*dest)[type_pos] = JSONB_TYPE_UINT32;
800       } else {
801         if (append_int64(dest, val)) return FAILURE; /* purecov: inspected */
802         (*dest)[type_pos] = JSONB_TYPE_UINT64;
803       }
804       result = OK;
805       break;
806     }
807     case enum_json_type::J_DOUBLE: {
808       // Store the double in a platform-independent eight-byte format.
809       const Json_double *d = down_cast<const Json_double *>(dom);
810       if (reserve(dest, 8)) return FAILURE; /* purecov: inspected */
811       float8store(dest->ptr() + dest->length(), d->value());
812       dest->length(dest->length() + 8);
813       (*dest)[type_pos] = JSONB_TYPE_DOUBLE;
814       result = OK;
815       break;
816     }
817     case enum_json_type::J_NULL:
818       if (dest->append(JSONB_NULL_LITERAL))
819         return FAILURE; /* purecov: inspected */
820       (*dest)[type_pos] = JSONB_TYPE_LITERAL;
821       result = OK;
822       break;
823     case enum_json_type::J_BOOLEAN: {
824       char c = (down_cast<const Json_boolean *>(dom)->value())
825                    ? JSONB_TRUE_LITERAL
826                    : JSONB_FALSE_LITERAL;
827       if (dest->append(c)) return FAILURE; /* purecov: inspected */
828       (*dest)[type_pos] = JSONB_TYPE_LITERAL;
829       result = OK;
830       break;
831     }
832     case enum_json_type::J_OPAQUE:
833       result =
834           serialize_opaque(down_cast<const Json_opaque *>(dom), type_pos, dest);
835       break;
836     case enum_json_type::J_DECIMAL:
837       result = serialize_decimal(down_cast<const Json_decimal *>(dom), type_pos,
838                                  dest);
839       break;
840     case enum_json_type::J_DATETIME:
841     case enum_json_type::J_DATE:
842     case enum_json_type::J_TIME:
843     case enum_json_type::J_TIMESTAMP:
844       result = serialize_datetime(down_cast<const Json_datetime *>(dom),
845                                   type_pos, dest);
846       break;
847     default:
848       /* purecov: begin deadcode */
849       DBUG_ASSERT(false);
850       my_error(ER_INTERNAL_ERROR, MYF(0), "JSON serialization failed");
851       return FAILURE;
852       /* purecov: end */
853   }
854 
855   if (result == OK && dest->length() > thd->variables.max_allowed_packet) {
856     my_error(ER_WARN_ALLOWED_PACKET_OVERFLOWED, MYF(0),
857              "json_binary::serialize", thd->variables.max_allowed_packet);
858     return FAILURE;
859   }
860 
861   return result;
862 }
863 #endif  // ifdef MYSQL_SERVER
864 
bool Value::is_valid() const {
866   switch (m_type) {
867     case ERROR:
868       return false;
869     case ARRAY:
870       // Check that all the array elements are valid.
871       for (size_t i = 0; i < element_count(); i++)
872         if (!element(i).is_valid()) return false; /* purecov: inspected */
873       return true;
874     case OBJECT: {
875       /*
876         Check that all keys and values are valid, and that the keys come
877         in the correct order.
878       */
879       const char *prev_key = nullptr;
880       size_t prev_key_len = 0;
881       for (size_t i = 0; i < element_count(); i++) {
882         Value k = key(i);
883         if (!k.is_valid() || !element(i).is_valid())
884           return false; /* purecov: inspected */
885         const char *curr_key = k.get_data();
886         size_t curr_key_len = k.get_data_length();
887         if (i > 0) {
888           if (prev_key_len > curr_key_len)
889             return false; /* purecov: inspected */
890           if (prev_key_len == curr_key_len &&
891               (memcmp(prev_key, curr_key, curr_key_len) >= 0))
892             return false; /* purecov: inspected */
893         }
894         prev_key = curr_key;
895         prev_key_len = curr_key_len;
896       }
897       return true;
898     }
899     default:
900       // This is a valid scalar value.
901       return true;
902   }
903 }
904 
905 /**
906   Create a Value object that represents an error condition.
907 */
static Value err() { return Value(Value::ERROR); }
909 
910 /**
911   Parse a JSON scalar value.
912 
913   @param type   the binary type of the scalar
914   @param data   pointer to the start of the binary representation of the scalar
915   @param len    the maximum number of bytes to read from data
916   @return  an object that represents the scalar value
917 */
static Value parse_scalar(uint8 type, const char *data, size_t len) {
919   switch (type) {
920     case JSONB_TYPE_LITERAL:
921       if (len < 1) return err(); /* purecov: inspected */
922       switch (static_cast<uint8>(*data)) {
923         case JSONB_NULL_LITERAL:
924           return Value(Value::LITERAL_NULL);
925         case JSONB_TRUE_LITERAL:
926           return Value(Value::LITERAL_TRUE);
927         case JSONB_FALSE_LITERAL:
928           return Value(Value::LITERAL_FALSE);
929         default:
930           return err(); /* purecov: inspected */
931       }
932     case JSONB_TYPE_INT16:
933       if (len < 2) return err(); /* purecov: inspected */
934       return Value(Value::INT, sint2korr(data));
935     case JSONB_TYPE_INT32:
936       if (len < 4) return err(); /* purecov: inspected */
937       return Value(Value::INT, sint4korr(data));
938     case JSONB_TYPE_INT64:
939       if (len < 8) return err(); /* purecov: inspected */
940       return Value(Value::INT, sint8korr(data));
941     case JSONB_TYPE_UINT16:
942       if (len < 2) return err(); /* purecov: inspected */
943       return Value(Value::UINT, uint2korr(data));
944     case JSONB_TYPE_UINT32:
945       if (len < 4) return err(); /* purecov: inspected */
946       return Value(Value::UINT, uint4korr(data));
947     case JSONB_TYPE_UINT64:
948       if (len < 8) return err(); /* purecov: inspected */
949       return Value(Value::UINT, uint8korr(data));
950     case JSONB_TYPE_DOUBLE: {
951       if (len < 8) return err(); /* purecov: inspected */
952       return Value(float8get(data));
953     }
954     case JSONB_TYPE_STRING: {
955       uint32 str_len;
956       uint8 n;
957       if (read_variable_length(data, len, &str_len, &n))
958         return err();                      /* purecov: inspected */
959       if (len < n + str_len) return err(); /* purecov: inspected */
960       return Value(data + n, str_len);
961     }
962     case JSONB_TYPE_OPAQUE: {
963       /*
964         There should always be at least one byte, which tells the field
965         type of the opaque value.
966       */
967       if (len < 1) return err(); /* purecov: inspected */
968 
969       // The type is encoded as a uint8 that maps to an enum_field_types.
970       uint8 type_byte = static_cast<uint8>(*data);
971       enum_field_types field_type = static_cast<enum_field_types>(type_byte);
972 
973       // Then there's the length of the value.
974       uint32 val_len;
975       uint8 n;
976       if (read_variable_length(data + 1, len - 1, &val_len, &n))
977         return err();                          /* purecov: inspected */
978       if (len < 1 + n + val_len) return err(); /* purecov: inspected */
979       return Value(field_type, data + 1 + n, val_len);
980     }
981     default:
982       // Not a valid scalar type.
983       return err();
984   }
985 }
986 
987 /**
988   Read an offset or size field from a buffer. The offset could be either
989   a two byte unsigned integer or a four byte unsigned integer.
990 
991   @param data  the buffer to read from
992   @param large tells if the large or small storage format is used; true
993                means read four bytes, false means read two bytes
994 */
static uint32 read_offset_or_size(const char *data, bool large) {
996   return large ? uint4korr(data) : uint2korr(data);
997 }
998 
999 /**
1000   Parse a JSON array or object.
1001 
1002   @param t      type (either ARRAY or OBJECT)
1003   @param data   pointer to the start of the array or object
1004   @param len    the maximum number of bytes to read from data
1005   @param large  if true, the array or object is stored using the large
1006                 storage format; otherwise, it is stored using the small
1007                 storage format
1008   @return  an object that allows access to the array or object
1009 */
static Value parse_array_or_object(Value::enum_type t, const char *data,
1011                                    size_t len, bool large) {
1012   DBUG_ASSERT(t == Value::ARRAY || t == Value::OBJECT);
1013 
1014   /*
1015     Make sure the document is long enough to contain the two length fields
1016     (both number of elements or members, and number of bytes).
1017   */
1018   const auto offset_size = json_binary::offset_size(large);
1019   if (len < 2 * offset_size) return err();
1020   const uint32 element_count = read_offset_or_size(data, large);
1021   const uint32 bytes = read_offset_or_size(data + offset_size, large);
1022 
1023   // The value can't have more bytes than what's available in the data buffer.
1024   if (bytes > len) return err();
1025 
1026   /*
1027     Calculate the size of the header. It consists of:
1028     - two length fields
1029     - if it is a JSON object, key entries with pointers to where the keys
1030       are stored
1031     - value entries with pointers to where the actual values are stored
1032   */
1033   size_t header_size = 2 * offset_size;
1034   if (t == Value::OBJECT) header_size += element_count * key_entry_size(large);
1035   header_size += element_count * value_entry_size(large);
1036 
1037   // The header should not be larger than the full size of the value.
1038   if (header_size > bytes) return err(); /* purecov: inspected */
1039 
1040   return Value(t, data, bytes, element_count, large);
1041 }
1042 
1043 /**
1044   Parse a JSON value within a larger JSON document.
1045 
1046   @param type   the binary type of the value to parse
1047   @param data   pointer to the start of the binary representation of the value
1048   @param len    the maximum number of bytes to read from data
1049   @return  an object that allows access to the value
1050 */
static Value parse_value(uint8 type, const char *data, size_t len) {
1052   switch (type) {
1053     case JSONB_TYPE_SMALL_OBJECT:
1054       return parse_array_or_object(Value::OBJECT, data, len, false);
1055     case JSONB_TYPE_LARGE_OBJECT:
1056       return parse_array_or_object(Value::OBJECT, data, len, true);
1057     case JSONB_TYPE_SMALL_ARRAY:
1058       return parse_array_or_object(Value::ARRAY, data, len, false);
1059     case JSONB_TYPE_LARGE_ARRAY:
1060       return parse_array_or_object(Value::ARRAY, data, len, true);
1061     default:
1062       return parse_scalar(type, data, len);
1063   }
1064 }
1065 
Value parse_binary(const char *data, size_t len) {
1067   DBUG_TRACE;
1068   /*
1069     Each document should start with a one-byte type specifier, so an
1070     empty document is invalid according to the format specification.
1071     Empty documents may appear due to inserts using the IGNORE keyword
1072     or with non-strict SQL mode, which will insert an empty string if
1073     the value NULL is inserted into a NOT NULL column. We choose to
1074     interpret empty values as the JSON null literal.
1075   */
1076   if (len == 0) return Value(Value::LITERAL_NULL);
1077 
1078   Value ret = parse_value(data[0], data + 1, len - 1);
1079   return ret;
1080 }
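
/*
  A typical caller parses a serialized document and navigates it without
  building a DOM, along these lines (a rough sketch; use() stands in for
  whatever the caller does with the result):

    Value doc = parse_binary(buf, buf_len);
    if (doc.type() == Value::OBJECT) {
      Value v = doc.lookup("a", 1);
      if (v.type() == Value::INT) use(v.get_int64());
    }

  Reading is lazy: element(), key() and lookup() only interpret the parts
  of the buffer they touch, and errors surface as values of type ERROR.
*/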
1081 
1082 /**
1083   Get the element at the specified position of a JSON array or a JSON
1084   object. When called on a JSON object, it returns the value
1085   associated with the key returned by key(pos).
1086 
1087   @param pos  the index of the element
1088   @return a value representing the specified element, or a value where
1089   type() returns ERROR if pos does not point to an element
1090 */
Value Value::element(size_t pos) const {
1092   DBUG_ASSERT(m_type == ARRAY || m_type == OBJECT);
1093 
1094   if (pos >= m_element_count) return err();
1095 
1096   const auto entry_size = value_entry_size(m_large);
1097   const auto entry_offset = value_entry_offset(pos);
1098 
1099   uint8 type = m_data[entry_offset];
1100 
1101   /*
1102     Check if this is an inlined scalar value. If so, return it.
1103     The scalar will be inlined just after the byte that identifies the
1104     type, so it's found on entry_offset + 1.
1105   */
1106   if (inlined_type(type, m_large))
1107     return parse_scalar(type, m_data + entry_offset + 1, entry_size - 1);
1108 
1109   /*
    Otherwise, it's a non-inlined value, and the offset to where the value
    is stored can be found right after the type byte in the entry.
1112   */
1113   uint32 value_offset = read_offset_or_size(m_data + entry_offset + 1, m_large);
1114 
1115   if (m_length < value_offset || value_offset < entry_offset + entry_size)
1116     return err(); /* purecov: inspected */
1117 
1118   return parse_value(type, m_data + value_offset, m_length - value_offset);
1119 }
1120 
1121 /**
1122   Get the key of the member stored at the specified position in a JSON
1123   object.
1124 
1125   @param pos  the index of the member
1126   @return the key of the specified member, or a value where type()
1127   returns ERROR if pos does not point to a member
1128 */
Value Value::key(size_t pos) const {
1130   DBUG_ASSERT(m_type == OBJECT);
1131 
1132   if (pos >= m_element_count) return err();
1133 
1134   const auto offset_size = json_binary::offset_size(m_large);
1135   const auto key_entry_size = json_binary::key_entry_size(m_large);
1136   const auto value_entry_size = json_binary::value_entry_size(m_large);
1137 
1138   // The key entries are located after two length fields of size offset_size.
1139   const size_t entry_offset = key_entry_offset(pos);
1140 
1141   // The offset of the key is the first part of the key entry.
1142   const uint32 key_offset = read_offset_or_size(m_data + entry_offset, m_large);
1143 
1144   // The length of the key is the second part of the entry, always two bytes.
1145   const uint16 key_length = uint2korr(m_data + entry_offset + offset_size);
1146 
1147   /*
1148     The key must start somewhere after the last value entry, and it must
1149     end before the end of the m_data buffer.
1150   */
1151   if ((key_offset < entry_offset + (m_element_count - pos) * key_entry_size +
1152                         m_element_count * value_entry_size) ||
1153       (m_length < key_offset + key_length))
1154     return err(); /* purecov: inspected */
1155 
1156   return Value(m_data + key_offset, key_length);
1157 }
1158 
1159 /**
1160   Get the value associated with the specified key in a JSON object.
1161 
1162   @param[in] key  the key to look up
1163   @param[in] length  the length of the key
  @return the value associated with the key if there is one; otherwise,
  a value where type() returns ERROR
1166 */
Value Value::lookup(const char *key, size_t length) const {
1168   size_t index = lookup_index(key, length);
1169   if (index == element_count()) return err();
1170   return element(index);
1171 }
1172 
1173 /**
1174   Get the index of the element with the specified key in a JSON object.
1175 
1176   @param[in] key  the key to look up
1177   @param[in] length  the length of the key
1178   @return the index if the key is found, or `element_count()` if the
1179   key is not found
1180 */
size_t Value::lookup_index(const char *key, size_t length) const {
1182   DBUG_ASSERT(m_type == OBJECT);
1183 
1184   const auto offset_size = json_binary::offset_size(m_large);
1185   const auto entry_size = key_entry_size(m_large);
1186 
1187   const size_t first_entry_offset = key_entry_offset(0);
1188 
1189   size_t lo = 0U;               // lower bound for binary search (inclusive)
1190   size_t hi = m_element_count;  // upper bound for binary search (exclusive)
1191 
1192   while (lo < hi) {
1193     // Find the entry in the middle of the search interval.
1194     size_t idx = (lo + hi) / 2;
1195     size_t entry_offset = first_entry_offset + idx * entry_size;
1196 
1197     // Keys are ordered on length, so check length first.
1198     size_t key_len = uint2korr(m_data + entry_offset + offset_size);
1199     if (length > key_len) {
1200       lo = idx + 1;
1201     } else if (length < key_len) {
1202       hi = idx;
1203     } else {
1204       // The keys had the same length, so compare their contents.
1205       size_t key_offset = read_offset_or_size(m_data + entry_offset, m_large);
1206 
1207       int cmp = memcmp(key, m_data + key_offset, key_len);
1208       if (cmp > 0)
1209         lo = idx + 1;
1210       else if (cmp < 0)
1211         hi = idx;
1212       else
1213         return idx;
1214     }
1215   }
1216 
1217   return m_element_count;  // not found
1218 }
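
/*
  The binary search above relies on the key order established at
  serialization time: keys are sorted first by length and then by
  byte-wise comparison, so, for example, the key "z" sorts before "aa".
*/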
1219 
1220 /**
  Does this binary value point to data that is contained in the specified
  string?
1223 
1224   @param str     a string with binary data
1225   @retval true   if the string contains data pointed to from this object
1226   @retval false  otherwise
1227 */
bool Value::is_backed_by(const String *str) const {
1229   /*
1230     The m_data member is only valid for objects, arrays, strings and opaque
1231     values. Other types have copied the necessary data into the Value object
1232     and do not depend on data in any String object.
1233   */
1234   switch (m_type) {
1235     case OBJECT:
1236     case ARRAY:
1237     case STRING:
1238     case OPAQUE:
1239       return m_data >= str->ptr() && m_data < str->ptr() + str->length();
1240     default:
1241       return false;
1242   }
1243 }
1244 
1245 /**
1246   Copy the binary representation of this value into a buffer,
1247   replacing the contents of the receiving buffer.
1248 
1249   @param thd  THD handle
1250   @param buf  the receiving buffer
1251   @return false on success, true otherwise
1252 */
1253 #ifdef MYSQL_SERVER
bool Value::raw_binary(const THD *thd, String *buf) const {
1255   // It's not safe to overwrite ourselves.
1256   DBUG_ASSERT(!is_backed_by(buf));
1257 
1258   // Reset the buffer.
1259   buf->length(0);
1260   buf->set_charset(&my_charset_bin);
1261 
1262   switch (m_type) {
1263     case OBJECT:
1264     case ARRAY: {
1265       char tp = m_large ? (m_type == OBJECT ? JSONB_TYPE_LARGE_OBJECT
1266                                             : JSONB_TYPE_LARGE_ARRAY)
1267                         : (m_type == OBJECT ? JSONB_TYPE_SMALL_OBJECT
1268                                             : JSONB_TYPE_SMALL_ARRAY);
1269       return buf->append(tp) || buf->append(m_data, m_length);
1270     }
1271     case STRING:
1272       return buf->append(JSONB_TYPE_STRING) ||
1273              append_variable_length(buf, m_length) ||
1274              buf->append(m_data, m_length);
1275     case INT: {
1276       Json_int i(get_int64());
1277       return serialize(thd, &i, buf) != OK;
1278     }
1279     case UINT: {
1280       Json_uint i(get_uint64());
1281       return serialize(thd, &i, buf) != OK;
1282     }
1283     case DOUBLE: {
1284       Json_double d(get_double());
1285       return serialize(thd, &d, buf) != OK;
1286     }
1287     case LITERAL_NULL: {
1288       Json_null n;
1289       return serialize(thd, &n, buf) != OK;
1290     }
1291     case LITERAL_TRUE:
1292     case LITERAL_FALSE: {
1293       Json_boolean b(m_type == LITERAL_TRUE);
1294       return serialize(thd, &b, buf) != OK;
1295     }
1296     case OPAQUE:
1297       return buf->append(JSONB_TYPE_OPAQUE) || buf->append(field_type()) ||
1298              append_variable_length(buf, m_length) ||
1299              buf->append(m_data, m_length);
1300     case ERROR:
1301       break; /* purecov: inspected */
1302   }
1303 
1304   /* purecov: begin deadcode */
1305   DBUG_ASSERT(false);
1306   return true;
1307   /* purecov: end */
1308 }
1309 #endif  // ifdef MYSQL_SERVER
1310 
1311 /**
1312   Find the start offset and the end offset of the specified element.
1313   @param[in]  pos     which element to check
1314   @param[out] start   the start offset of the value
1315   @param[out] end     the end offset of the value (exclusive)
1316   @param[out] inlined set to true if the specified element is inlined
1317   @return true if the offsets cannot be determined, false if successful
1318 */
bool Value::element_offsets(size_t pos, size_t *start, size_t *end,
1320                             bool *inlined) const {
1321   DBUG_ASSERT(m_type == ARRAY || m_type == OBJECT);
1322   DBUG_ASSERT(pos < m_element_count);
1323 
1324   const char *entry = m_data + value_entry_offset(pos);
1325   if (entry + value_entry_size(m_large) > m_data + m_length)
1326     return true; /* purecov: inspected */
1327 
1328   if (inlined_type(*entry, m_large)) {
1329     *start = 0;
1330     *end = 0;
1331     *inlined = true;
1332     return false;
1333   }
1334 
1335   const size_t val_pos = read_offset_or_size(entry + 1, m_large);
1336   if (val_pos >= m_length) return true;
1337 
1338   size_t val_end = 0;
1339   switch (entry[0]) {
1340     case JSONB_TYPE_INT32:
1341     case JSONB_TYPE_UINT32:
1342       val_end = val_pos + 4;
1343       break;
1344     case JSONB_TYPE_INT64:
1345     case JSONB_TYPE_UINT64:
1346     case JSONB_TYPE_DOUBLE:
1347       val_end = val_pos + 8;
1348       break;
1349     case JSONB_TYPE_STRING:
1350     case JSONB_TYPE_OPAQUE:
1351     case JSONB_TYPE_SMALL_OBJECT:
1352     case JSONB_TYPE_LARGE_OBJECT:
1353     case JSONB_TYPE_SMALL_ARRAY:
1354     case JSONB_TYPE_LARGE_ARRAY: {
1355       Value v = element(pos);
1356       if (v.type() == ERROR) return true;
1357       val_end = (v.m_data - this->m_data) + v.m_length;
1358     } break;
1359     default:
1360       return true;
1361   }
1362 
1363   *start = val_pos;
1364   *end = val_end;
1365   *inlined = false;
1366   return false;
1367 }
1368 
1369 /**
1370   Find the lowest possible offset where a value can be located inside this
1371   array or object.
1372 
1373   @param[out] offset   the lowest offset where a value can be located
1374   @return false on success, true on error
1375 */
bool Value::first_value_offset(size_t *offset) const {
1377   DBUG_ASSERT(m_type == ARRAY || m_type == OBJECT);
1378 
1379   /*
1380     Find the lowest offset where a value could be stored. Arrays can
1381     store them right after the last value entry. Objects can store
1382     them right after the last key.
1383   */
1384   if (m_type == ARRAY || m_element_count == 0) {
1385     *offset = value_entry_offset(m_element_count);
1386     return false;
1387   }
1388 
1389   Value key = this->key(m_element_count - 1);
1390   if (key.type() == ERROR) return true;
1391 
1392   *offset = key.get_data() + key.get_data_length() - m_data;
1393   return false;
1394 }

/**
  Does this array or object have enough space to replace the value at
  the given position with another value of a given size?

  @param[in]  pos     the position in the array or object
  @param[in]  needed  the number of bytes needed for the new value
  @param[out] offset  if true is returned, this value is set to an
                      offset relative to the start of the array or
                      object, which tells where the replacement value
                      should be stored
  @return true if there is enough space, false otherwise
*/
bool Value::has_space(size_t pos, size_t needed, size_t *offset) const {
  DBUG_ASSERT(m_type == ARRAY || m_type == OBJECT);
  DBUG_ASSERT(pos < m_element_count);

  /*
    Find the lowest offset where a value could be stored. Arrays can
    store them right after the last value entry. Objects can store
    them right after the last key.
  */
  size_t first_value_offset;
  if (this->first_value_offset(&first_value_offset)) return false;

  /*
    No need to check further if we need more space than the total
    space available in the array or object.
  */
  if (needed > m_length - first_value_offset) return false;

  size_t val_start;
  size_t val_end;
  bool inlined;
  if (element_offsets(pos, &val_start, &val_end, &inlined)) return false;

  if (!inlined && val_end - val_start >= needed) {
    // Found enough space at the position where the original value was located.
    *offset = val_start;
    return true;
  }

  /*
    Need more space. Look for free space after the original value.
    There's potential free space after the end of the original value
    and up to the start of the next non-inlined value.
  */
  const auto entry_size = value_entry_size(m_large);
  size_t i = pos + 1;
  for (auto entry = m_data + value_entry_offset(pos); i < m_element_count;
       ++i) {
    entry += entry_size;
    // TODO Give up after N iterations?
    if (inlined_type(*entry, m_large)) continue;
    val_end = read_offset_or_size(entry + 1, m_large);
    if (val_end > m_length) return false;
    break;
  }

  if (i == m_element_count) {
    /*
      There are no non-inlined values behind the one we are updating,
      so we can use the rest of the space allocated for the array or
      object.
    */
    val_end = m_length;
  }

  if (!inlined && val_end - val_start >= needed) {
    *offset = val_start;
    return true;
  }

  /*
    Still not enough space. See if there's free space we can use in
    front of the original value. We can use space after the end of the
    first non-inlined value we find.
  */
  if (needed > val_end - first_value_offset) return false;
  for (i = pos; i > 0; --i) {
    size_t elt_start;
    size_t elt_end;
    bool elt_inlined;
    if (element_offsets(i - 1, &elt_start, &elt_end, &elt_inlined))
      return false;
    if (elt_inlined) continue;
    val_start = elt_end;
    break;
  }

  if (i == 0) {
    /*
      There are no non-inlined values ahead of the value we are
      updating, so we can start right after the value entries.
    */
    val_start = first_value_offset;
  }

  if (val_start >= first_value_offset && val_end <= m_length &&
      val_start <= val_end && val_end - val_start >= needed) {
    *offset = val_start;
    return true;
  }

  return false;
}
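
/*
  Worked example (illustrative): consider the small array from the comment on
  update_in_shadow() below after its first element has been changed from "abc"
  to "XY". Element 0 now occupies offsets 10..13 and element 1 ("def")
  occupies offsets 14..18, with one free byte at offset 13. Replacing element
  1 with "XYZW" needs 5 bytes (length byte plus four characters).
  has_space(1, 5, &offset) finds only 4 bytes at the value's current position
  and nothing after it, but scanning backwards it finds that the preceding
  non-inlined value ends at offset 13, which gives the range 13..18, big
  enough for 5 bytes. It returns true with *offset == 13 (0x0D), the offset
  used in that example.
*/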

/**
  Get the offset of the key entry that describes the key of the member at a
  given position in this object.

  @param pos   the position of the member
  @return the offset of the key entry, relative to the start of the object
*/
inline size_t Value::key_entry_offset(size_t pos) const {
  DBUG_ASSERT(m_type == OBJECT);
  // The first key entry is located right after the two length fields.
  return 2 * offset_size(m_large) + key_entry_size(m_large) * pos;
}

/**
  Get the offset of the value entry that describes the element at a
  given position in this array or object.

  @param pos  the position of the element
  @return the offset of the entry, relative to the start of the array or object
*/
inline size_t Value::value_entry_offset(size_t pos) const {
  DBUG_ASSERT(m_type == ARRAY || m_type == OBJECT);
  /*
    Value entries come after the two length fields if it's an array, or
    after the two length fields and all the key entries if it's an object.
  */
  size_t first_entry_offset = 2 * offset_size(m_large);
  if (m_type == OBJECT)
    first_entry_offset += m_element_count * key_entry_size(m_large);

  return first_entry_offset + value_entry_size(m_large) * pos;
}
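
/*
  For illustration, the offsets for the small-format object
  { "a": "x", "b": "y", "c": "z" } used in the comment on remove_in_shadow()
  below: key_entry_offset(pos) = 2 * 2 + 4 * pos, i.e. 4, 8 and 12, and
  value_entry_offset(pos) = 2 * 2 + 3 * 4 + 3 * pos, i.e. 16, 19 and 22.
  The key data then starts at offset 25 (0x19), which is the offset stored
  in the key entry for "a" in that example.
*/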

#ifdef MYSQL_SERVER
bool space_needed(const THD *thd, const Json_wrapper *value, bool large,
                  size_t *needed) {
  if (value->type() == enum_json_type::J_ERROR) {
    my_error(ER_INVALID_JSON_BINARY_DATA, MYF(0));
    return true;
  }

  // Serialize the value to a temporary buffer to find out how big it is.
  StringBuffer<STRING_BUFFER_USUAL_SIZE> buf;
  if (value->to_binary(thd, &buf)) return true; /* purecov: inspected */

  DBUG_ASSERT(buf.length() > 1);

  // If the value can be inlined in the value entry, it doesn't need any space.
  if (inlined_type(buf[0], large)) {
    *needed = 0;
    return false;
  }

  /*
    The first byte in the buffer is the type identifier. We're only
    interested in the size of the data portion, so exclude the type byte
    from the returned size.
  */
  *needed = buf.length() - 1;
  return false;
}
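
/*
  How the pieces above fit together for a partial update, as a rough sketch
  (the real call sites live elsewhere in the server; "is_large",
  "fall_back_to_full_update", "original" and "destination" are placeholders
  here, not names used by this file):

      size_t needed;
      if (space_needed(thd, &new_value, is_large, &needed))
        return true;  // e.g. invalid JSON value

      size_t offset = 0;
      if (needed > 0 && !parent.has_space(pos, needed, &offset))
        return fall_back_to_full_update();  // no room for an in-place update

      bool changed;
      return parent.update_in_shadow(field, pos, &new_value, offset, needed,
                                     original, destination, &changed);
*/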

/**
  Update a value in an array or object. The updated value is written to a
  shadow copy. The original array or object is left unchanged, unless the
  shadow copy is actually a pointer to the array backing this Value object. It
  is assumed that the shadow copy is at least as big as the original document,
  and that there is enough space at the given position to hold the new value.

  Typically, if a document is modified multiple times in a single update
  statement, the first invocation of update_in_shadow() will have a Value
  object that points into the binary data in the Field, and write to a separate
  destination buffer. Subsequent updates of the document will have a Value
  object that points to the partially updated value in the destination buffer,
  and write the new modifications to the same buffer.

  All changes made to the binary value are recorded as binary diffs using
  TABLE::add_binary_diff().

  @param field         the column that is updated
  @param pos           the element to update
  @param new_value     the new value of the element
  @param data_offset   where to write the value (offset relative to the
                       beginning of the array or object, obtained with
                       #has_space) or zero if the value can be inlined
  @param data_length   the length of the new value in bytes or zero if
                       the value can be inlined
  @param original      pointer to the start of the JSON document
  @param destination   pointer to the shadow copy of the JSON document
                       (it could be the same as @a original, in which case the
                       original document will be modified)
  @param[out] changed  gets set to true if a change was made to the document,
                       or to false if this operation was a no-op
  @return false on success, true if an error occurred

  @par Example of partial update

  Given the JSON document [ "abc", "def" ], which is serialized like this in a
  JSON column:

      0x02 - type: small JSON array
      0x02 - number of elements (low byte)
      0x00 - number of elements (high byte)
      0x12 - number of bytes (low byte)
      0x00 - number of bytes (high byte)
      0x0C - type of element 0 (string)
      0x0A - offset of element 0 (low byte)
      0x00 - offset of element 0 (high byte)
      0x0C - type of element 1 (string)
      0x0E - offset of element 1 (low byte)
      0x00 - offset of element 1 (high byte)
      0x03 - length of element 0
      'a'
      'b'  - content of element 0
      'c'
      0x03 - length of element 1
      'd'
      'e'  - content of element 1
      'f'

  Let's change element 0 from "abc" to "XY" using the following statement:

      UPDATE t SET j = JSON_SET(j, '$[0]', 'XY')

  Since we're replacing one string with a shorter one, we can just overwrite
  the length byte with the new length, and the beginning of the original string
  data. Since the original string "abc" is longer than the new string "XY",
  we'll have a free byte at the end of the string. This byte is left as is
  ('c'). The resulting binary representation looks like this:

              0x02 - type: small JSON array
              0x02 - number of elements (low byte)
              0x00 - number of elements (high byte)
              0x12 - number of bytes (low byte)
              0x00 - number of bytes (high byte)
              0x0C - type of element 0 (string)
              0x0A - offset of element 0 (low byte)
              0x00 - offset of element 0 (high byte)
              0x0C - type of element 1 (string)
              0x0E - offset of element 1 (low byte)
              0x00 - offset of element 1 (high byte)
      CHANGED 0x02 - length of element 0
      CHANGED 'X'
      CHANGED 'Y'  - content of element 0
      (free)  'c'
              0x03 - length of element 1
              'd'
              'e'  - content of element 1
              'f'

  This change will be represented as one binary diff that covers the three
  changed bytes.

  Let's now change element 1 from "def" to "XYZW":

      UPDATE t SET j = JSON_SET(j, '$[1]', 'XYZW')

  Since the new string is one byte longer than the original string, we cannot
  simply overwrite the old one. But we can reuse the free byte from the
  previous update, which is immediately preceding the original value.

  To make use of this, we need to change the offset of element 1 to point to
  the free byte. Then we can overwrite the free byte and the original string
  data with the new length and string contents. Resulting binary
  representation:

              0x02 - type: small JSON array
              0x02 - number of elements (low byte)
              0x00 - number of elements (high byte)
              0x12 - number of bytes (low byte)
              0x00 - number of bytes (high byte)
              0x0C - type of element 0 (string)
              0x0A - offset of element 0 (low byte)
              0x00 - offset of element 0 (high byte)
              0x0C - type of element 1 (string)
      CHANGED 0x0D - offset of element 1 (low byte)
              0x00 - offset of element 1 (high byte)
              0x02 - length of element 0
              'X'  - content of element 0
              'Y'  - content of element 0
      CHANGED 0x04 - length of element 1
      CHANGED 'X'
      CHANGED 'Y'
      CHANGED 'Z'  - content of element 1
      CHANGED 'W'

  This change will be represented as two binary diffs. One diff for changing
  the offset, and one for changing the contents of the string.

  Then let's replace the string in element 1 with a small number:

      UPDATE t SET j = JSON_SET(j, '$[1]', 456)

  This will change the type of element 1 from string to int16. Such small
  numbers are inlined in the value entry, where we normally store the offset of
  the value. The offset section of the value entry is therefore changed to hold
  the number 456. The length and contents of the original value ("XYZW") are
  not touched, but they are now unused and free to be reused. Resulting binary
  representation:

              0x02 - type: small JSON array
              0x02 - number of elements (low byte)
              0x00 - number of elements (high byte)
              0x12 - number of bytes (low byte)
              0x00 - number of bytes (high byte)
              0x0C - type of element 0 (string)
              0x0A - offset of element 0 (low byte)
              0x00 - offset of element 0 (high byte)
      CHANGED 0x05 - type of element 1 (int16)
      CHANGED 0xC8 - value of element 1 (low byte)
      CHANGED 0x01 - value of element 1 (high byte)
              0x02 - length of element 0
              'X'  - content of element 0
              'Y'  - content of element 0
      (free)  0x04 - length of element 1
      (free)  'X'
      (free)  'Y'
      (free)  'Z'  - content of element 1
      (free)  'W'

  The change is represented as one binary diff that changes the value entry
  (type and inlined value).
*/
bool Value::update_in_shadow(const Field_json *field, size_t pos,
                             Json_wrapper *new_value, size_t data_offset,
                             size_t data_length, const char *original,
                             char *destination, bool *changed) const {
  DBUG_ASSERT(m_type == ARRAY || m_type == OBJECT);

  const bool inlined = (data_length == 0);

  // Assume no changes. Update the flag when the document is actually changed.
  *changed = false;

  /*
    Create a buffer large enough to hold the new value entry. (Plus one since
    some String functions insist on adding a terminating '\0'.)
  */
  StringBuffer<VALUE_ENTRY_SIZE_LARGE + 1> new_entry;

  if (inlined) {
    new_entry.length(value_entry_size(m_large));
    Json_dom *dom = new_value->to_dom(field->table->in_use);
    if (dom == nullptr) return true; /* purecov: inspected */
    attempt_inline_value(dom, &new_entry, 0, m_large);
  } else {
    new_entry.append('\0');  // type, to be filled in later
    append_offset_or_size(&new_entry, data_offset, m_large);

    const char *value = m_data + data_offset;
    const size_t value_offset = value - original;
    char *value_dest = destination + value_offset;

    StringBuffer<STRING_BUFFER_USUAL_SIZE> buffer;
    if (new_value->to_binary(field->table->in_use, &buffer))
      return true; /* purecov: inspected */

    DBUG_ASSERT(buffer.length() > 1);

    // The first byte is the type byte, which should be in the value entry.
    new_entry[0] = buffer[0];

    /*
      Create another diff for the changed data, but only if the new data is
      actually different from the old data.
    */
    const size_t length = buffer.length() - 1;
    DBUG_ASSERT(length == data_length);
    if (memcmp(value_dest, buffer.ptr() + 1, length) != 0) {
      memcpy(value_dest, buffer.ptr() + 1, length);
      if (field->table->add_binary_diff(field, value_offset, length))
        return true; /* purecov: inspected */
      *changed = true;
    }
  }

  DBUG_ASSERT(new_entry.length() == value_entry_size(m_large));

  /*
    Type and offset will often be unchanged. Don't create a change
    record unless they have actually changed.
  */
  const char *const entry = m_data + value_entry_offset(pos);
  if (memcmp(entry, new_entry.ptr(), new_entry.length()) != 0) {
    const size_t entry_offset = entry - original;
    memcpy(destination + entry_offset, new_entry.ptr(), new_entry.length());
    if (field->table->add_binary_diff(field, entry_offset, new_entry.length()))
      return true; /* purecov: inspected */
    *changed = true;
  }

  return false;
}

/**
  Remove a value from an array or object. The updated JSON document is written
  to a shadow copy. The original document is left unchanged, unless the shadow
  copy is actually a pointer to the array backing this Value object. It is
  assumed that the shadow copy is at least as big as the original document, and
  that there is enough space at the given position to hold the new value.

  Typically, if a document is modified multiple times in a single update
  statement, the first invocation of remove_in_shadow() will have a Value
  object that points into the binary data in the Field, and write to a separate
  destination buffer. Subsequent updates of the document will have a Value
  object that points to the partially updated value in the destination buffer,
  and write the new modifications to the same buffer.

  All changes made to the binary value are recorded as binary diffs using
  TABLE::add_binary_diff().

  @param field         the column that is updated
  @param pos           the element to remove
  @param original      pointer to the start of the JSON document
  @param destination   pointer to the shadow copy of the JSON document
                       (it could be the same as @a original, in which case the
                       original document will be modified)
  @return false on success, true if an error occurred

  @par Example of partial update

  Take the JSON document { "a": "x", "b": "y", "c": "z" }, whose serialized
  representation looks like the following:

              0x00 - type: JSONB_TYPE_SMALL_OBJECT
              0x03 - number of elements (low byte)
              0x00 - number of elements (high byte)
              0x22 - number of bytes (low byte)
              0x00 - number of bytes (high byte)
              0x19 - offset of key "a" (low byte)
              0x00 - offset of key "a" (high byte)
              0x01 - length of key "a" (low byte)
              0x00 - length of key "a" (high byte)
              0x1a - offset of key "b" (low byte)
              0x00 - offset of key "b" (high byte)
              0x01 - length of key "b" (low byte)
              0x00 - length of key "b" (high byte)
              0x1b - offset of key "c" (low byte)
              0x00 - offset of key "c" (high byte)
              0x01 - length of key "c" (low byte)
              0x00 - length of key "c" (high byte)
              0x0c - type of value "a": JSONB_TYPE_STRING
              0x1c - offset of value "a" (low byte)
              0x00 - offset of value "a" (high byte)
              0x0c - type of value "b": JSONB_TYPE_STRING
              0x1e - offset of value "b" (low byte)
              0x00 - offset of value "b" (high byte)
              0x0c - type of value "c": JSONB_TYPE_STRING
              0x20 - offset of value "c" (low byte)
              0x00 - offset of value "c" (high byte)
              0x61 - first key  ('a')
              0x62 - second key ('b')
              0x63 - third key  ('c')
              0x01 - length of value "a"
              0x78 - contents of value "a" ('x')
              0x01 - length of value "b"
              0x79 - contents of value "b" ('y')
              0x01 - length of value "c"
              0x7a - contents of value "c" ('z')

  We remove the member with name 'b' from the document, using a statement such
  as:

      UPDATE t SET j = JSON_REMOVE(j, '$.b')

  This function will then remove the element by moving the key entries and
  value entries that follow the removed member so that they overwrite the
  existing entries, and the element count is decremented.

  The resulting binary document will look like this:

              0x00 - type: JSONB_TYPE_SMALL_OBJECT
      CHANGED 0x02 - number of elements (low byte)
              0x00 - number of elements (high byte)
              0x22 - number of bytes (low byte)
              0x00 - number of bytes (high byte)
              0x19 - offset of key "a" (low byte)
              0x00 - offset of key "a" (high byte)
              0x01 - length of key "a" (low byte)
              0x00 - length of key "a" (high byte)
      CHANGED 0x1b - offset of key "c" (low byte)
      CHANGED 0x00 - offset of key "c" (high byte)
      CHANGED 0x01 - length of key "c" (low byte)
      CHANGED 0x00 - length of key "c" (high byte)
      CHANGED 0x0c - type of value "a": JSONB_TYPE_STRING
      CHANGED 0x1c - offset of value "a" (low byte)
      CHANGED 0x00 - offset of value "a" (high byte)
      CHANGED 0x0c - type of value "c": JSONB_TYPE_STRING
      CHANGED 0x20 - offset of value "c" (low byte)
      CHANGED 0x00 - offset of value "c" (high byte)
      (free)  0x00
      (free)  0x0c
      (free)  0x1e
      (free)  0x00
      (free)  0x0c
      (free)  0x20
      (free)  0x00
              0x61 - first key  ('a')
      (free)  0x62
              0x63 - third key  ('c')
              0x01 - length of value "a"
              0x78 - contents of value "a" ('x')
      (free)  0x01
      (free)  0x79
              0x01 - length of value "c"
              0x7a - contents of value "c" ('z')

  Two binary diffs will be created. One diff changes the element count, and one
  diff changes the key and value entries.
*/
bool Value::remove_in_shadow(const Field_json *field, size_t pos,
                             const char *original, char *destination) const {
  DBUG_ASSERT(m_type == ARRAY || m_type == OBJECT);

  const char *value_entry = m_data + value_entry_offset(pos);
  const char *next_value_entry = value_entry + value_entry_size(m_large);

  /*
    If it's an object, we first remove the key entry by shifting all subsequent
    key entries to the left, and also all value entries up to the one that's
    being removed.
  */
  if (m_type == OBJECT) {
    const char *key_entry = m_data + key_entry_offset(pos);
    const char *next_key_entry = key_entry + key_entry_size(m_large);
    size_t len = value_entry - next_key_entry;
    memmove(destination + (key_entry - original), next_key_entry, len);
    if (field->table->add_binary_diff(field, key_entry - original, len))
      return true; /* purecov: inspected */

    /*
      Adjust the destination of the value entry to account for the removed key
      entry.
    */
    value_entry -= key_entry_size(m_large);
  }

  /*
    Next, remove the value entry by shifting all subsequent value entries to
    the left.
  */
  const char *value_entry_end = m_data + value_entry_offset(m_element_count);
  size_t len = value_entry_end - next_value_entry;
  memmove(destination + (value_entry - original), next_value_entry, len);
  if (field->table->add_binary_diff(field, value_entry - original, len))
    return true; /* purecov: inspected */

  /*
    Finally, update the element count.
  */
  write_offset_or_size(destination + (m_data - original), m_element_count - 1,
                       m_large);
  return field->table->add_binary_diff(field, m_data - original,
                                       offset_size(m_large));
}

/**
  Get the amount of unused space in the binary representation of this value.

  @param      thd    THD handle
  @param[out] space  the amount of free space
  @return false on success, true on error
*/
bool Value::get_free_space(const THD *thd, size_t *space) const {
  *space = 0;

  switch (m_type) {
    case ARRAY:
    case OBJECT:
      break;
    default:
      // Scalars don't have any holes, so return immediately.
      return false;
  }

  if (m_type == OBJECT) {
    // The first key should come right after the last value entry.
    const char *next_key = m_data + value_entry_offset(m_element_count);

    // Sum up all unused space between keys.
    for (size_t i = 0; i < m_element_count; ++i) {
      Value key = this->key(i);
      if (key.type() == ERROR) {
        my_error(ER_INVALID_JSON_BINARY_DATA, MYF(0));
        return true;
      }
      *space += key.get_data() - next_key;
      next_key = key.get_data() + key.get_data_length();
    }
  }

  size_t next_value_offset;
  if (first_value_offset(&next_value_offset)) {
    my_error(ER_INVALID_JSON_BINARY_DATA, MYF(0));
    return true;
  }

  // Find the "holes" between and inside each element in the array or object.
  for (size_t i = 0; i < m_element_count; ++i) {
    size_t elt_start;
    size_t elt_end;
    bool inlined;
    if (element_offsets(i, &elt_start, &elt_end, &inlined)) {
      my_error(ER_INVALID_JSON_BINARY_DATA, MYF(0));
      return true;
    }

    if (inlined) continue;

    if (elt_start < next_value_offset || elt_end > m_length) {
      my_error(ER_INVALID_JSON_BINARY_DATA, MYF(0));
      return true;
    }

    *space += elt_start - next_value_offset;
    next_value_offset = elt_end;

    Value elt = element(i);
    switch (elt.type()) {
      case ARRAY:
      case OBJECT: {
        // Recursively process nested arrays or objects.
        if (check_stack_overrun(thd, STACK_MIN_SIZE, nullptr))
          return true; /* purecov: inspected */
        size_t elt_space;
        if (elt.get_free_space(thd, &elt_space)) return true;
        *space += elt_space;
        break;
      }
      case ERROR:
        /* purecov: begin inspected */
        my_error(ER_INVALID_JSON_BINARY_DATA, MYF(0));
        return true;
        /* purecov: end */
      default:
        break;
    }
  }

  *space += m_length - next_value_offset;
  return false;
}
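
/*
  For illustration, after the three updates described in the comment on
  update_in_shadow() above, the array [ "XY", 456 ] still occupies 18 bytes:
  element 0 ("XY") lives at offsets 10..13 and element 1 (456) is inlined in
  its value entry, so get_free_space() reports 18 - 13 = 5 unused bytes (the
  leftover length byte and "XYZW" data of the old string value).
*/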

/**
  Check whether two binary JSON scalars are equal. This function is used by
  the multi-valued index update code. Unlike the JSON comparator implemented
  in the server, it does not treat different numeric types as the same, e.g.
  int 1 and uint 1 are not considered equal. This is fine, as the multi-valued
  index update code compares old and new values of the same typed array field,
  i.e. all values being compared have the same type.

  Since multi-valued indexes don't support indexing of arrays/objects nested
  in arrays, these two types aren't supported here and trigger an assertion.

  @return 0 if the two values are equal, otherwise a non-zero value (negative
          if this value orders before @a val, positive if it orders after)
*/

int Value::eq(const Value &val) const {
  DBUG_ASSERT(is_valid() && val.is_valid());

  if (type() != val.type()) {
    return type() < val.type() ? -1 : 1;
  }
  switch (m_type) {
    case OBJECT:
    case ARRAY:
      DBUG_ASSERT(0);
      return -1;
    case OPAQUE:
      if (m_field_type != val.m_field_type)
        return m_field_type < val.m_field_type ? -1 : 1;
      /* Fall through */
    case STRING: {
      uint cmp_length = std::min(get_data_length(), val.get_data_length());
      int res;
      if (!(res = memcmp(get_data(), val.get_data(), cmp_length)))
        return (get_data_length() < val.get_data_length())
                   ? -1
                   : ((get_data_length() == val.get_data_length()) ? 0 : 1);
      return res;
    }
    case INT:
    case UINT:
      return (m_int_value == val.m_int_value)
                 ? 0
                 : ((m_int_value < val.m_int_value) ? -1 : 1);
    case DOUBLE:
      return (m_double_value == val.m_double_value)
                 ? 0
                 : ((m_double_value < val.m_double_value) ? -1 : 1);
    case LITERAL_NULL:
    case LITERAL_TRUE:
    case LITERAL_FALSE:
      return 0;
    default:
      DBUG_ASSERT(0);  // Shouldn't happen
      break;
  }
  return -1;
}
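
/*
  A few illustrative cases (not exhaustive): two STRING values "ab" and "abc"
  compare as -1, since the common prefix is equal and the first string is
  shorter; "abd" vs "abc" compares as positive because memcmp() decides; and
  INT 1 vs UINT 1 compare as unequal, ordered by their type_t values, since
  this function intentionally does not unify numeric types.
*/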
#endif  // ifdef MYSQL_SERVER

}  // end namespace json_binary