1 /*
2   Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
3   Copyright (c) 2020 MariaDB Foundation
4 
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; version 2 of the License.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software
17    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
18 
19 #include "mysql_json.h"
20 #include "my_global.h"
21 #include "compat56.h"
22 #include "my_decimal.h"
23 #include "sql_time.h"
24 
/*
  Unpack a packed temporal value into *ltime and tag the result as a DATE.

  The value is decoded with the DATETIME unpacker and only time_type is
  overridden afterwards. NOTE(review): the override is placed last on the
  assumption that the unpacker sets time_type itself - confirm before
  reordering.
*/
static void TIME_from_longlong_date_packed(MYSQL_TIME *ltime, longlong tmp)
{
  TIME_from_longlong_datetime_packed(ltime, tmp);
  ltime->time_type= MYSQL_TIMESTAMP_DATE;
}
30 
31 
/*
  JSON values in MySQL comprise the standard set of JSON values plus a
  MySQL-specific set. A JSON number type is subdivided into int, uint, double
  and decimal.

  MySQL also adds four built-in date/time values: date, time, datetime and
  timestamp. An additional opaque value can store any other MySQL type.
*/
40 
/* Values of the single payload byte of a JSONB_TYPE_LITERAL value. */
enum JSONB_LITERAL_TYPES
{
  JSONB_NULL_LITERAL=  0,   /* printed as null  */
  JSONB_TRUE_LITERAL=  1,   /* printed as true  */
  JSONB_FALSE_LITERAL= 2    /* printed as false */
};
46 
/*
  The size, in bytes, of offset or size fields in the small and the large
  storage format for JSON objects and JSON arrays.
*/
static const uchar SMALL_OFFSET_SIZE= 2;
static const uchar LARGE_OFFSET_SIZE= 4;

/*
  The size of key entries for objects when using the small storage
  format or the large storage format. In the small format it is 4
  bytes (2 bytes for key length and 2 bytes for key offset). In the
  large format it is 6 (2 bytes for length, 4 bytes for offset).
  The key length always occupies 2 bytes, whatever the format.
*/
static const uchar KEY_ENTRY_SIZE_SMALL= (2 + SMALL_OFFSET_SIZE);
static const uchar KEY_ENTRY_SIZE_LARGE= (2 + LARGE_OFFSET_SIZE);

/*
  The size of value entries for objects or arrays. When using the
  small storage format, the entry size is 3 (1 byte for type, 2 bytes
  for offset). When using the large storage format, it is 5 (1 byte
  for type, 4 bytes for offset). For inlined types the offset bytes
  hold the value itself (see type_is_stored_inline).
*/
static const uchar VALUE_ENTRY_SIZE_SMALL= (1 + SMALL_OFFSET_SIZE);
static const uchar VALUE_ENTRY_SIZE_LARGE= (1 + LARGE_OFFSET_SIZE);

/*
  The maximum number of nesting levels allowed in a JSON document.
  parse_array_or_object() fails once this depth is exceeded.
*/
static const uchar JSON_DOCUMENT_MAX_DEPTH= 150;
74 
75 /**
76   Read an offset or size field from a buffer. The offset could be either
77   a two byte unsigned integer or a four byte unsigned integer.
78 
79   @param data  the buffer to read from
80   @param large tells if the large or small storage format is used; true
81                means read four bytes, false means read two bytes
82 */
read_offset_or_size(const uchar * data,bool large)83 static inline size_t read_offset_or_size(const uchar *data, bool large)
84 {
85   return large ? uint4korr(data) : uint2korr(data);
86 }
87 
key_size(bool large)88 static inline size_t key_size(bool large)
89 {
90   return large ? KEY_ENTRY_SIZE_LARGE : KEY_ENTRY_SIZE_SMALL;
91 }
92 
value_size(bool large)93 static inline size_t value_size(bool large)
94 {
95   return large ? VALUE_ENTRY_SIZE_LARGE : VALUE_ENTRY_SIZE_SMALL;
96 }
97 
98 /**
99   Inlined values are a space optimization. The actual value is stored
100   instead of the offset pointer to the location where a non-inlined
101   value would be located.
102 
103   @param[in] type   The type to check.
104   @param[in] large tells if the large or small storage format is used;
105 */
type_is_stored_inline(JSONB_TYPES type,bool large)106 static inline bool type_is_stored_inline(JSONB_TYPES type, bool large)
107 {
108   return (type == JSONB_TYPE_INT16 ||
109           type == JSONB_TYPE_UINT16 ||
110           type == JSONB_TYPE_LITERAL ||
111           (large && (type == JSONB_TYPE_INT32 ||
112                      type == JSONB_TYPE_UINT32)));
113 }
114 
115 /**
116   Read a variable length integer. A variable length integer uses the 8th bit in
117   each byte to mark if there are more bytes needed to store the integer. The
118   other 7 bits in the byte are used to store the actual integer's bits.
119 
120   @param[in]  data         the buffer to read from
121   @param[in]  data_length  the maximum number of bytes to read from data
122   @param[out] length       the length that was read
123   @param[out] num          the number of bytes needed to represent the length
124   @return  false on success, true on error
125 */
read_variable_length(const uchar * data,size_t data_length,size_t * length,size_t * num)126 static inline bool read_variable_length(const uchar *data, size_t data_length,
127                                         size_t *length, size_t *num)
128 {
129   /*
130     It takes five bytes to represent UINT_MAX32, which is the largest
131     supported length, so don't look any further.
132 
133     Use data_length as max value to prevent segfault when reading a corrupted
134     JSON document.
135   */
136   const size_t MAX_BYTES= MY_MIN(data_length, 5);
137   size_t len= 0;
138   for (size_t i= 0; i < MAX_BYTES; i++)
139   {
140     /* Get the next 7 bits of the length. */
141     len|= (data[i] & 0x7f) << (7 * i);
142     if ((data[i] & 0x80) == 0)
143     {
144       /* The length shouldn't exceed 32 bits. */
145       if (len > UINT_MAX32)
146         return true;
147 
148       /* This was the last byte. Return successfully. */
149       *num= i + 1;
150       *length= len;
151       return false;
152     }
153   }
154 
155   /* No more available bytes. Return true to signal error. This implies a
156      corrupted JSON document. */
157   return true;
158 }
159 
160 /**
161    JSON formatting in MySQL escapes a few special characters to prevent
162    ambiguity.
163 */
append_string_json(String * buffer,const uchar * data,size_t len)164 static bool append_string_json(String *buffer, const uchar *data, size_t len)
165 {
166   const uchar *last= data + len;
167   for (; data < last; data++)
168   {
169     const uchar c= *data;
170     switch (c) {
171     case '\\':
172       buffer->append("\\\\");
173       break;
174     case '\n':
175       buffer->append("\\n");
176       break;
177     case '\r':
178       buffer->append("\\r");
179       break;
180     case '"':
181       buffer->append("\\\"");
182       break;
183     case '\b':
184       buffer->append("\\b");
185       break;
186     case '\f':
187       buffer->append("\\f");
188       break;
189     case '\t':
190       buffer->append("\\t");
191       break;
192     default:
193       buffer->append(c);
194       break;
195     }
196   }
197   return false;
198 }
199 
200 /*
201   Function used for JSON_OPAQUE type.
202 */
print_mysql_datetime_value(String * buffer,enum_field_types type,const uchar * data,size_t len)203 static bool print_mysql_datetime_value(String *buffer, enum_field_types type,
204                                        const uchar *data, size_t len)
205 {
206   if (len < 8)
207     return true;
208 
209   MYSQL_TIME t;
210   switch (type)
211   {
212     case MYSQL_TYPE_TIME:
213       TIME_from_longlong_time_packed(&t, sint8korr(data));
214       break;
215     case MYSQL_TYPE_DATE:
216       TIME_from_longlong_date_packed(&t, sint8korr(data));
217       break;
218     case MYSQL_TYPE_DATETIME:
219     case MYSQL_TYPE_TIMESTAMP:
220       TIME_from_longlong_datetime_packed(&t, sint8korr(data));
221       break;
222     default:
223       DBUG_ASSERT(0);
224       return true;
225   }
226   /* Wrap all datetime strings within double quotes. */
227   buffer->append('\"');
228   buffer->reserve(MAX_DATE_STRING_REP_LENGTH);
229   buffer->length(buffer->length() +
230                  my_TIME_to_str(&t, const_cast<char *>(buffer->end()), 6));
231   buffer->append('\"');
232   return false;
233 }
234 
parse_mysql_scalar(String * buffer,size_t value_json_type,const uchar * data,size_t len)235 static bool parse_mysql_scalar(String *buffer, size_t value_json_type,
236                                const uchar *data, size_t len)
237 {
238   switch (value_json_type) {
239   case JSONB_TYPE_LITERAL:
240   {
241     if (len < 1)
242       return true;
243     switch (static_cast<JSONB_LITERAL_TYPES>(*data)) {
244     case JSONB_NULL_LITERAL:
245       return buffer->append("null");
246     case JSONB_TRUE_LITERAL:
247       return buffer->append("true");
248     case JSONB_FALSE_LITERAL:
249       return buffer->append("false");
250     default: /* Invalid literal constant, malformed JSON. */
251       return true;
252     }
253   }
254   case JSONB_TYPE_INT16:
255     return len < 2 || buffer->append_longlong(sint2korr(data));
256   case JSONB_TYPE_INT32:
257     return len < 4 || buffer->append_longlong(sint4korr(data));
258   case JSONB_TYPE_INT64:
259     return len < 8 || buffer->append_longlong(sint8korr(data));
260   case JSONB_TYPE_UINT16:
261     return len < 2 || buffer->append_ulonglong(uint2korr(data));
262   case JSONB_TYPE_UINT32:
263     return len < 4 || buffer->append_ulonglong(uint4korr(data));
264   case JSONB_TYPE_UINT64:
265     return len < 8 || buffer->append_ulonglong(uint8korr(data));
266   case JSONB_TYPE_DOUBLE:
267     if (len < 8)
268       return true;
269     buffer->reserve(FLOATING_POINT_BUFFER, 2 * FLOATING_POINT_BUFFER);
270     buffer->qs_append(reinterpret_cast<const double *>(data));
271     return false;
272   case JSONB_TYPE_STRING:
273   {
274     size_t string_length, store_bytes;
275 
276     return read_variable_length(data, len, &string_length, &store_bytes) ||
277            len < store_bytes + string_length ||
278            buffer->append('"') ||
279            append_string_json(buffer, data + store_bytes, string_length) ||
280            buffer->append('"');
281   }
282   case JSONB_TYPE_OPAQUE:
283   {
284     /* The field_type maps directly to enum_field_types. */
285     const uchar type_value= *data;
286     const enum_field_types field_type= static_cast<enum_field_types>(type_value);
287 
288     size_t UNINIT_VAR(blob_length), length_bytes;
289     const uchar *blob_start;
290 
291     if (read_variable_length(data + 1, len, &blob_length, &length_bytes) ||
292         len < length_bytes + blob_length)
293       return true;
294     blob_start= data + length_bytes + 1;
295 
296     switch (field_type) {
297     case MYSQL_TYPE_TIME:
298     case MYSQL_TYPE_DATE:
299     case MYSQL_TYPE_DATETIME:
300     case MYSQL_TYPE_TIMESTAMP:
301       return print_mysql_datetime_value(buffer, field_type,
302                                         blob_start, blob_length);
303     case MYSQL_TYPE_NEWDECIMAL:
304     {
305       /* Expect at least two bytes, which contain precision and scale. */
306       if (blob_length < 2)
307         return true;
308 
309       const int precision= blob_start[0];
310       const int scale= blob_start[1];
311 
312       my_decimal d;
313 
314       /* The decimal value is encoded after the two prec/scale bytes. */
315       const size_t dec_size= my_decimal_get_binary_size(precision, scale);
316       if (dec_size != blob_length - 2 ||
317           binary2my_decimal(E_DEC_ERROR,
318                             reinterpret_cast<const uchar *>(blob_start + 2),
319                             &d, precision, scale) != E_DEC_OK)
320         return true;
321 
322       if (d.to_string_native(buffer, 0, 0, ' ', E_DEC_ERROR) != E_DEC_OK)
323         return true;
324       return false;
325     }
326     default:
327     {
328       /* Any other MySQL type is presented as a base64 encoded string. */
329       if (buffer->append("\"base64:type") ||
330           buffer->append_longlong(field_type) ||
331           buffer->append(':'))
332         return true;
333 
334       const size_t needed= my_base64_needed_encoded_length(
335           static_cast<int>(blob_length));
336       if (buffer->reserve(needed) ||
337           my_base64_encode(blob_start, blob_length,
338                            const_cast<char*>(buffer->end())))
339         return true;
340       /* -1 to override the null terminator from my_base64_encode */
341       DBUG_ASSERT(*(buffer->end() + needed) == '\0');
342       buffer->length(buffer->length() + needed - 1);
343       return buffer->append('"');
344     }
345     }
346   }
347   default:
348     return true;
349   }
350 }
351 
352 
353 /**
354   Read a value from a JSON Object or Array, given the position of it.
355   This function handles both inlined values as well as values stored at
356   an offset.
357 
358   @param[out] buffer            Where to print the results.
359   @param[in] data               The raw binary data of the Object or Array.
360   @param[in] len                The length of the binary data.
361   @param[in] value_type_offset  Where the type of the value is stored.
362   @param[in] large              true if the large storage format is used;
363   @param[in] depth              How deep the JSON object is in the hierarchy.
364 */
parse_mysql_scalar_or_value(String * buffer,const uchar * data,size_t len,size_t value_type_offset,bool large,size_t depth)365 static bool parse_mysql_scalar_or_value(String *buffer, const uchar *data,
366                                         size_t len, size_t value_type_offset,
367                                         bool large, size_t depth)
368 {
369   /* Get the type of the value stored at the key. */
370   const JSONB_TYPES value_type=
371     static_cast<JSONB_TYPES>(data[value_type_offset]);
372 
373   if (type_is_stored_inline(value_type, large))
374   {
375     const size_t value_start = value_type_offset + 1;
376     if (parse_mysql_scalar(buffer, value_type, data + value_start,
377                            len - value_start))
378       return true;
379   }
380   else
381   {
382     /* The offset to where the value is stored is relative to the start
383        of the Object / Array */
384     const size_t value_start= read_offset_or_size(
385                                       data + value_type_offset + 1, large);
386     if (parse_mysql_json_value(buffer, value_type, data + value_start,
387                                len - value_start, depth))
388       return true;
389   }
390   return false;
391 }
392 
parse_array_or_object(String * buffer,const uchar * data,size_t len,bool handle_as_object,bool large,size_t depth)393 static bool parse_array_or_object(String *buffer, const uchar *data, size_t len,
394                                   bool handle_as_object, bool large,
395                                   size_t depth)
396 {
397   if (++depth > JSON_DOCUMENT_MAX_DEPTH)
398     return true;
399 
400   /*
401     Make sure the document is long enough to contain the two length fields
402     (both number of elements or members, and number of bytes).
403   */
404   const size_t offset_size= large ? LARGE_OFFSET_SIZE : SMALL_OFFSET_SIZE;
405   /* The length has to be at least double offset size (header). */
406   if (len < 2 * offset_size)
407     return true;
408 
409 
410   /*
411      Every JSON Object or Array contains two numbers in the header:
412      - The number of elements in the Object / Array (Keys)
413      - The total number of bytes occupied by the JSON Object / Array, including
414        the two numbers in the header.
415      Depending on the Object / Array type (small / large) the numbers are stored
416      in 2 bytes or 4 bytes each.
417   */
418   const size_t element_count= read_offset_or_size(data, large);
419   const size_t bytes= read_offset_or_size(data + offset_size, large);
420 
421   /* The value can't have more bytes than what's available in the buffer. */
422   if (bytes > len)
423     return true;
424 
425   if (buffer->append(handle_as_object ? '{' : '['))
426     return true;
427 
428 
429   for (size_t i= 0; i < element_count; i++)
430   {
431     if (handle_as_object)
432     {
433       /*
434         The JSON Object is stored as a header part and a data part.
435         Header consists of:
436         - two length fields,
437         - an array of pointers to keys.
438         - an array of tuples (type, pointer to values)
439           * For certain types, the pointer to values is replaced by the actual
440             value. (see type_is_stored_inline)
441         Data consists of:
442         - All Key data, in order
443         - All Value data, in order
444       */
445       const size_t key_offset= 2 * offset_size + i * key_size(large);
446       const size_t key_start= read_offset_or_size(data + key_offset, large);
447       /* The length of keys is always stored in 2 bytes (large == false) */
448       const size_t key_len= read_offset_or_size(
449                                    data + key_offset + offset_size, false);
450 
451       const size_t value_type_offset=(2 * offset_size +
452                                       element_count * key_size(large) +
453                                       i * value_size(large));
454 
455       /* First print the key. */
456       if (buffer->append('"') ||
457           append_string_json(buffer, data + key_start, key_len) ||
458           buffer->append("\": "))
459       {
460         return true;
461       }
462 
463       /* Then print the value. */
464       if (parse_mysql_scalar_or_value(buffer, data, bytes, value_type_offset,
465                                       large, depth))
466         return true;
467     }
468     else
469     {
470       /*
471          Arrays do not have the keys vector and its associated data.
472          We jump straight to reading values.
473       */
474       const size_t value_type_offset= 2 * offset_size + value_size(large) * i;
475 
476       if (parse_mysql_scalar_or_value(buffer, data, bytes, value_type_offset,
477                                       large, depth))
478         return true;
479     }
480 
481     if (i != element_count - 1 && buffer->append(", "))
482       return true;
483   }
484 
485   return buffer->append(handle_as_object ? '}' : ']');
486 }
487 
488 /**
489   Check the first byte of data which is the enum structure and based on it
490   perform parsing of object or array where each can have small or large
491   representation.
492 
493   @param[out] buffer            Where to print the results.
494   @param[in] type               Type of value {object, array, scalar}.
495   @param[in] data               Raw data for parsing.
496   @param[in] length             Length of data.
497   @param[in] depth              Depth size.
498 */
parse_mysql_json_value(String * buffer,JSONB_TYPES type,const uchar * data,size_t len,size_t depth)499 bool parse_mysql_json_value(String *buffer, JSONB_TYPES type, const uchar *data,
500                             size_t len, size_t depth)
501 {
502   const bool IS_OBJECT=true, IS_LARGE=true;
503   switch (type) {
504   case JSONB_TYPE_SMALL_OBJECT:
505     return parse_array_or_object(buffer, data, len, IS_OBJECT, !IS_LARGE, depth);
506   case JSONB_TYPE_LARGE_OBJECT:
507     return parse_array_or_object(buffer, data, len, IS_OBJECT, IS_LARGE, depth);
508   case JSONB_TYPE_SMALL_ARRAY:
509     return parse_array_or_object(buffer, data, len, !IS_OBJECT, !IS_LARGE, depth);
510   case JSONB_TYPE_LARGE_ARRAY:
511     return parse_array_or_object(buffer, data, len, !IS_OBJECT, IS_LARGE, depth);
512   default:
513     return parse_mysql_scalar(buffer, type, data, len);
514   }
515 }
516