1 /*
2 Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
3 Copyright (c) 2020 MariaDB Foundation
4
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; version 2 of the License.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
18
19 #include "mysql_json.h"
20 #include "my_global.h"
21 #include "compat56.h"
22 #include "my_decimal.h"
23 #include "sql_time.h"
24
TIME_from_longlong_date_packed(MYSQL_TIME * ltime,longlong tmp)25 static void TIME_from_longlong_date_packed(MYSQL_TIME *ltime, longlong tmp)
26 {
27 TIME_from_longlong_datetime_packed(ltime, tmp);
28 ltime->time_type= MYSQL_TIMESTAMP_DATE;
29 }
30
31
32 /*
  JSON values in MySQL comprise the standard set of JSON values plus a
  MySQL-specific set. The JSON number type is subdivided into int, uint,
  double and decimal.
36
37 MySQL also adds four built-in date/time values: date, time, datetime and
38 timestamp. An additional opaque value can store any other MySQL type.
39 */
40
/*
  Values of the single byte that follows a JSONB_TYPE_LITERAL type marker.
  Any other value is treated as a malformed document by the parser.
*/
enum JSONB_LITERAL_TYPES {
  JSONB_NULL_LITERAL=  0x00,  /* printed as null  */
  JSONB_TRUE_LITERAL=  0x01,  /* printed as true  */
  JSONB_FALSE_LITERAL= 0x02,  /* printed as false */
};
46
/*
  The size, in bytes, of an offset or size field in the small and the large
  storage format for JSON objects and JSON arrays. These are the widths
  that read_offset_or_size() decodes (two or four bytes).
*/
static const uchar SMALL_OFFSET_SIZE= 2;
static const uchar LARGE_OFFSET_SIZE= 4;
53
/*
  The size of key entries for objects when using the small storage
  format or the large storage format. In the small format it is 4
  bytes (2 bytes for key offset and 2 bytes for key length). In the
  large format it is 6 (4 bytes for offset, 2 bytes for length).

  Within an entry the offset comes first and the 2-byte length follows it;
  this is the order in which parse_array_or_object() reads them.
*/
static const uchar KEY_ENTRY_SIZE_SMALL= (2 + SMALL_OFFSET_SIZE);
static const uchar KEY_ENTRY_SIZE_LARGE= (2 + LARGE_OFFSET_SIZE);
62
/*
  The size of value entries for objects or arrays. When using the
  small storage format, the entry size is 3 (1 byte for type, 2 bytes
  for offset). When using the large storage format, it is 5 (1 byte
  for type, 4 bytes for offset).

  For types that are stored inline (see type_is_stored_inline) the bytes
  after the type byte hold the value itself instead of an offset.
*/
static const uchar VALUE_ENTRY_SIZE_SMALL= (1 + SMALL_OFFSET_SIZE);
static const uchar VALUE_ENTRY_SIZE_LARGE= (1 + LARGE_OFFSET_SIZE);
71
/*
  The maximum number of nesting levels allowed in a JSON document.
  parse_array_or_object() increments its depth counter on entry and reports
  an error once the counter exceeds this value.
*/
static const uchar JSON_DOCUMENT_MAX_DEPTH= 150;
74
75 /**
76 Read an offset or size field from a buffer. The offset could be either
77 a two byte unsigned integer or a four byte unsigned integer.
78
79 @param data the buffer to read from
80 @param large tells if the large or small storage format is used; true
81 means read four bytes, false means read two bytes
82 */
read_offset_or_size(const uchar * data,bool large)83 static inline size_t read_offset_or_size(const uchar *data, bool large)
84 {
85 return large ? uint4korr(data) : uint2korr(data);
86 }
87
key_size(bool large)88 static inline size_t key_size(bool large)
89 {
90 return large ? KEY_ENTRY_SIZE_LARGE : KEY_ENTRY_SIZE_SMALL;
91 }
92
value_size(bool large)93 static inline size_t value_size(bool large)
94 {
95 return large ? VALUE_ENTRY_SIZE_LARGE : VALUE_ENTRY_SIZE_SMALL;
96 }
97
98 /**
99 Inlined values are a space optimization. The actual value is stored
100 instead of the offset pointer to the location where a non-inlined
101 value would be located.
102
103 @param[in] type The type to check.
104 @param[in] large tells if the large or small storage format is used;
105 */
type_is_stored_inline(JSONB_TYPES type,bool large)106 static inline bool type_is_stored_inline(JSONB_TYPES type, bool large)
107 {
108 return (type == JSONB_TYPE_INT16 ||
109 type == JSONB_TYPE_UINT16 ||
110 type == JSONB_TYPE_LITERAL ||
111 (large && (type == JSONB_TYPE_INT32 ||
112 type == JSONB_TYPE_UINT32)));
113 }
114
115 /**
116 Read a variable length integer. A variable length integer uses the 8th bit in
117 each byte to mark if there are more bytes needed to store the integer. The
118 other 7 bits in the byte are used to store the actual integer's bits.
119
120 @param[in] data the buffer to read from
121 @param[in] data_length the maximum number of bytes to read from data
122 @param[out] length the length that was read
123 @param[out] num the number of bytes needed to represent the length
124 @return false on success, true on error
125 */
read_variable_length(const uchar * data,size_t data_length,size_t * length,size_t * num)126 static inline bool read_variable_length(const uchar *data, size_t data_length,
127 size_t *length, size_t *num)
128 {
129 /*
130 It takes five bytes to represent UINT_MAX32, which is the largest
131 supported length, so don't look any further.
132
133 Use data_length as max value to prevent segfault when reading a corrupted
134 JSON document.
135 */
136 const size_t MAX_BYTES= MY_MIN(data_length, 5);
137 size_t len= 0;
138 for (size_t i= 0; i < MAX_BYTES; i++)
139 {
140 /* Get the next 7 bits of the length. */
141 len|= (data[i] & 0x7f) << (7 * i);
142 if ((data[i] & 0x80) == 0)
143 {
144 /* The length shouldn't exceed 32 bits. */
145 if (len > UINT_MAX32)
146 return true;
147
148 /* This was the last byte. Return successfully. */
149 *num= i + 1;
150 *length= len;
151 return false;
152 }
153 }
154
155 /* No more available bytes. Return true to signal error. This implies a
156 corrupted JSON document. */
157 return true;
158 }
159
160 /**
161 JSON formatting in MySQL escapes a few special characters to prevent
162 ambiguity.
163 */
append_string_json(String * buffer,const uchar * data,size_t len)164 static bool append_string_json(String *buffer, const uchar *data, size_t len)
165 {
166 const uchar *last= data + len;
167 for (; data < last; data++)
168 {
169 const uchar c= *data;
170 switch (c) {
171 case '\\':
172 buffer->append("\\\\");
173 break;
174 case '\n':
175 buffer->append("\\n");
176 break;
177 case '\r':
178 buffer->append("\\r");
179 break;
180 case '"':
181 buffer->append("\\\"");
182 break;
183 case '\b':
184 buffer->append("\\b");
185 break;
186 case '\f':
187 buffer->append("\\f");
188 break;
189 case '\t':
190 buffer->append("\\t");
191 break;
192 default:
193 buffer->append(c);
194 break;
195 }
196 }
197 return false;
198 }
199
200 /*
201 Function used for JSON_OPAQUE type.
202 */
print_mysql_datetime_value(String * buffer,enum_field_types type,const uchar * data,size_t len)203 static bool print_mysql_datetime_value(String *buffer, enum_field_types type,
204 const uchar *data, size_t len)
205 {
206 if (len < 8)
207 return true;
208
209 MYSQL_TIME t;
210 switch (type)
211 {
212 case MYSQL_TYPE_TIME:
213 TIME_from_longlong_time_packed(&t, sint8korr(data));
214 break;
215 case MYSQL_TYPE_DATE:
216 TIME_from_longlong_date_packed(&t, sint8korr(data));
217 break;
218 case MYSQL_TYPE_DATETIME:
219 case MYSQL_TYPE_TIMESTAMP:
220 TIME_from_longlong_datetime_packed(&t, sint8korr(data));
221 break;
222 default:
223 DBUG_ASSERT(0);
224 return true;
225 }
226 /* Wrap all datetime strings within double quotes. */
227 buffer->append('\"');
228 buffer->reserve(MAX_DATE_STRING_REP_LENGTH);
229 buffer->length(buffer->length() +
230 my_TIME_to_str(&t, const_cast<char *>(buffer->end()), 6));
231 buffer->append('\"');
232 return false;
233 }
234
parse_mysql_scalar(String * buffer,size_t value_json_type,const uchar * data,size_t len)235 static bool parse_mysql_scalar(String *buffer, size_t value_json_type,
236 const uchar *data, size_t len)
237 {
238 switch (value_json_type) {
239 case JSONB_TYPE_LITERAL:
240 {
241 if (len < 1)
242 return true;
243 switch (static_cast<JSONB_LITERAL_TYPES>(*data)) {
244 case JSONB_NULL_LITERAL:
245 return buffer->append("null");
246 case JSONB_TRUE_LITERAL:
247 return buffer->append("true");
248 case JSONB_FALSE_LITERAL:
249 return buffer->append("false");
250 default: /* Invalid literal constant, malformed JSON. */
251 return true;
252 }
253 }
254 case JSONB_TYPE_INT16:
255 return len < 2 || buffer->append_longlong(sint2korr(data));
256 case JSONB_TYPE_INT32:
257 return len < 4 || buffer->append_longlong(sint4korr(data));
258 case JSONB_TYPE_INT64:
259 return len < 8 || buffer->append_longlong(sint8korr(data));
260 case JSONB_TYPE_UINT16:
261 return len < 2 || buffer->append_ulonglong(uint2korr(data));
262 case JSONB_TYPE_UINT32:
263 return len < 4 || buffer->append_ulonglong(uint4korr(data));
264 case JSONB_TYPE_UINT64:
265 return len < 8 || buffer->append_ulonglong(uint8korr(data));
266 case JSONB_TYPE_DOUBLE:
267 if (len < 8)
268 return true;
269 buffer->reserve(FLOATING_POINT_BUFFER, 2 * FLOATING_POINT_BUFFER);
270 buffer->qs_append(reinterpret_cast<const double *>(data));
271 return false;
272 case JSONB_TYPE_STRING:
273 {
274 size_t string_length, store_bytes;
275
276 return read_variable_length(data, len, &string_length, &store_bytes) ||
277 len < store_bytes + string_length ||
278 buffer->append('"') ||
279 append_string_json(buffer, data + store_bytes, string_length) ||
280 buffer->append('"');
281 }
282 case JSONB_TYPE_OPAQUE:
283 {
284 /* The field_type maps directly to enum_field_types. */
285 const uchar type_value= *data;
286 const enum_field_types field_type= static_cast<enum_field_types>(type_value);
287
288 size_t UNINIT_VAR(blob_length), length_bytes;
289 const uchar *blob_start;
290
291 if (read_variable_length(data + 1, len, &blob_length, &length_bytes) ||
292 len < length_bytes + blob_length)
293 return true;
294 blob_start= data + length_bytes + 1;
295
296 switch (field_type) {
297 case MYSQL_TYPE_TIME:
298 case MYSQL_TYPE_DATE:
299 case MYSQL_TYPE_DATETIME:
300 case MYSQL_TYPE_TIMESTAMP:
301 return print_mysql_datetime_value(buffer, field_type,
302 blob_start, blob_length);
303 case MYSQL_TYPE_NEWDECIMAL:
304 {
305 /* Expect at least two bytes, which contain precision and scale. */
306 if (blob_length < 2)
307 return true;
308
309 const int precision= blob_start[0];
310 const int scale= blob_start[1];
311
312 my_decimal d;
313
314 /* The decimal value is encoded after the two prec/scale bytes. */
315 const size_t dec_size= my_decimal_get_binary_size(precision, scale);
316 if (dec_size != blob_length - 2 ||
317 binary2my_decimal(E_DEC_ERROR,
318 reinterpret_cast<const uchar *>(blob_start + 2),
319 &d, precision, scale) != E_DEC_OK)
320 return true;
321
322 if (d.to_string_native(buffer, 0, 0, ' ', E_DEC_ERROR) != E_DEC_OK)
323 return true;
324 return false;
325 }
326 default:
327 {
328 /* Any other MySQL type is presented as a base64 encoded string. */
329 if (buffer->append("\"base64:type") ||
330 buffer->append_longlong(field_type) ||
331 buffer->append(':'))
332 return true;
333
334 const size_t needed= my_base64_needed_encoded_length(
335 static_cast<int>(blob_length));
336 if (buffer->reserve(needed) ||
337 my_base64_encode(blob_start, blob_length,
338 const_cast<char*>(buffer->end())))
339 return true;
340 /* -1 to override the null terminator from my_base64_encode */
341 DBUG_ASSERT(*(buffer->end() + needed) == '\0');
342 buffer->length(buffer->length() + needed - 1);
343 return buffer->append('"');
344 }
345 }
346 }
347 default:
348 return true;
349 }
350 }
351
352
353 /**
354 Read a value from a JSON Object or Array, given the position of it.
355 This function handles both inlined values as well as values stored at
356 an offset.
357
358 @param[out] buffer Where to print the results.
359 @param[in] data The raw binary data of the Object or Array.
360 @param[in] len The length of the binary data.
361 @param[in] value_type_offset Where the type of the value is stored.
362 @param[in] large true if the large storage format is used;
363 @param[in] depth How deep the JSON object is in the hierarchy.
364 */
parse_mysql_scalar_or_value(String * buffer,const uchar * data,size_t len,size_t value_type_offset,bool large,size_t depth)365 static bool parse_mysql_scalar_or_value(String *buffer, const uchar *data,
366 size_t len, size_t value_type_offset,
367 bool large, size_t depth)
368 {
369 /* Get the type of the value stored at the key. */
370 const JSONB_TYPES value_type=
371 static_cast<JSONB_TYPES>(data[value_type_offset]);
372
373 if (type_is_stored_inline(value_type, large))
374 {
375 const size_t value_start = value_type_offset + 1;
376 if (parse_mysql_scalar(buffer, value_type, data + value_start,
377 len - value_start))
378 return true;
379 }
380 else
381 {
382 /* The offset to where the value is stored is relative to the start
383 of the Object / Array */
384 const size_t value_start= read_offset_or_size(
385 data + value_type_offset + 1, large);
386 if (parse_mysql_json_value(buffer, value_type, data + value_start,
387 len - value_start, depth))
388 return true;
389 }
390 return false;
391 }
392
parse_array_or_object(String * buffer,const uchar * data,size_t len,bool handle_as_object,bool large,size_t depth)393 static bool parse_array_or_object(String *buffer, const uchar *data, size_t len,
394 bool handle_as_object, bool large,
395 size_t depth)
396 {
397 if (++depth > JSON_DOCUMENT_MAX_DEPTH)
398 return true;
399
400 /*
401 Make sure the document is long enough to contain the two length fields
402 (both number of elements or members, and number of bytes).
403 */
404 const size_t offset_size= large ? LARGE_OFFSET_SIZE : SMALL_OFFSET_SIZE;
405 /* The length has to be at least double offset size (header). */
406 if (len < 2 * offset_size)
407 return true;
408
409
410 /*
411 Every JSON Object or Array contains two numbers in the header:
412 - The number of elements in the Object / Array (Keys)
413 - The total number of bytes occupied by the JSON Object / Array, including
414 the two numbers in the header.
415 Depending on the Object / Array type (small / large) the numbers are stored
416 in 2 bytes or 4 bytes each.
417 */
418 const size_t element_count= read_offset_or_size(data, large);
419 const size_t bytes= read_offset_or_size(data + offset_size, large);
420
421 /* The value can't have more bytes than what's available in the buffer. */
422 if (bytes > len)
423 return true;
424
425 if (buffer->append(handle_as_object ? '{' : '['))
426 return true;
427
428
429 for (size_t i= 0; i < element_count; i++)
430 {
431 if (handle_as_object)
432 {
433 /*
434 The JSON Object is stored as a header part and a data part.
435 Header consists of:
436 - two length fields,
437 - an array of pointers to keys.
438 - an array of tuples (type, pointer to values)
439 * For certain types, the pointer to values is replaced by the actual
440 value. (see type_is_stored_inline)
441 Data consists of:
442 - All Key data, in order
443 - All Value data, in order
444 */
445 const size_t key_offset= 2 * offset_size + i * key_size(large);
446 const size_t key_start= read_offset_or_size(data + key_offset, large);
447 /* The length of keys is always stored in 2 bytes (large == false) */
448 const size_t key_len= read_offset_or_size(
449 data + key_offset + offset_size, false);
450
451 const size_t value_type_offset=(2 * offset_size +
452 element_count * key_size(large) +
453 i * value_size(large));
454
455 /* First print the key. */
456 if (buffer->append('"') ||
457 append_string_json(buffer, data + key_start, key_len) ||
458 buffer->append("\": "))
459 {
460 return true;
461 }
462
463 /* Then print the value. */
464 if (parse_mysql_scalar_or_value(buffer, data, bytes, value_type_offset,
465 large, depth))
466 return true;
467 }
468 else
469 {
470 /*
471 Arrays do not have the keys vector and its associated data.
472 We jump straight to reading values.
473 */
474 const size_t value_type_offset= 2 * offset_size + value_size(large) * i;
475
476 if (parse_mysql_scalar_or_value(buffer, data, bytes, value_type_offset,
477 large, depth))
478 return true;
479 }
480
481 if (i != element_count - 1 && buffer->append(", "))
482 return true;
483 }
484
485 return buffer->append(handle_as_object ? '}' : ']');
486 }
487
488 /**
489 Check the first byte of data which is the enum structure and based on it
490 perform parsing of object or array where each can have small or large
491 representation.
492
493 @param[out] buffer Where to print the results.
494 @param[in] type Type of value {object, array, scalar}.
495 @param[in] data Raw data for parsing.
496 @param[in] length Length of data.
497 @param[in] depth Depth size.
498 */
parse_mysql_json_value(String * buffer,JSONB_TYPES type,const uchar * data,size_t len,size_t depth)499 bool parse_mysql_json_value(String *buffer, JSONB_TYPES type, const uchar *data,
500 size_t len, size_t depth)
501 {
502 const bool IS_OBJECT=true, IS_LARGE=true;
503 switch (type) {
504 case JSONB_TYPE_SMALL_OBJECT:
505 return parse_array_or_object(buffer, data, len, IS_OBJECT, !IS_LARGE, depth);
506 case JSONB_TYPE_LARGE_OBJECT:
507 return parse_array_or_object(buffer, data, len, IS_OBJECT, IS_LARGE, depth);
508 case JSONB_TYPE_SMALL_ARRAY:
509 return parse_array_or_object(buffer, data, len, !IS_OBJECT, !IS_LARGE, depth);
510 case JSONB_TYPE_LARGE_ARRAY:
511 return parse_array_or_object(buffer, data, len, !IS_OBJECT, IS_LARGE, depth);
512 default:
513 return parse_mysql_scalar(buffer, type, data, len);
514 }
515 }
516