1 /* Copyright (c) 2016, 2020, MariaDB Corporation.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
15
16 #include <my_global.h>
17 #include <string.h>
18 #include <m_ctype.h>
19 #include "json_lib.h"
20
/*
  JSON escaping lets the user specify characters by their UTF-16 code
  units (the \uXXXX form). To decode those we need the conversion
  routines of the utf16 charset, so their prototypes are declared here.
  json_handle_esc() below calls my_utf16_uni() on the raw bytes
  collected from the hex digits.
*/
int my_utf16_uni(CHARSET_INFO *cs,
                 my_wc_t *pwc, const uchar *s, const uchar *e);
int my_uni_utf16(CHARSET_INFO *cs, my_wc_t wc, uchar *s, uchar *e);
29
30
/*
  Point the string reader at a new piece of input.
  'str' becomes the current read position and 'end' is remembered
  as the end of the input. Charset and error state are not touched.
*/
void json_string_set_str(json_string_t *s,
                         const uchar *str, const uchar *end)
{
  s->str_end= end;
  s->c_str= str;
}
37
38
/*
  Bind the string reader to the charset 'i_cs': remember the charset,
  cache its mb_wc character-reading function and clear the error code.
*/
void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs)
{
  s->wc= i_cs->cset->mb_wc;
  s->cs= i_cs;
  s->error= 0;
}
45
46
/*
  Fully initialize the string reader: set the input range
  and the charset (which also resets the error code).
*/
static void json_string_setup(json_string_t *s,
                              CHARSET_INFO *i_cs, const uchar *str,
                              const uchar *end)
{
  json_string_set_str(s, str, end);
  json_string_set_cs(s, i_cs);
}
54
55
/*
  Classes of characters as seen by the main JSON scanner.
  The classes below NR_C_CLASSES are used as column indexes of the
  json_actions[] table, so their order must match the table columns.
*/
enum json_char_classes {
  C_EOS,    /* end of string */
  C_LCURB,  /* { */
  C_RCURB,  /* } */
  C_LSQRB,  /* [ */
  C_RSQRB,  /* ] */
  C_COLON,  /* : */
  C_COMMA,  /* , */
  C_QUOTE,  /* " */
  C_DIGIT,  /* -0123456789 */
  C_LOW_F,  /* 'f' (for "false") */
  C_LOW_N,  /* 'n' (for "null") */
  C_LOW_T,  /* 't' (for "true") */
  C_ETC,    /* everything else */
  C_ERR,    /* character disallowed in JSON */
  C_BAD,    /* invalid character, charset handler cannot read it */
  NR_C_CLASSES, /* Number of classes that are handled with functions. */
  C_SPACE   /* space. Skipped by get_first_nonspace(), so it needs no
               handler and is placed after the counter. */
};
75
76
/*
  This array maps the first 128 Unicode code points to character classes
  (index == code point). Code points >= 128 are not in the table;
  callers must range-check first and treat them as C_ETC.
  Tab (9), LF (10), CR (13) and space (32) are C_SPACE, the remaining
  control characters are C_ERR.
*/

static enum json_char_classes json_chr_map[128] = {
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
  C_ERR,   C_SPACE, C_SPACE, C_ERR,   C_ERR,   C_SPACE, C_ERR,   C_ERR,
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,

  C_SPACE, C_ETC,   C_QUOTE, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_COMMA, C_DIGIT, C_ETC,   C_ETC,
  C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
  C_DIGIT, C_DIGIT, C_COLON, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,

  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_LSQRB, C_ETC,   C_RSQRB, C_ETC,   C_ETC,

  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_F, C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_N, C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_T, C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_LCURB, C_ETC,   C_RCURB, C_ETC,   C_ETC
};
103
104
/*
  The JSON parser actually has more states than 'enum json_states'
  declares. The additional states are internal and never visible to
  the user of the library, so they are declared here rather than in
  the public header. They are numbered right after the user-visible
  states so that all of them can index the same json_actions[] table.
*/

enum json_all_states {
  JST_DONE=       NR_JSON_USER_STATES,         /* ok to finish     */
  JST_OBJ_CONT=   NR_JSON_USER_STATES+1,       /* object continues */
  JST_ARRAY_CONT= NR_JSON_USER_STATES+2,       /* array continues  */
  JST_READ_VALUE= NR_JSON_USER_STATES+3,       /* value is being read */
  NR_JSON_STATES= NR_JSON_USER_STATES+4        /* total number of states */
};
118
119
/*
  Type of the functions stored in json_actions[]. A handler gets the
  engine and returns 0 to let the scan go on, non-zero to stop it.
*/
typedef int (*json_state_handler)(json_engine_t *);
121
122
123 /* The string is broken. */
unexpected_eos(json_engine_t * j)124 static int unexpected_eos(json_engine_t *j)
125 {
126 j->s.error= JE_EOS;
127 return 1;
128 }
129
130
131 /* This symbol here breaks the JSON syntax. */
syntax_error(json_engine_t * j)132 static int syntax_error(json_engine_t *j)
133 {
134 j->s.error= JE_SYN;
135 return 1;
136 }
137
138
139 /* Value of object. */
mark_object(json_engine_t * j)140 static int mark_object(json_engine_t *j)
141 {
142 j->state= JST_OBJ_START;
143 if (++j->stack_p < JSON_DEPTH_LIMIT)
144 {
145 j->stack[j->stack_p]= JST_OBJ_CONT;
146 return 0;
147 }
148 j->s.error= JE_DEPTH;
149 return 1;
150 }
151
152
153 /* Read value of object. */
read_obj(json_engine_t * j)154 static int read_obj(json_engine_t *j)
155 {
156 j->state= JST_OBJ_START;
157 j->value_type= JSON_VALUE_OBJECT;
158 j->value= j->value_begin;
159 if (++j->stack_p < JSON_DEPTH_LIMIT)
160 {
161 j->stack[j->stack_p]= JST_OBJ_CONT;
162 return 0;
163 }
164 j->s.error= JE_DEPTH;
165 return 1;
166 }
167
168
169 /* Value of array. */
mark_array(json_engine_t * j)170 static int mark_array(json_engine_t *j)
171 {
172 j->state= JST_ARRAY_START;
173 if (++j->stack_p < JSON_DEPTH_LIMIT)
174 {
175 j->stack[j->stack_p]= JST_ARRAY_CONT;
176 j->value= j->value_begin;
177 return 0;
178 }
179 j->s.error= JE_DEPTH;
180 return 1;
181 }
182
183 /* Read value of object. */
read_array(json_engine_t * j)184 static int read_array(json_engine_t *j)
185 {
186 j->state= JST_ARRAY_START;
187 j->value_type= JSON_VALUE_ARRAY;
188 j->value= j->value_begin;
189 if (++j->stack_p < JSON_DEPTH_LIMIT)
190 {
191 j->stack[j->stack_p]= JST_ARRAY_CONT;
192 return 0;
193 }
194 j->s.error= JE_DEPTH;
195 return 1;
196 }
197
198
199
/*
  Character classes inside the JSON string constant.
  We mostly need this to parse escaping properly.
  Escapings available in JSON are:
    \" - quotation mark
    \\ - backslash
    \b - backspace       UNICODE 8
    \f - formfeed        UNICODE 12
    \n - newline         UNICODE 10
    \r - carriage return UNICODE 13
    \t - horizontal tab  UNICODE 9
    \u{four-hex-digits} - UTF-16 code unit of the character

  The classes S_0..S_F are numerically equal to the value of the hex
  digit they stand for; read_4_hexdigits() relies on that when it
  composes the bytes of a \u escape.
*/
enum json_string_char_classes {
  S_0= 0,
  S_1= 1,
  S_2= 2,
  S_3= 3,
  S_4= 4,
  S_5= 5,
  S_6= 6,
  S_7= 7,
  S_8= 8,
  S_9= 9,
  S_A= 10,
  S_B= 11,
  S_C= 12,
  S_D= 13,
  S_E= 14,
  S_F= 15,
  S_ETC= 36,    /* rest of characters; classes <= S_ETC need no special care. */
  S_QUOTE= 37,  /* " */
  S_BKSL= 38,   /* \ */
  S_ERR= 100,   /* disallowed */
};
235
236
/*
  This maps characters to their types inside a string constant.
  The table only covers code points 0..127 (index == code point), so
  a character must be checked to be < 128 before it is used as an index.
  All control characters (0..31) are S_ERR; hex digits in either case
  map to S_0..S_F.
*/
static enum json_string_char_classes json_instr_chr_map[128] = {
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,

  S_ETC,   S_ETC,   S_QUOTE, S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_0,     S_1,     S_2,     S_3,     S_4,     S_5,     S_6,     S_7,
  S_8,     S_9,     S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,

  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_BKSL,  S_ETC,   S_ETC,   S_ETC,

  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC
};
259
260
/*
  Read four hexadecimal digits from the input and add them, packed two
  digits per byte (high nibble first), to dest[0] and dest[1].
  The caller is expected to pass zeroed bytes, as the values are added.

  Returns 0 on success. Otherwise sets s->error and returns it:
  JE_EOS / JE_BAD_CHR if a character cannot be read, JE_SYN if the
  character is not a hex digit (the offending character is not consumed).
*/
static int read_4_hexdigits(json_string_t *s, uchar *dest)
{
  int pos;

  for (pos= 0; pos < 4; pos++)
  {
    int digit;
    int n_bytes= json_next_char(s);

    if (n_bytes <= 0)
    {
      s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
      return s->error;
    }

    /* Classes S_0..S_F are equal to the value of the digit. */
    if (s->c_next >= 128 ||
        (digit= json_instr_chr_map[s->c_next]) > S_F)
    {
      s->error= JE_SYN;
      return s->error;
    }

    s->c_str+= n_bytes;

    if (pos % 2)
      dest[pos / 2]+= digit;        /* low nibble  */
    else
      dest[pos / 2]+= digit * 16;   /* high nibble */
  }

  return 0;
}
277
278
/*
  Decode an escape sequence. Called when the backslash has already been
  consumed; reads what follows and leaves the resulting character in
  s->c_next.

  - \b \f \n \r \t are replaced with the respective control code.
  - An ASCII character that is disallowed inside a string (S_ERR) is
    rejected with JE_ESCAPING, and the read position is moved back
    to that character.
  - \uXXXX is decoded through my_utf16_uni(); if the first code unit
    alone is not enough, a second \uXXXX is required to complete
    the surrogate pair.
  - Any other character is returned as it is.

  Returns 0 on success, non-zero on failure with s->error set.
*/
static int json_handle_esc(json_string_t *s)
{
  int t, c_len;

  if ((c_len= json_next_char(s)) <= 0)
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;

  s->c_str+= c_len;
  switch (s->c_next)
  {
    case 'b':
      s->c_next= 8;
      return 0;
    case 'f':
      s->c_next= 12;
      return 0;
    case 'n':
      s->c_next= 10;
      return 0;
    case 'r':
      s->c_next= 13;
      return 0;
    case 't':
      s->c_next= 9;
      return 0;
  }

  if (s->c_next < 128 && (t= json_instr_chr_map[s->c_next]) == S_ERR)
  {
    /* Step back so that the position points at the bad character. */
    s->c_str-= c_len;
    return s->error= JE_ESCAPING;
  }


  /* Not \u - the escaped character stands for itself. */
  if (s->c_next != 'u')
    return 0;

  {
    /*
      Read the four-hex-digits code.
      If symbol is not in the Basic Multilingual Plane, we're reading
      the string for the next four digits to compose the UTF-16 surrogate pair.
    */
    uchar code[4]= {0,0,0,0};

    /* read_4_hexdigits() has set s->error on failure. */
    if (read_4_hexdigits(s, code))
      return 1;

    /* A single code unit was enough to get the character. */
    if ((c_len= my_utf16_uni(0, &s->c_next, code, code+2)) == 2)
      return 0;

    /*
      Anything but 'two more bytes are needed' means the code is invalid.
      (MY_CS_TOOSMALL4 is presumably what my_utf16_uni() returns for the
      first half of a surrogate pair - defined outside this file.)
    */
    if (c_len != MY_CS_TOOSMALL4)
      return s->error= JE_BAD_CHR;

    /* The second half must follow immediately as another \uXXXX. */
    if ((c_len= json_next_char(s)) <= 0)
      return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
    if (s->c_next != '\\')
      return s->error= JE_SYN;

    s->c_str+= c_len;
    if ((c_len= json_next_char(s)) <= 0)
      return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
    if (s->c_next != 'u')
      return s->error= JE_SYN;
    s->c_str+= c_len;

    if (read_4_hexdigits(s, code+2))
      return 1;

    /* Now convert both code units (4 bytes) at once. */
    if ((c_len= my_utf16_uni(0, &s->c_next, code, code+4)) == 4)
      return 0;
  }
  return s->error= JE_BAD_CHR;
}
353
354
/*
  Read the next character of a string constant into js->c_next,
  decoding the escape sequence if the character is a backslash.

  Returns 0 on success. On failure returns non-zero with js->error
  set (JE_EOS / JE_BAD_CHR here, or whatever json_handle_esc() reports).
*/
int json_read_string_const_chr(json_string_t *js)
{
  int n_bytes= json_next_char(js);

  if (n_bytes <= 0)
  {
    js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
    return 1;
  }

  js->c_str+= n_bytes;

  if (js->c_next != '\\')
    return 0;

  return json_handle_esc(js);
}
367
368
/*
  Skip the rest of a string constant, up to and including the
  closing quote. Sets j->value_escaped if any escaping is met.

  On success the state is restored from the top of the state stack and
  0 is returned. On failure returns non-zero with j->s.error set.
*/
static int skip_str_constant(json_engine_t *j)
{
  json_string_t *str= &j->s;
  int n_bytes;

  while ((n_bytes= json_next_char(str)) > 0)
  {
    str->c_str+= n_bytes;

    /* Ordinary character - nothing to do. */
    if (str->c_next >= 128 || json_instr_chr_map[str->c_next] <= S_ETC)
      continue;

    if (str->c_next == '"')
    {
      /* End of the constant. */
      j->state= j->stack[j->stack_p];
      return 0;
    }

    if (str->c_next != '\\')
    {
      /* Symbol not allowed in JSON. */
      str->error= JE_NOT_JSON_CHR;
      return str->error;
    }

    j->value_escaped= 1;
    if (json_handle_esc(str))
      return 1;
  }

  str->error= json_eos(str) ? JE_EOS : JE_BAD_CHR;
  return str->error;
}
399
400
401 /* Scalar string. */
v_string(json_engine_t * j)402 static int v_string(json_engine_t *j)
403 {
404 return skip_str_constant(j) || json_scan_next(j);
405 }
406
407
408 /* Read scalar string. */
read_strn(json_engine_t * j)409 static int read_strn(json_engine_t *j)
410 {
411 j->value= j->s.c_str;
412 j->value_type= JSON_VALUE_STRING;
413 j->value_escaped= 0;
414
415 if (skip_str_constant(j))
416 return 1;
417
418 j->state= j->stack[j->stack_p];
419 j->value_len= (int)(j->s.c_str - j->value) - 1;
420 return 0;
421 }
422
423
/*
  We have a dedicated parser for numeric constants. It's similar
  to the main JSON parser: we define character classes,
  map characters to the classes and implement the state-per-class
  table. Though we don't create functions that handle
  particular classes, the table just specifies what state the
  parser should get into.
*/
enum json_num_char_classes {
  N_MINUS,        /* - */
  N_PLUS,         /* + */
  N_ZERO,         /* 0 */
  N_DIGIT,        /* 1..9 */
  N_POINT,        /* . */
  N_E,            /* e or E */
  N_END,          /* character that can follow a number: space , ] } */
  N_EEND,         /* other printable character */
  N_ERR,          /* control character */
  N_NUM_CLASSES   /* number of the classes */
};
444
445
/*
  Maps the first 128 code points (index == code point) to the classes
  of the numeric parser. Callers must make sure the character
  is < 128 before indexing.
*/
static enum json_num_char_classes json_num_chr_map[128] = {
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
  N_ERR,   N_END,   N_END,   N_ERR,   N_ERR,   N_END,   N_ERR,   N_ERR,
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,

  N_END,   N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_PLUS,  N_END,   N_MINUS, N_POINT, N_EEND,
  N_ZERO,  N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT,
  N_DIGIT, N_DIGIT, N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,

  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_END,   N_EEND,  N_EEND,

  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_END,   N_EEND,  N_EEND,
};
467
468
/*
  States of the numeric parser. NS_OK is 0 and all the other states
  are positive; skip_num_constant() uses 'state > 0' to tell that
  the number goes on.
*/
enum json_num_states {
  NS_OK,  /* Number ended. */
  NS_GO,  /* Initial state. */
  NS_GO1, /* If the number starts with '-'. */
  NS_Z,   /* If the number starts with '0'. */
  NS_Z1,  /* If the number starts with '-0'. */
  NS_INT, /* Integer part. */
  NS_FRAC,/* Fractional part. */
  NS_EX,  /* Exponential part begins. */
  NS_EX1, /* Exponential part continues. */
  NS_NUM_STATES /* Number of the states. */
};
481
482
/*
  Transitions of the numeric parser: json_num_states[state][class]
  is either the next state or a JE_* error code.

  NOTE(review): every row lists 8 values while N_NUM_CLASSES is 9.
  So the value in the column headed 'ERROR' is actually at index
  N_EEND, and the N_ERR column is implicitly initialized with 0,
  which equals NS_OK. Confirm this is intended.
*/
static int json_num_states[NS_NUM_STATES][N_NUM_CLASSES]=
{
/*         -        +       0        1..9    POINT    E       END_OK   ERROR */
/*OK*/   { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
/*GO*/   { NS_GO1,  JE_SYN, NS_Z,     NS_INT, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
/*GO1*/  { JE_SYN,  JE_SYN, NS_Z1,    NS_INT, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
/*ZERO*/ { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, NS_FRAC, JE_SYN, NS_OK,  JE_BAD_CHR },
/*ZE1*/  { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, NS_FRAC, JE_SYN, NS_OK,  JE_BAD_CHR },
/*INT*/  { JE_SYN,  JE_SYN, NS_INT,   NS_INT, NS_FRAC, NS_EX,  NS_OK,  JE_BAD_CHR },
/*FRAC*/ { JE_SYN,  JE_SYN, NS_FRAC,  NS_FRAC,JE_SYN,  NS_EX,  NS_OK,  JE_BAD_CHR },
/*EX*/   { NS_EX,   NS_EX,  NS_EX1,   NS_EX1, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
/*EX1*/  { JE_SYN,  JE_SYN, NS_EX1,   NS_EX1, JE_SYN,  JE_SYN, NS_OK,  JE_BAD_CHR }
};
496
497
/*
  Flags to be OR-ed into j->num_flags for every state the numeric
  parser passes through (indexed by enum json_num_states).
*/
static uint json_num_state_flags[NS_NUM_STATES]=
{
/*OK*/   0,
/*GO*/   0,
/*GO1*/  JSON_NUM_NEG,
/*ZERO*/ 0,
/*ZE1*/  0,
/*INT*/  0,
/*FRAC*/ JSON_NUM_FRAC_PART,
/*EX*/   JSON_NUM_EXP,
/*EX1*/  0,
};
510
511
/*
  Skip a numeric constant and collect its properties in j->num_flags.

  The first character of the number is expected to be in j->s.c_next
  already (and to be < 128, as it is used to index json_num_chr_map[]
  unchecked); it determines the initial state.

  Returns 0 when the scan of the number stopped, with j->state restored
  from the top of the state stack. Returns 1 with j->s.error set
  if the input ends in a state where a number cannot end, or a
  character cannot be handled.
*/
static int skip_num_constant(json_engine_t *j)
{
  int state= json_num_states[NS_GO][json_num_chr_map[j->s.c_next]];
  int c_len;

  j->num_flags= 0;
  for (;;)
  {
    j->num_flags|= json_num_state_flags[state];
    if ((c_len= json_next_char(&j->s)) > 0 && j->s.c_next < 128)
    {
      /* Positive state - the character belongs to the number, take it. */
      if ((state= json_num_states[state][json_num_chr_map[j->s.c_next]]) > 0)
      {
        j->s.c_str+= c_len;
        continue;
      }
      /*
        NS_OK or an error code. The character is not consumed and no
        error is set here; the character is left to be examined by
        whoever reads the input next.
      */
      break;
    }

    /*
      No character could be read, or it is not ASCII.
      At the end of the input the N_END transition tells if the number
      may end in this state. The '< 0' test presumes that JE_* error
      codes are negative (they are defined outside this file).
    */
    if ((j->s.error=
          json_eos(&j->s) ? json_num_states[state][N_END] : JE_BAD_CHR) < 0)
      return 1;
    else
      break;
  }

  j->state= j->stack[j->stack_p];
  return 0;
}
541
542
543 /* Scalar numeric. */
v_number(json_engine_t * j)544 static int v_number(json_engine_t *j)
545 {
546 return skip_num_constant(j) || json_scan_next(j);
547 }
548
549
550 /* Read numeric constant. */
read_num(json_engine_t * j)551 static int read_num(json_engine_t *j)
552 {
553 j->value= j->value_begin;
554 if (skip_num_constant(j) == 0)
555 {
556 j->value_type= JSON_VALUE_NUMBER;
557 j->value_len= (int)(j->s.c_str - j->value_begin);
558 return 0;
559 }
560 return 1;
561 }
562
563
564 /* Check that the JSON string matches the argument and skip it. */
skip_string_verbatim(json_string_t * s,const char * str)565 static int skip_string_verbatim(json_string_t *s, const char *str)
566 {
567 int c_len;
568 while (*str)
569 {
570 if ((c_len= json_next_char(s)) > 0)
571 {
572 if (s->c_next == (my_wc_t) *(str++))
573 {
574 s->c_str+= c_len;
575 continue;
576 }
577 return s->error= JE_SYN;
578 }
579 return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
580 }
581
582 return 0;
583 }
584
585
586 /* Scalar false. */
v_false(json_engine_t * j)587 static int v_false(json_engine_t *j)
588 {
589 if (skip_string_verbatim(&j->s, "alse"))
590 return 1;
591 j->state= j->stack[j->stack_p];
592 return json_scan_next(j);
593 }
594
595
596 /* Scalar null. */
v_null(json_engine_t * j)597 static int v_null(json_engine_t *j)
598 {
599 if (skip_string_verbatim(&j->s, "ull"))
600 return 1;
601 j->state= j->stack[j->stack_p];
602 return json_scan_next(j);
603 }
604
605
606 /* Scalar true. */
v_true(json_engine_t * j)607 static int v_true(json_engine_t *j)
608 {
609 if (skip_string_verbatim(&j->s, "rue"))
610 return 1;
611 j->state= j->stack[j->stack_p];
612 return json_scan_next(j);
613 }
614
615
616 /* Read false. */
read_false(json_engine_t * j)617 static int read_false(json_engine_t *j)
618 {
619 j->value_type= JSON_VALUE_FALSE;
620 j->value= j->value_begin;
621 j->state= j->stack[j->stack_p];
622 j->value_len= 5;
623 return skip_string_verbatim(&j->s, "alse");
624 }
625
626
627 /* Read null. */
read_null(json_engine_t * j)628 static int read_null(json_engine_t *j)
629 {
630 j->value_type= JSON_VALUE_NULL;
631 j->value= j->value_begin;
632 j->state= j->stack[j->stack_p];
633 j->value_len= 4;
634 return skip_string_verbatim(&j->s, "ull");
635 }
636
637
638 /* Read true. */
read_true(json_engine_t * j)639 static int read_true(json_engine_t *j)
640 {
641 j->value_type= JSON_VALUE_TRUE;
642 j->value= j->value_begin;
643 j->state= j->stack[j->stack_p];
644 j->value_len= 4;
645 return skip_string_verbatim(&j->s, "rue");
646 }
647
648
649 /* Disallowed character. */
not_json_chr(json_engine_t * j)650 static int not_json_chr(json_engine_t *j)
651 {
652 j->s.error= JE_NOT_JSON_CHR;
653 return 1;
654 }
655
656
657 /* Bad character. */
bad_chr(json_engine_t * j)658 static int bad_chr(json_engine_t *j)
659 {
660 j->s.error= JE_BAD_CHR;
661 return 1;
662 }
663
664
/*
  Correct finish: end of input in the JST_DONE state.
  Returns 1 to stop the scan, but unlike the error handlers
  it leaves j->s.error untouched.
*/
static int done(json_engine_t *j __attribute__((unused)))
{
  return 1;
}
670
671
672 /* End of the object. */
end_object(json_engine_t * j)673 static int end_object(json_engine_t *j)
674 {
675 j->stack_p--;
676 j->state= JST_OBJ_END;
677 return 0;
678 }
679
680
681 /* End of the array. */
end_array(json_engine_t * j)682 static int end_array(json_engine_t *j)
683 {
684 j->stack_p--;
685 j->state= JST_ARRAY_END;
686 return 0;
687 }
688
689
690 /* Start reading key name. */
read_keyname(json_engine_t * j)691 static int read_keyname(json_engine_t *j)
692 {
693 j->state= JST_KEY;
694 return 0;
695 }
696
697
/*
  Read characters until one that is not a space is found.

  On return *t_next is the class of that character, which has been
  consumed, and *c_len is its length in bytes. If no character can
  be read, *t_next is C_EOS at the end of the input or C_BAD otherwise,
  *c_len is the (non-positive) result of the read, and nothing is
  consumed.
*/
static void get_first_nonspace(json_string_t *js, int *t_next, int *c_len)
{
  for (;;)
  {
    *c_len= json_next_char(js);

    if (*c_len <= 0)
    {
      *t_next= json_eos(js) ? C_EOS : C_BAD;
      return;
    }

    *t_next= (js->c_next >= 128) ? C_ETC : json_chr_map[js->c_next];
    js->c_str+= *c_len;

    if (*t_next != C_SPACE)
      return;
  }
}
711
712
713 /* Next key name. */
next_key(json_engine_t * j)714 static int next_key(json_engine_t *j)
715 {
716 int t_next, c_len;
717 get_first_nonspace(&j->s, &t_next, &c_len);
718
719 if (t_next == C_QUOTE)
720 {
721 j->state= JST_KEY;
722 return 0;
723 }
724
725 j->s.error= (t_next == C_EOS) ? JE_EOS :
726 ((t_next == C_BAD) ? JE_BAD_CHR :
727 JE_SYN);
728 return 1;
729 }
730
731
/*
  Forward declarations of the handlers that are referenced in
  json_actions[] but have to be defined after the table,
  because they use the table themselves.
*/
static int skip_colon(json_engine_t *j);
static int skip_key(json_engine_t *j);
static int struct_end_cb(json_engine_t *j);
static int struct_end_qb(json_engine_t *j);
static int struct_end_cm(json_engine_t *j);
static int struct_end_eos(json_engine_t *j);
739
740
/* A comma was met inside an array: the next item is a value. */
static int next_item(json_engine_t *j)
{
  j->state= JST_VALUE;

  return 0;
}
746
747
/*
  The first item of an array begins. The character that
  json_scan_next() has just consumed belongs to the item, so step
  back by its saved length to let it be read again as a value.
*/
static int array_item(json_engine_t *j)
{
  j->s.c_str-= j->sav_c_len;
  j->state= JST_VALUE;
  return 0;
}
754
755
/*
  The main dispatch table of the parser: json_actions[state][class]
  is the function to call when a character of the 'class' is read
  in the 'state'. Rows follow the order of the states (the user-visible
  ones first, then enum json_all_states), columns follow
  enum json_char_classes up to NR_C_CLASSES.
*/
static json_state_handler json_actions[NR_JSON_STATES][NR_C_CLASSES]=
/*
   EOS              {            }             [             ]
   :                ,            "             -0..9         f
   n                t              ETC          ERR           BAD
*/
{
  {/*VALUE*/
    unexpected_eos, mark_object, syntax_error, mark_array,   syntax_error,
    syntax_error,   syntax_error,v_string,     v_number,     v_false,
    v_null,         v_true,       syntax_error, not_json_chr, bad_chr},
  {/*KEY*/
    unexpected_eos, skip_key,    skip_key,     skip_key,     skip_key,
    skip_key,       skip_key,    skip_colon,   skip_key,     skip_key,
    skip_key,       skip_key,     skip_key,     not_json_chr, bad_chr},
  {/*OBJ_START*/
    unexpected_eos, syntax_error, end_object,  syntax_error, syntax_error,
    syntax_error,   syntax_error, read_keyname, syntax_error, syntax_error,
    syntax_error,   syntax_error,   syntax_error,    not_json_chr, bad_chr},
  {/*OBJ_END*/
    struct_end_eos, syntax_error, struct_end_cb, syntax_error, struct_end_qb,
    syntax_error,   struct_end_cm,syntax_error,  syntax_error, syntax_error,
    syntax_error,   syntax_error,  syntax_error,    not_json_chr, bad_chr},
  {/*ARRAY_START*/
    unexpected_eos, array_item,   syntax_error, array_item,   end_array,
    syntax_error,   syntax_error, array_item,  array_item,  array_item,
    array_item,     array_item,    syntax_error,    not_json_chr, bad_chr},
  {/*ARRAY_END*/
    struct_end_eos, syntax_error, struct_end_cb, syntax_error, struct_end_qb,
    syntax_error,   struct_end_cm, syntax_error, syntax_error, syntax_error,
    syntax_error,   syntax_error,  syntax_error,    not_json_chr, bad_chr},
  {/*DONE*/
    done,           syntax_error, syntax_error, syntax_error, syntax_error,
    syntax_error,   syntax_error, syntax_error, syntax_error, syntax_error,
    syntax_error,   syntax_error, syntax_error, not_json_chr, bad_chr},
  {/*OBJ_CONT*/
    unexpected_eos, syntax_error, end_object,    syntax_error,   syntax_error,
    syntax_error,   next_key,     syntax_error,  syntax_error,   syntax_error,
    syntax_error,   syntax_error,    syntax_error,    not_json_chr, bad_chr},
  {/*ARRAY_CONT*/
    unexpected_eos, syntax_error, syntax_error,  syntax_error,   end_array,
    syntax_error,   next_item,    syntax_error,  syntax_error,   syntax_error,
    syntax_error,   syntax_error,    syntax_error,    not_json_chr, bad_chr},
  {/*READ_VALUE*/
    unexpected_eos, read_obj,     syntax_error,  read_array,    syntax_error,
    syntax_error,   syntax_error, read_strn,     read_num,      read_false,
    read_null,      read_true,    syntax_error,    not_json_chr, bad_chr},
};
804
805
806
/*
  Prepare the engine to scan the JSON text given by str and end,
  in the charset 'i_cs'. The state stack gets the single JST_DONE entry
  and the engine expects a value. Always returns 0.
*/
int json_scan_start(json_engine_t *je,
                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
{
  json_string_setup(&je->s, i_cs, str, end);

  je->state= JST_VALUE;
  je->stack_p= 0;
  je->stack[0]= JST_DONE;

  return 0;
}
816
817
818 /* Skip colon and the value. */
skip_colon(json_engine_t * j)819 static int skip_colon(json_engine_t *j)
820 {
821 int t_next, c_len;
822
823 get_first_nonspace(&j->s, &t_next, &c_len);
824
825 if (t_next == C_COLON)
826 {
827 get_first_nonspace(&j->s, &t_next, &c_len);
828 return json_actions[JST_VALUE][t_next](j);
829 }
830
831 j->s.error= (t_next == C_EOS) ? JE_EOS :
832 ((t_next == C_BAD) ? JE_BAD_CHR:
833 JE_SYN);
834
835 return 1;
836 }
837
838
839 /* Skip colon and the value. */
skip_key(json_engine_t * j)840 static int skip_key(json_engine_t *j)
841 {
842 int t_next, c_len;
843
844 if (json_instr_chr_map[j->s.c_next] == S_BKSL &&
845 json_handle_esc(&j->s))
846 return 1;
847
848 while (json_read_keyname_chr(j) == 0) {}
849
850 if (j->s.error)
851 return 1;
852
853 get_first_nonspace(&j->s, &t_next, &c_len);
854 return json_actions[JST_VALUE][t_next](j);
855 }
856
857
858 /*
859 Handle EOS after the end of an object or array.
860 To do that we should pop the stack to see if
861 we are inside an object, or an array, and
862 run our 'state machine' accordingly.
863 */
struct_end_eos(json_engine_t * j)864 static int struct_end_eos(json_engine_t *j)
865 { return json_actions[j->stack[j->stack_p]][C_EOS](j); }
866
867
868 /*
869 Handle '}' after the end of an object or array.
870 To do that we should pop the stack to see if
871 we are inside an object, or an array, and
872 run our 'state machine' accordingly.
873 */
struct_end_cb(json_engine_t * j)874 static int struct_end_cb(json_engine_t *j)
875 { return json_actions[j->stack[j->stack_p]][C_RCURB](j); }
876
877
878 /*
879 Handle ']' after the end of an object or array.
880 To do that we should pop the stack to see if
881 we are inside an object, or an array, and
882 run our 'state machine' accordingly.
883 */
struct_end_qb(json_engine_t * j)884 static int struct_end_qb(json_engine_t *j)
885 { return json_actions[j->stack[j->stack_p]][C_RSQRB](j); }
886
887
888 /*
889 Handle ',' after the end of an object or array.
890 To do that we should pop the stack to see if
891 we are inside an object, or an array, and
892 run our 'state machine' accordingly.
893 */
struct_end_cm(json_engine_t * j)894 static int struct_end_cm(json_engine_t *j)
895 { return json_actions[j->stack[j->stack_p]][C_COMMA](j); }
896
897
/*
  Read the next character of a key name into j->s.c_next.

  Returns 0 if a character of the name was read (escape sequences are
  decoded), so the caller may go on reading.
  Returns non-zero when there are no more characters to read:
    - the closing quote was found; then the spaces and the ':' after it
      are skipped, the state becomes JST_VALUE and j->s.error is
      not touched;
    - or an error happened and j->s.error is set.
  So the caller has to check j->s.error to tell these two apart.
*/
int json_read_keyname_chr(json_engine_t *j)
{
  int c_len, t;

  if ((c_len= json_next_char(&j->s)) > 0)
  {
    j->s.c_str+= c_len;
    /* Ordinary character of the name. */
    if (j->s.c_next>= 128 || (t= json_instr_chr_map[j->s.c_next]) <= S_ETC)
      return 0;

    switch (t)
    {
    case S_QUOTE:
      for (;;)  /* Skip spaces until ':'. */
      {
        if ((c_len= json_next_char(&j->s)) > 0)
        {
          if (j->s.c_next == ':')
          {
            j->s.c_str+= c_len;
            j->state= JST_VALUE;
            return 1;
          }

          if (j->s.c_next < 128 && json_chr_map[j->s.c_next] == C_SPACE)
          {
            j->s.c_str+= c_len;
            continue;
          }
          /* Something other than a space or ':' after the key. */
          j->s.error= JE_SYN;
          break;
        }
        j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR;
        break;
      }
      /* Both breaks above leave the loop with the error set. */
      return 1;
    case S_BKSL:
      return json_handle_esc(&j->s);
    case S_ERR:
      /* Step back so that the position points at the bad character. */
      j->s.c_str-= c_len;
      j->s.error= JE_STRING_CONST;
      return 1;
    }
  }
  j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR;
  return 1;
}
945
946
/*
  Read the value at the current position.

  If the engine is in the JST_KEY state, the rest of the key name is
  skipped first. Then spaces are skipped and the value is handled with
  the JST_READ_VALUE row of json_actions[], which fills in the
  j->value* fields. j->value_begin points at the first character of the
  value, j->value_end at the position where reading stopped.

  Returns 0 on success, non-zero on failure.
*/
int json_read_value(json_engine_t *j)
{
  int t_next, c_len, res;

  j->value_type= JSON_VALUE_UNINITALIZED;

  if (j->state == JST_KEY)
  {
    int key_res;

    do
    {
      key_res= json_read_keyname_chr(j);
    } while (key_res == 0);

    if (j->s.error)
      return 1;
  }

  get_first_nonspace(&j->s, &t_next, &c_len);
  j->value_begin= j->s.c_str - c_len;

  res= json_actions[JST_READ_VALUE][t_next](j);
  j->value_end= j->s.c_str;

  return res;
}
967
968
/*
  Make one step of the scan: skip spaces, read the next character
  and call the handler that json_actions[] has for the current state
  and the class of the character. The length of the character is
  saved in j->sav_c_len. Returns what the handler returns.
*/
int json_scan_next(json_engine_t *j)
{
  int char_class;

  get_first_nonspace(&j->s, &char_class, &j->sav_c_len);

  return json_actions[j->state][char_class](j);
}
976
977
/*
  Classes of characters for the JSON path parser. They are the column
  indexes of the json_path_transitions[] table, so the order matters.
*/
enum json_path_chr_classes {
  P_EOS,    /* end of string */
  P_USD,    /* $ */
  P_ASTER,  /* * */
  P_LSQRB,  /* [ */
  P_RSQRB,  /* ] */
  P_POINT,  /* . */
  P_ZERO,   /* 0 */
  P_DIGIT,  /* 123456789 */
  P_L,      /* l (for "lax") */
  P_S,      /* s (for "strict") */
  P_SPACE,  /* space */
  P_BKSL,   /* \ */
  P_QUOTE,  /* " */
  P_ETC,    /* everything else */
  P_ERR,    /* character disallowed in JSON*/
  P_BAD,    /* invalid character */
  N_PATH_CLASSES, /* number of the classes */
};
997
998
/*
  Maps the first 128 code points (index == code point) to the classes
  of the path parser. Characters >= 128 are not in the table and
  are treated as P_ETC by the caller.

  NOTE(review): P_S is at the indexes 82 and 114, that is 'R' and 'r',
  while the enum describes P_S as 's' (83 and 115). P_L is at 'L'/'l'
  as expected. Confirm whether the position of P_S is intended.
*/
static enum json_path_chr_classes json_path_chr_map[128] = {
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
  P_ERR,   P_SPACE, P_SPACE, P_ERR,   P_ERR,   P_SPACE, P_ERR,   P_ERR,
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,

  P_SPACE, P_ETC,   P_QUOTE, P_ETC,   P_USD,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ASTER, P_ETC,   P_ETC,   P_ETC,   P_POINT, P_ETC,
  P_ZERO,  P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT,
  P_DIGIT, P_DIGIT, P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,

  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_S,     P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_LSQRB, P_BKSL,  P_RSQRB, P_ETC,   P_ETC,

  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_S,     P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC
};
1020
1021
/*
  States of the JSON path parser. The states before N_PATH_STATES have
  a row in json_path_transitions[]. The states after it only appear
  as values in the table and have no row of their own.
*/
enum json_path_states {
  PS_GO,  /* Initial state. */
  PS_LAX, /* Parse the 'lax' keyword. */
  PS_PT,  /* New path's step begins. */
  PS_AR,  /* Parse array step. */
  PS_SAR, /* space after the '['. */
  PS_AWD, /* Array wildcard. */
  PS_Z,   /* '0' (as an array item number). */
  PS_INT, /* Parse integer (as an array item number). */
  PS_AS,  /* Space. */
  PS_KEY, /* Key. */
  PS_KNM, /* Parse key name. */
  PS_KWD, /* Key wildcard. */
  PS_AST, /* Asterisk. */
  PS_DWD, /* Double wildcard. */
  PS_KEYX, /* Key started with quote ("). */
  PS_KNMX, /* Parse quoted key name. */
  N_PATH_STATES, /* Below are states that aren't in the transitions table. */
  PS_SCT,  /* Parse the 'strict' keyword. */
  PS_EKY,  /* '.' after the keyname so next step is the key. */
  PS_EKYX, /* Closing " for the quoted keyname. */
  PS_EAR,  /* '[' after the keyname so next step is the array. */
  PS_ESC,  /* Escaping in the keyname. */
  PS_ESCX, /* Escaping in the quoted keyname. */
  PS_OK,   /* Path normally ended. */
  PS_KOK   /* EOS after the keyname so end the path normally. */
};
1049
1050
/*
  Transitions of the JSON path parser:
  json_path_transitions[state][class] is either the next state or
  a JE_* error code. json_path_setup() treats negative values as
  errors. Columns follow enum json_path_chr_classes; each row
  has N_PATH_CLASSES (16) values, though the line breaks inside the
  rows are not always at the same column.
*/
static int json_path_transitions[N_PATH_STATES][N_PATH_CLASSES]=
{
/*
            EOS       $,      *       [       ]       .       0
            1..9    L       S       SPACE   \       "       ETC
            ERR              BAD
*/
/* GO  */ { JE_EOS, PS_PT,  JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
            JE_SYN, PS_LAX, PS_SCT, PS_GO,  JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* LAX */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
            JE_SYN, PS_LAX, JE_SYN, PS_GO,  JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* PT */  { PS_OK,  JE_SYN, PS_AST, PS_AR,  JE_SYN, PS_KEY, JE_SYN, JE_SYN,
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* AR */  { JE_EOS, JE_SYN, PS_AWD, JE_SYN, JE_SYN, JE_SYN, PS_Z,
            PS_INT, JE_SYN, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* SAR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, PS_PT,  JE_SYN, PS_Z,
            PS_INT, JE_SYN, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* AWD */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, JE_SYN,
            JE_SYN, JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* Z */   { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, JE_SYN,
            JE_SYN, JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* INT */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, PS_INT,
            PS_INT, JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* AS */  { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, JE_SYN, JE_SYN,
            JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* KEY */ { JE_EOS, PS_KNM, PS_KWD, JE_SYN, PS_KNM, JE_SYN, PS_KNM,
            PS_KNM, PS_KNM, PS_KNM, PS_KNM, JE_SYN, PS_KEYX, PS_KNM,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* KNM */ { PS_KOK, PS_KNM, PS_AST, PS_EAR, PS_KNM, PS_EKY, PS_KNM,
            PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_ESC, PS_KNM, PS_KNM,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* KWD */ { PS_OK,  JE_SYN, JE_SYN, PS_AR,  JE_SYN, PS_EKY, JE_SYN,
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* AST */ { JE_SYN, JE_SYN, PS_DWD, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* DWD */ { JE_SYN, JE_SYN, PS_AST, PS_AR,  JE_SYN, PS_KEY, JE_SYN, JE_SYN,
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* KEYX*/ { JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
            PS_KNMX,PS_KNMX, PS_KNMX, PS_KNMX, PS_ESCX, PS_EKYX, PS_KNMX,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* KNMX */{ JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
            PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,PS_ESCX, PS_EKYX, PS_KNMX,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
};
1107
1108
/*
  Parse the textual JSON path [str, end) given in charset i_cs and fill
  the steps of 'p'.

  The text is consumed one character at a time; each character is mapped
  to a class and fed through json_path_transitions[][].  Step 0 is always
  initialised as JSON_PATH_ARRAY_WILD, further steps are appended as the
  '.' and '[' separators are met.

  Returns 0 on success.  On failure a non-zero value is returned and
  p->s.error holds the error code (a negative table entry, JE_DEPTH,
  JE_SYN for a path that ends right after '**', or whatever
  skip_string_verbatim()/json_handle_esc() reported).
*/
int json_path_setup(json_path_t *p,
                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
{
  int c_len, t_next, state= PS_GO;
  /* Set when '**' was seen; applied to the type of the next step. */
  enum json_path_step_types double_wildcard= JSON_PATH_KEY_NULL;

  json_string_setup(&p->s, i_cs, str, end);

  p->steps[0].type= JSON_PATH_ARRAY_WILD;
  p->last_step= p->steps;
  p->mode_strict= FALSE;
  p->types_used= JSON_PATH_KEY_NULL;

  do
  {
    /* Classify the next character; codes >= 128 are all treated as P_ETC. */
    if ((c_len= json_next_char(&p->s)) <= 0)
      t_next= json_eos(&p->s) ? P_EOS : P_BAD;
    else
      t_next= (p->s.c_next >= 128) ? P_ETC : json_path_chr_map[p->s.c_next];

    /* Negative table entries are error codes. */
    if ((state= json_path_transitions[state][t_next]) < 0)
      return p->s.error= state;

    /*
      Step over the character.  The cases below use 'c_str - c_len' when
      they need the position where this character started.
    */
    p->s.c_str+= c_len;

    switch (state)
    {
    case PS_LAX:
      /* The rest of the mode keyword must follow verbatim. */
      if ((p->s.error= skip_string_verbatim(&p->s, "ax")))
        return 1;
      p->mode_strict= FALSE;
      continue;
    case PS_SCT:
      /*
        NOTE(review): together with the single character that led here,
        the literal "rict" does not obviously spell "strict" - confirm
        against json_path_chr_map and skip_string_verbatim().
      */
      if ((p->s.error= skip_string_verbatim(&p->s, "rict")))
        return 1;
      p->mode_strict= TRUE;
      state= PS_LAX;
      continue;
    case PS_KWD:
    case PS_AWD:
      /* '*' used as a key or as an array index. */
      p->last_step->type|= JSON_PATH_WILD;
      p->types_used|= JSON_PATH_WILD;
      continue;
    case PS_INT:
      /* Accumulate one more decimal digit of the array index. */
      p->last_step->n_item*= 10;
      p->last_step->n_item+= p->s.c_next - '0';
      continue;
    case PS_EKYX:
      /* The quoted key ends where the current character started. */
      p->last_step->key_end= p->s.c_str - c_len;
      state= PS_PT;
      continue;
    case PS_EKY:
      /* Close the current key, then open the next key step. */
      p->last_step->key_end= p->s.c_str - c_len;
      state= PS_KEY;
      /* fall through */
    case PS_KEY:
      /* Append a key step, refusing paths deeper than JSON_DEPTH_LIMIT. */
      p->last_step++;
      if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
        return p->s.error= JE_DEPTH;
      p->types_used|= p->last_step->type= JSON_PATH_KEY | double_wildcard;
      double_wildcard= JSON_PATH_KEY_NULL;
      /* fall through */
    case PS_KEYX:
      /* The key name starts right after the character just consumed. */
      p->last_step->key= p->s.c_str;
      continue;
    case PS_EAR:
      /* Close the current key, then open an array step. */
      p->last_step->key_end= p->s.c_str - c_len;
      state= PS_AR;
      /* fall through */
    case PS_AR:
      /* Append an array step with index 0, same depth check as for keys. */
      p->last_step++;
      if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
        return p->s.error= JE_DEPTH;
      p->types_used|= p->last_step->type= JSON_PATH_ARRAY | double_wildcard;
      double_wildcard= JSON_PATH_KEY_NULL;
      p->last_step->n_item= 0;
      continue;
    case PS_ESC:
      /* Escape inside an unquoted key name; continue with the name. */
      if (json_handle_esc(&p->s))
        return 1;
      state= PS_KNM;
      continue;
    case PS_ESCX:
      /* Escape inside a quoted key name; continue with the name. */
      if (json_handle_esc(&p->s))
        return 1;
      state= PS_KNMX;
      continue;
    case PS_KOK:
      /* End of input while reading a key name: the key ends here. */
      p->last_step->key_end= p->s.c_str - c_len;
      state= PS_OK;
      break; /* 'break' as the loop supposed to end after that. */
    case PS_DWD:
      /* '**' seen; remember it for the step that follows. */
      double_wildcard= JSON_PATH_DOUBLE_WILD;
      continue;
    };
  } while (state != PS_OK);

  /* A '**' that is not followed by a step is a syntax error. */
  return double_wildcard ? (p->s.error= JE_SYN) : 0;
}
1208
1209
/*
  Keep scanning until the nesting level of 'j' (j->stack_p) is lower
  than 'level'.

  Returns 0 as soon as j->stack_p < level (checked before the first
  scan step as well), 1 if json_scan_next() returns non-zero first.
*/
int json_skip_to_level(json_engine_t *j, int level)
{
  for (;;)
  {
    if (j->stack_p < level)
      return 0;

    if (json_scan_next(j))
      return 1;
  }
}
1219
1220
1221 /*
1222 works as json_skip_level() but also counts items on the current
1223 level skipped.
1224 */
json_skip_level_and_count(json_engine_t * j,int * n_items_skipped)1225 int json_skip_level_and_count(json_engine_t *j, int *n_items_skipped)
1226 {
1227 int level= j->stack_p;
1228
1229 *n_items_skipped= 0;
1230 while (json_scan_next(j) == 0)
1231 {
1232 if (j->stack_p < level)
1233 return 0;
1234 if (j->stack_p == level && j->state == JST_VALUE)
1235 (*n_items_skipped)++;
1236 }
1237
1238 return 1;
1239 }
1240
1241
/*
  Read the value at the current position and skip it completely.
  A scalar is done after json_read_value(); for anything else the
  whole nested level is skipped with json_skip_level().

  Returns 0 on success, non-zero if reading or skipping failed.
*/
int json_skip_key(json_engine_t *j)
{
  if (json_read_value(j))
    return 1;

  return json_value_scalar(j) ? 0 : json_skip_level(j);
}
1252
1253
/*
  Value stored in array_counters[] by handle_match() for the '[0]' path
  steps it steps over when the JSON value is an object.  json_find_path()
  checks for it at JST_OBJ_END to pop those steps again.
*/
#define SKIPPED_STEP_MARK ((uint) ~0)
1255
/*
  Current step of the path matches the JSON construction.
  Now we should either stop the search or go to the next
  step of the path.

  The caller guarantees that *p_cur_step is not the last step.  The
  value at the current position is read first; then

   - scalar value: the remaining steps can only match if all of them
     are plain '[0]' array steps.  If so, je->s.c_str is moved back to
     the beginning of the value and 1 is returned; otherwise 0.
   - object value while the next step is a plain '[0]': those steps are
     marked with SKIPPED_STEP_MARK in array_counters[] and stepped over.
     If nothing is left after them, the position is moved back to the
     beginning of the value, je->stack_p is decremented and 1 is
     returned.
   - otherwise the counter of the next step is reset to 0, and either
     the whole level is skipped (value kind differs from the kind of
     the next step) or *p_cur_step is advanced to the next step.

  Returns non-zero when the caller has to stop (see above, or
  json_read_value()/json_skip_level() returned non-zero), 0 to continue
  scanning.
*/
static int handle_match(json_engine_t *je, json_path_t *p,
                        json_path_step_t **p_cur_step, uint *array_counters)
{
  json_path_step_t *next_step= *p_cur_step + 1;

  DBUG_ASSERT(*p_cur_step < p->last_step);

  if (json_read_value(je))
    return 1;

  if (json_value_scalar(je))
  {
    /* Step over trailing '[0]' steps; a scalar is treated as its own item 0. */
    while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0)
    {
      if (++next_step > p->last_step)
      {
        /* Only '[0]' steps were left: rewind to the start of the value. */
        je->s.c_str= je->value_begin;
        return 1;
      }
    }
    return 0;
  }

  if (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0 &&
      je->value_type & JSON_VALUE_OBJECT)
  {
    do
    {
      /* Remember that this step was stepped over, not really entered. */
      array_counters[next_step - p->steps]= SKIPPED_STEP_MARK;
      if (++next_step > p->last_step)
      {
        je->s.c_str= je->value_begin;
        je->stack_p--;
        return 1;
      }
    } while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0);
  }


  array_counters[next_step - p->steps]= 0;

  /* The value kind has to agree with the kind of the next step. */
  if ((int) je->value_type !=
      (int) (next_step->type & JSON_PATH_KEY_OR_ARRAY))
    return json_skip_level(je);

  *p_cur_step= next_step;
  return 0;
}
1309
1310
1311 /*
1312 Check if the name of the current JSON key matches
1313 the step of the path.
1314 */
json_key_matches(json_engine_t * je,json_string_t * k)1315 int json_key_matches(json_engine_t *je, json_string_t *k)
1316 {
1317 while (json_read_keyname_chr(je) == 0)
1318 {
1319 if (json_read_string_const_chr(k) ||
1320 je->s.c_next != k->c_next)
1321 return 0;
1322 }
1323
1324 return json_read_string_const_chr(k);
1325 }
1326
1327
/*
  Scan the JSON in 'je' forward until a position matching path 'p'
  is reached.

  *p_cur_step is the path step currently being matched and is moved
  forward/backward as the scanner enters and leaves nested levels.
  array_counters[i] holds the number of array items already seen for
  step i (or SKIPPED_STEP_MARK, see handle_match()).

  Returns je->s.error when the loop is left through 'exit' (the last
  step matched, or handle_match()/json_skip_key() returned non-zero),
  and 1 when json_scan_next() returned non-zero before that.
*/
int json_find_path(json_engine_t *je,
                   json_path_t *p, json_path_step_t **p_cur_step,
                   uint *array_counters)
{
  json_string_t key_name;

  json_string_set_cs(&key_name, p->s.cs);

  do
  {
    json_path_step_t *cur_step= *p_cur_step;
    switch (je->state)
    {
    case JST_KEY:
      DBUG_ASSERT(cur_step->type & JSON_PATH_KEY);
      /* A wildcard step accepts any key, otherwise compare the names. */
      if (!(cur_step->type & JSON_PATH_WILD))
      {
        json_string_set_str(&key_name, cur_step->key, cur_step->key_end);
        if (!json_key_matches(je, &key_name))
        {
          /* Different key: skip its value and go on with the next one. */
          if (json_skip_key(je))
            goto exit;
          continue;
        }
      }
      if (cur_step == p->last_step ||
          handle_match(je, p, p_cur_step, array_counters))
        goto exit;
      break;
    case JST_VALUE:
      DBUG_ASSERT(cur_step->type & JSON_PATH_ARRAY);
      /* The counter is advanced only when the step is not a wildcard. */
      if (cur_step->type & JSON_PATH_WILD ||
          cur_step->n_item == array_counters[cur_step - p->steps]++)
      {
        /* Array item matches. */
        if (cur_step == p->last_step ||
            handle_match(je, p, p_cur_step, array_counters))
          goto exit;
      }
      else
        json_skip_array_item(je);
      break;
    case JST_OBJ_END:
      /* Leave the object and also pop the steps handle_match() skipped. */
      do
      {
        (*p_cur_step)--;
      } while (*p_cur_step > p->steps &&
               array_counters[*p_cur_step - p->steps] == SKIPPED_STEP_MARK);
      break;
    case JST_ARRAY_END:
      (*p_cur_step)--;
      break;
    default:
      DBUG_ASSERT(0);
      break;
    };
  } while (json_scan_next(je) == 0);

  /* No luck. */
  return 1;

exit:
  return je->s.error;
}
1392
1393
/*
  Start a search for several paths at once: store the path set and the
  per-path depth array in 'state', reset the current depth to 0 and
  run the first json_find_paths_next() step.

  Returns what json_find_paths_next() returns.
*/
int json_find_paths_first(json_engine_t *je, json_find_paths_t *state,
                          uint n_paths, json_path_t *paths, uint *path_depths)
{
  state->paths= paths;
  state->n_paths= n_paths;
  state->path_depths= path_depths;
  state->cur_depth= 0;

  return json_find_paths_next(je, state);
}
1403
1404
/*
  Continue the multi-path search prepared by json_find_paths_first().

  state->cur_depth follows the nesting depth of the scanner;
  state->path_depths[i] is the depth up to which path i still matches,
  so a path with path_depths[i] < cur_depth is treated as already
  failed on this branch.

  Returns je->s.error when the loop is left through 'exit' (a path was
  found, or json_skip_level() returned non-zero), and 1 when
  json_scan_next() returned non-zero before that.

  NOTE(review): 'cur_step' is computed as steps + cur_depth, so the test
  'cur_step == last_step + cur_depth' below is the same as
  'steps == last_step'.  That looks unintended ('cur_step == last_step'
  would be the usual "last step reached" test) - confirm with the
  callers before relying on this function.
*/
int json_find_paths_next(json_engine_t *je, json_find_paths_t *state)
{
  uint p_c;
  int path_found, no_match_found;
  do
  {
    switch (je->state)
    {
    case JST_KEY:
      path_found= FALSE;
      no_match_found= TRUE;
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        json_path_step_t *cur_step;
        /* Skip failed paths and paths that expect an array item here. */
        if (state->path_depths[p_c] <
              state->cur_depth /* Path already failed. */ ||
            !((cur_step= state->paths[p_c].steps + state->cur_depth)->type &
              JSON_PATH_KEY))
          continue;

        if (!(cur_step->type & JSON_PATH_WILD))
        {
          json_string_t key_name;
          json_string_setup(&key_name, state->paths[p_c].s.cs,
                            cur_step->key, cur_step->key_end);
          if (!json_key_matches(je, &key_name))
            continue;
        }
        if (cur_step == state->paths[p_c].last_step + state->cur_depth)
          path_found= TRUE;
        else
        {
          /* Partial match: this path stays alive one level deeper. */
          no_match_found= FALSE;
          state->path_depths[p_c]= state->cur_depth + 1;
        }
      }
      if (path_found)
        /* Return the result. */
        goto exit;
      if (no_match_found)
      {
        /* No possible paths left to check. Just skip the level. */
        if (json_skip_level(je))
          goto exit;
      }

      break;
    case JST_VALUE:
      path_found= FALSE;
      no_match_found= TRUE;
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        json_path_step_t *cur_step;
        /* Skip failed paths and paths that expect a key here. */
        if (state->path_depths[p_c]< state->cur_depth /* Path already failed. */ ||
            !((cur_step= state->paths[p_c].steps + state->cur_depth)->type &
              JSON_PATH_ARRAY))
          continue;
        if (cur_step->type & JSON_PATH_WILD ||
            cur_step->n_item == state->array_counters[state->cur_depth])
        {
          /* Array item matches. */
          if (cur_step == state->paths[p_c].last_step + state->cur_depth)
            path_found= TRUE;
          else
          {
            no_match_found= FALSE;
            state->path_depths[p_c]= state->cur_depth + 1;
          }
        }
      }

      if (path_found)
        goto exit;

      if (no_match_found)
        json_skip_array_item(je);

      /* One more item of the array on the current depth was seen. */
      state->array_counters[state->cur_depth]++;
      break;
    case JST_OBJ_START:
    case JST_ARRAY_START:
      /* Going one level down: paths whose step kind fits follow along. */
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        if (state->path_depths[p_c] < state->cur_depth)
          /* Path already failed. */
          continue;
        if (state->paths[p_c].steps[state->cur_depth].type &
            ((je->state == JST_OBJ_START) ? JSON_PATH_KEY : JSON_PATH_ARRAY))
          state->path_depths[p_c]++;
      }
      state->cur_depth++;
      break;
    case JST_OBJ_END:
    case JST_ARRAY_END:
      /* Going one level up: paths that reached this depth step back. */
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        if (state->path_depths[p_c] < state->cur_depth)
          continue;
        state->path_depths[p_c]--;
      }
      state->cur_depth--;
      break;
    default:
      DBUG_ASSERT(0);
      break;
    };
  } while (json_scan_next(je) == 0);

  /* No luck. */
  return 1;

exit:
  return je->s.error;
}
1519
1520
/*
  Convert the bytes [ascii, ascii_end) one by one into the charset
  json_cs and store them at 'json', never writing past json_end.

  Returns the number of bytes written.  If the charset conversion of a
  byte returns a value <= 0, that value is returned instead.
*/
int json_append_ascii(CHARSET_INFO *json_cs,
                      uchar *json, uchar *json_end,
                      const uchar *ascii, const uchar *ascii_end)
{
  uchar *out= json;
  const uchar *in;

  for (in= ascii; in < ascii_end; in++)
  {
    int n_bytes= my_ci_wc_mb(json_cs, (my_wc_t) *in, out, json_end);

    if (n_bytes <= 0)
    {
      /* Error return. */
      return n_bytes;
    }
    out+= n_bytes;
  }

  return (int) (out - json);
}
1542
1543
/*
  Read the JSON string body [json_str, json_end) (charset json_cs)
  character by character with json_read_string_const_chr() and store
  each character in charset res_cs at [res, res_end).

  A character that res_cs reports as MY_CS_ILUNI is replaced by '?'.

  Returns the number of bytes written, or -1 if a character could not
  be stored or if reading stopped with an error other than JE_EOS.
*/
int json_unescape(CHARSET_INFO *json_cs,
                  const uchar *json_str, const uchar *json_end,
                  CHARSET_INFO *res_cs, uchar *res, uchar *res_end)
{
  json_string_t reader;
  uchar *out= res;

  json_string_setup(&reader, json_cs, json_str, json_end);

  while (!json_read_string_const_chr(&reader))
  {
    int n_bytes= my_ci_wc_mb(res_cs, reader.c_next, out, res_end);

    if (n_bytes <= 0)
    {
      /* Anything but "no such character in res_cs" is fatal. */
      if (n_bytes != MY_CS_ILUNI)
        return -1;

      /* Result charset lacks this character, store '?' in its place. */
      n_bytes= my_ci_wc_mb(res_cs, '?', out, res_end);
      if (n_bytes <= 0)
        return -1;
    }

    out+= n_bytes;
  }

  if (reader.error != JE_EOS)
    return -1;

  return (int) (out - res);
}
1578
1579
/*
  When we need to replace a character with the escaping.
  Apart from ESC_, the value of each class is the character that
  json_escape() writes after the backslash (for ESC_BS the escaped
  character itself is written instead).
*/
enum json_esc_char_classes {
  ESC_= 0,    /* No need to escape. */
  ESC_U= 'u', /* Character not allowed in JSON. Always escape as \uXXXX. */
  ESC_B= 'b', /* Backspace. Escape as \b */
  ESC_F= 'f', /* Formfeed. Escape as \f */
  ESC_N= 'n', /* Newline. Escape as \n */
  ESC_R= 'r', /* Return. Escape as \r */
  ESC_T= 't', /* Tab. Escape as \t */
  ESC_BS= '\\' /* Backslash or '"'. Escape by the \\ prefix. */
};
1591
1592
/*
  This specifies how we should escape the character.
  The table is indexed by the character code and covers 0x00..0x5F only;
  json_escape() does not consult it for codes >= 0x60.
  The two ESC_BS entries sit at 0x22 ('"') and 0x5C ('\').
*/
static enum json_esc_char_classes json_escape_chr_map[0x60] = {
  /* 0x00 - 0x1F */
  ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, ESC_U,
  ESC_B, ESC_T, ESC_N, ESC_U, ESC_F, ESC_R, ESC_U, ESC_U,
  ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, ESC_U,
  ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, ESC_U,

  /* 0x20 - 0x3F */
  ESC_, ESC_, ESC_BS, ESC_, ESC_, ESC_, ESC_, ESC_,
  ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_,
  ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_,
  ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_,

  /* 0x40 - 0x5F */
  ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_,
  ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_,
  ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_,
  ESC_, ESC_, ESC_, ESC_, ESC_BS, ESC_, ESC_, ESC_,
};
1610
1611
/*
  Upper-case hexadecimal digits, indexed by a 4-bit value.  The array has
  exactly 16 elements, so it carries no terminating zero byte.
*/
static const char hexconv[16] = "0123456789ABCDEF";
1613
1614
/*
  Copy the string [str, str_end) (charset str_cs) into the buffer
  [json, json_end) (charset json_cs), escaping what cannot appear
  verbatim inside a JSON string.

  Characters below 0x60 are escaped as told by json_escape_chr_map[].
  Any character that json_cs cannot represent is written in the \u form
  built from its UTF-16 encoding.

  NOTE(review): for a character that needs two UTF-16 units all eight
  hex digits are appended after a single "\u" (not as two separate
  "\uXXXX" groups).  This is kept as is; confirm that it is what the
  reading side expects before changing it.

  Returns the number of bytes written to 'json', or -1 when the source
  holds an invalid sequence, the character has no UTF-16 form, or the
  output buffer is too small.
*/
int json_escape(CHARSET_INFO *str_cs,
                const uchar *str, const uchar *str_end,
                CHARSET_INFO *json_cs, uchar *json, uchar *json_end)
{
  const uchar *json_start= json;

  while (str < str_end)
  {
    my_wc_t c_chr;
    int c_len;
    if ((c_len= my_ci_mb_wc(str_cs, &c_chr, str, str_end)) > 0)
    {
      enum json_esc_char_classes c_class;

      str+= c_len;
      if (c_chr >= 0x60 || (c_class= json_escape_chr_map[c_chr]) == ESC_)
      {
        /* No escaping required, try to store the character as it is. */
        if ((c_len= my_ci_wc_mb(json_cs, c_chr, json, json_end)) > 0)
        {
          json+= c_len;
          continue;
        }
        if (c_len < 0)
        {
          /* JSON buffer is depleted. */
          return -1;
        }

        /* JSON charset cannot convert this character. */
        c_class= ESC_U;
      }

      /*
        Write the backslash and the character after it: the escaped
        character itself for ESC_BS, the class letter otherwise.
      */
      if ((c_len= my_ci_wc_mb(json_cs, '\\', json, json_end)) <= 0 ||
          (c_len= my_ci_wc_mb(json_cs, (c_class == ESC_BS) ? c_chr : c_class,
                              json+= c_len, json_end)) <= 0)
      {
        /* JSON buffer is depleted. */
        return -1;
      }
      json+= c_len;

      if (c_class != ESC_U)
        continue;

      {
        /* We have to use \uXXXX escaping. */
        uchar utf16buf[4];
        uchar code_str[8];
        int u_len= my_uni_utf16(0, c_chr, utf16buf, utf16buf + 4);

        /*
          The character has no UTF-16 form.  Stop here instead of
          building hex digits from the bytes of utf16buf that were
          never written; the result (-1) is the same as before.
        */
        if (u_len <= 0)
          return -1;

        code_str[0]= hexconv[utf16buf[0] >> 4];
        code_str[1]= hexconv[utf16buf[0] & 15];
        code_str[2]= hexconv[utf16buf[1] >> 4];
        code_str[3]= hexconv[utf16buf[1] & 15];

        if (u_len > 2)
        {
          code_str[4]= hexconv[utf16buf[2] >> 4];
          code_str[5]= hexconv[utf16buf[2] & 15];
          code_str[6]= hexconv[utf16buf[3] >> 4];
          code_str[7]= hexconv[utf16buf[3] & 15];
        }

        /* Two hex digits are produced per UTF-16 byte. */
        if ((c_len= json_append_ascii(json_cs, json, json_end,
                                      code_str, code_str+u_len*2)) > 0)
        {
          json+= c_len;
          continue;
        }
        /* JSON buffer is depleted. */
        return -1;
      }
    }
    else /* c_len <= 0, the source character could not be decoded. */
      return -1;
  }

  return (int)(json - json_start);
}
1694
1695
/*
  Prepare a walk over all the values of the JSON text [str, end):
  the scanner 'je' is started on the text, and p->last_step is placed
  one element before p->steps, which is how json_get_path_next()
  recognises that nothing has been read yet.

  Always returns 0.
*/
int json_get_path_start(json_engine_t *je, CHARSET_INFO *i_cs,
                        const uchar *str, const uchar *end,
                        json_path_t *p)
{
  /* "No step yet" marker, see json_get_path_next(). */
  p->last_step= p->steps - 1;

  json_scan_start(je, i_cs, str, end);

  return 0;
}
1704
1705
/*
  Move to the next value of the JSON and keep 'p' equal to the path of
  that value.

  On the first call after json_get_path_start() (p->last_step is still
  before p->steps) the root value is read and step 0 is set up.
  On later calls the path is first adjusted for the value returned
  previously, then the scan goes on until the next value has been read.

  Returns 0 when a value has been read (je describes it, p is its path),
  1 when reading failed or json_scan_next() returned non-zero.
*/
int json_get_path_next(json_engine_t *je, json_path_t *p)
{
  if (p->last_step < p->steps)
  {
    /* First call: read the root value. */
    if (json_read_value(je))
      return 1;

    p->last_step= p->steps;
    p->steps[0].type= JSON_PATH_ARRAY_WILD;
    p->steps[0].n_item= 0;
    return 0;
  }
  else
  {
    if (json_value_scalar(je))
    {
      /* The previous value was a scalar: the next item has the next index. */
      if (p->last_step->type & JSON_PATH_ARRAY)
        p->last_step->n_item++;
    }
    else
    {
      /*
        The previous value was a container: add a step for its content.
        The step type is taken directly from the value type (presumably
        the two enums agree for objects and arrays - TODO confirm).
      */
      p->last_step++;
      p->last_step->type= (enum json_path_step_types) je->value_type;
      p->last_step->n_item= 0;
    }

    if (json_scan_next(je))
      return 1;
  }

  do
  {
    switch (je->state)
    {
    case JST_KEY:
      /*
        Remember where the key name starts; key_end is left at the
        position that preceded the read which ended the key name.
      */
      p->last_step->key= je->s.c_str;
      do
      {
        p->last_step->key_end= je->s.c_str;
      } while (json_read_keyname_chr(je) == 0);
      if (je->s.error)
        return 1;
      /* Now we have je.state == JST_VALUE, so let's handle it. */

      /* fall through */
    case JST_VALUE:
      if (json_read_value(je))
        return 1;
      return 0;
    case JST_OBJ_END:
    case JST_ARRAY_END:
      /* Container finished: drop its step, advance the parent's index. */
      p->last_step--;
      if (p->last_step->type & JSON_PATH_ARRAY)
        p->last_step->n_item++;
      break;
    default:
      break;
    }
  } while (json_scan_next(je) == 0);

  return 1;
}
1768
1769
/*
  Match the path steps [a, a_end] (may hold wildcards and double
  wildcards) against the steps [b, b_end] (must hold neither; this is
  asserted).  Both ranges are inclusive.  'vt' is only used once 'b' has
  run out: unless it is JSON_VALUE_ARRAY, remaining non-wildcard '[0]'
  steps of 'a' are accepted.

  Returns
     0  all the steps of 'a' matched and 'b' was used up as well
     1  'a' was used up while 'b' still has steps
    -1  a step did not match and it has no double wildcard
    -2  'b' ran out while 'a' still has steps that cannot be dropped
*/
int json_path_parts_compare(
    const json_path_step_t *a, const json_path_step_t *a_end,
    const json_path_step_t *b, const json_path_step_t *b_end,
    enum json_value_types vt)
{
  int res, res2;

  while (a <= a_end)
  {
    if (b > b_end)
    {
      /* 'b' is finished; only trailing plain '[0]' steps may remain. */
      while (vt != JSON_VALUE_ARRAY &&
             (a->type & JSON_PATH_ARRAY_WILD) == JSON_PATH_ARRAY &&
             a->n_item == 0)
      {
        if (++a > a_end)
          return 0;
      }
      return -2;
    }

    DBUG_ASSERT((b->type & (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD)) == 0);


    if (a->type & JSON_PATH_ARRAY)
    {
      if (b->type & JSON_PATH_ARRAY)
      {
        if ((a->type & JSON_PATH_WILD) || a->n_item == b->n_item)
          goto step_fits;
        goto step_failed;
      }
      /* A plain '[0]' step is also accepted in front of a key step. */
      if ((a->type & JSON_PATH_WILD) == 0 && a->n_item == 0)
        goto step_fits_autowrap;
      goto step_failed;
    }
    else /* JSON_PATH_KEY */
    {
      if (!(b->type & JSON_PATH_KEY))
        goto step_failed;

      /* Key names are compared as raw bytes of equal length. */
      if (!(a->type & JSON_PATH_WILD) &&
          (a->key_end - a->key != b->key_end - b->key ||
           memcmp(a->key, b->key, a->key_end - a->key) != 0))
        goto step_failed;

      goto step_fits;
    }
step_failed:
    /* A double wildcard may swallow the step of 'b' and try again. */
    if (!(a->type & JSON_PATH_DOUBLE_WILD))
      return -1;
    b++;
    continue;

step_fits:
    b++;
    if (!(a->type & JSON_PATH_DOUBLE_WILD))
    {
      a++;
      continue;
    }

    /*
      Double wild handling needs recursions.
      First try with this step of 'a' consumed, then with the same step
      kept so that its double wildcard can cover further steps of 'b'.
    */
    res= json_path_parts_compare(a+1, a_end, b, b_end, vt);
    if (res == 0)
      return 0;

    res2= json_path_parts_compare(a, a_end, b, b_end, vt);

    return (res2 >= 0) ? res2 : res;

step_fits_autowrap:
    /* The '[0]' step of 'a' is consumed, the step of 'b' is not. */
    if (!(a->type & JSON_PATH_DOUBLE_WILD))
    {
      a++;
      continue;
    }

    /* Double wild handling needs recursions. */
    res= json_path_parts_compare(a+1, a_end, b+1, b_end, vt);
    if (res == 0)
      return 0;

    res2= json_path_parts_compare(a, a_end, b+1, b_end, vt);

    return (res2 >= 0) ? res2 : res;

  }

  return b <= b_end;
}
1861
1862
json_path_compare(const json_path_t * a,const json_path_t * b,enum json_value_types vt)1863 int json_path_compare(const json_path_t *a, const json_path_t *b,
1864 enum json_value_types vt)
1865 {
1866 return json_path_parts_compare(a->steps+1, a->last_step,
1867 b->steps+1, b->last_step, vt);
1868 }
1869
1870
/*
  Read the value at the current position of 'je' and report where it is.

  *value is set to je->value.  *value_len is je->value_len for a scalar;
  for an object or array the whole level is skipped first and the length
  is the distance from *value to the position reached.

  Returns the type of the value, or JSV_BAD_JSON if reading or skipping
  failed (in the latter case *value has already been assigned).
*/
static enum json_types smart_read_value(json_engine_t *je,
                                        const char **value, int *value_len)
{
  if (json_read_value(je))
    return JSV_BAD_JSON;

  *value= (char *) je->value;

  if (!json_value_scalar(je))
  {
    if (json_skip_level(je))
      return JSV_BAD_JSON;

    *value_len= (int) ((char *) je->s.c_str - *value);
  }
  else
    *value_len= je->value_len;

  /* The cast below is valid only while the two enums stay in step. */
  compile_time_assert((int) JSON_VALUE_OBJECT == (int) JSV_OBJECT);
  compile_time_assert((int) JSON_VALUE_ARRAY == (int) JSV_ARRAY);
  compile_time_assert((int) JSON_VALUE_STRING == (int) JSV_STRING);
  compile_time_assert((int) JSON_VALUE_NUMBER == (int) JSV_NUMBER);
  compile_time_assert((int) JSON_VALUE_TRUE == (int) JSV_TRUE);
  compile_time_assert((int) JSON_VALUE_FALSE == (int) JSV_FALSE);
  compile_time_assert((int) JSON_VALUE_NULL == (int) JSV_NULL);

  return (enum json_types) je->value_type;
}
1902
1903
json_type(const char * js,const char * js_end,const char ** value,int * value_len)1904 enum json_types json_type(const char *js, const char *js_end,
1905 const char **value, int *value_len)
1906 {
1907 json_engine_t je;
1908
1909 json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1910 (const uchar *) js_end);
1911
1912 return smart_read_value(&je, value, value_len);
1913 }
1914
1915
json_get_array_item(const char * js,const char * js_end,int n_item,const char ** value,int * value_len)1916 enum json_types json_get_array_item(const char *js, const char *js_end,
1917 int n_item,
1918 const char **value, int *value_len)
1919 {
1920 json_engine_t je;
1921 int c_item= 0;
1922
1923 json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1924 (const uchar *) js_end);
1925
1926 if (json_read_value(&je) ||
1927 je.value_type != JSON_VALUE_ARRAY)
1928 goto err_return;
1929
1930 while (!json_scan_next(&je))
1931 {
1932 switch (je.state)
1933 {
1934 case JST_VALUE:
1935 if (c_item == n_item)
1936 return smart_read_value(&je, value, value_len);
1937
1938 if (json_skip_key(&je))
1939 goto err_return;
1940
1941 c_item++;
1942 break;
1943
1944 case JST_ARRAY_END:
1945 *value= (const char *) (je.s.c_str - je.sav_c_len);
1946 *value_len= c_item;
1947 return JSV_NOTHING;
1948 }
1949 }
1950
1951 err_return:
1952 return JSV_BAD_JSON;
1953 }
1954
1955
1956 /** Simple json lookup for a value by the key.
1957
1958 Expects JSON object.
1959 Only scans the 'first level' of the object, not
1960 the nested structures.
1961
1962 @param js [in] json object to search in
1963 @param js_end [in] end of json string
1964 @param key [in] key to search for
1965 @param key_end [in] - " -
1966 @param value_start [out] pointer into js (value or closing })
1967 @param value_len [out] length of the value found or number of keys
1968
1969 @retval the type of the key value
1970 @retval JSV_BAD_JSON - syntax error found reading JSON.
1971 or not JSON object.
1972 @retval JSV_NOTHING - no such key found.
1973 */
json_get_object_key(const char * js,const char * js_end,const char * key,const char ** value,int * value_len)1974 enum json_types json_get_object_key(const char *js, const char *js_end,
1975 const char *key,
1976 const char **value, int *value_len)
1977 {
1978 const char *key_end= key + strlen(key);
1979 json_engine_t je;
1980 json_string_t key_name;
1981 int n_keys= 0;
1982
1983 json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);
1984
1985 json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1986 (const uchar *) js_end);
1987
1988 if (json_read_value(&je) ||
1989 je.value_type != JSON_VALUE_OBJECT)
1990 goto err_return;
1991
1992 while (!json_scan_next(&je))
1993 {
1994 switch (je.state)
1995 {
1996 case JST_KEY:
1997 n_keys++;
1998 json_string_set_str(&key_name, (const uchar *) key,
1999 (const uchar *) key_end);
2000 if (json_key_matches(&je, &key_name))
2001 return smart_read_value(&je, value, value_len);
2002
2003 if (json_skip_key(&je))
2004 goto err_return;
2005
2006 break;
2007
2008 case JST_OBJ_END:
2009 *value= (const char *) (je.s.c_str - je.sav_c_len);
2010 *value_len= n_keys;
2011 return JSV_NOTHING;
2012 }
2013 }
2014
2015 err_return:
2016 return JSV_BAD_JSON;
2017 }
2018
2019
/*
  Placeholder: none of the arguments is used (all are marked unused),
  no output parameter is written, and JSV_NOTHING is always returned.
*/
enum json_types json_get_object_nkey(const char *js __attribute__((unused)),
                                     const char *js_end __attribute__((unused)),
                                     int nkey __attribute__((unused)),
                                     const char **keyname __attribute__((unused)),
                                     const char **keyname_end __attribute__((unused)),
                                     const char **value __attribute__((unused)),
                                     int *value_len __attribute__((unused)))
{
  return JSV_NOTHING;
}
2030
2031
2032 /** Check if json is valid (well-formed)
2033
2034 @retval 0 - success, json is well-formed
2035 @retval 1 - error, json is invalid
2036 */
json_valid(const char * js,size_t js_len,CHARSET_INFO * cs)2037 int json_valid(const char *js, size_t js_len, CHARSET_INFO *cs)
2038 {
2039 json_engine_t je;
2040 json_scan_start(&je, cs, (const uchar *) js, (const uchar *) js + js_len);
2041 while (json_scan_next(&je) == 0) /* no-op */ ;
2042 return je.s.error == 0;
2043 }
2044
2045
/*
  Expects the JSON object as an js argument, and the key name.
  Looks for this key in the object and returns
  the location of all the text related to it.
  The text includes the comma, separating this key.

  comma_pos - the hint where the comma is. It is important
       if you plan to replace the key rather than just cut.
    1 - comma is on the left
    2 - comma is on the right.
    0 - no comma at all (the object has just this single key)

  if no such key found *key_start is set to NULL.

  The text is scanned with my_charset_utf8mb4_bin and kname is a
  zero-terminated string.  Only the keys of the outermost object are
  examined; the values of the other keys are skipped.

  Returns 0 if the object was scanned successfully (whether or not the
  key was found), 1 if js is not an object or cannot be scanned.
*/
int json_locate_key(const char *js, const char *js_end,
                    const char *kname,
                    const char **key_start, const char **key_end,
                    int *comma_pos)
{
  const char *kname_end= kname + strlen(kname);
  json_engine_t je;
  json_string_t key_name;
  int t_next, c_len, match_result;

  json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);

  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
                  (const uchar *) js_end);

  if (json_read_value(&je) ||
      je.value_type != JSON_VALUE_OBJECT)
    goto err_return;

  /* Where the text of the first key would start. */
  *key_start= (const char *) je.s.c_str;
  *comma_pos= 0;

  while (!json_scan_next(&je))
  {
    switch (je.state)
    {
    case JST_KEY:
      json_string_set_str(&key_name, (const uchar *) kname,
                          (const uchar *) kname_end);
      match_result= json_key_matches(&je, &key_name);
      /* The value is skipped in either case to find where the key ends. */
      if (json_skip_key(&je))
        goto err_return;
      /* Look at the character after the value, then step back before it. */
      get_first_nonspace(&je.s, &t_next, &c_len);
      je.s.c_str-= c_len;

      if (match_result)
      {
        *key_end= (const char *) je.s.c_str;

        /* An earlier key exists, so the comma on the left is part of it. */
        if (*comma_pos == 1)
          return 0;

        DBUG_ASSERT(*comma_pos == 0);

        if (t_next == C_COMMA)
        {
          /* First key with more to follow: take the comma on the right. */
          *key_end+= c_len;
          *comma_pos= 2;
        }
        else if (t_next == C_RCURB)
          *comma_pos= 0;
        else
          goto err_return;
        return 0;
      }

      /* Not our key: the next key's text starts here, comma included. */
      *key_start= (const char *) je.s.c_str;
      *comma_pos= 1;
      break;

    case JST_OBJ_END:
      *key_start= NULL;
      return 0;
    }
  }

err_return:
  return 1;

}
2130