1 #include <my_global.h>
2 #include <string.h>
3 #include <m_ctype.h>
4 #include "json_lib.h"
5
/*
  A JSON \uXXXX escape specifies a character by its UTF-16 code,
  so decoding escapes needs the UTF-16 charset capabilities. Import
  the conversion routines from the utf16 charset.
*/
int my_utf16_uni(CHARSET_INFO *cs,
                 my_wc_t *pwc, const uchar *s, const uchar *e);
int my_uni_utf16(CHARSET_INFO *cs, my_wc_t wc, uchar *s, uchar *e);
14
15
/*
  Attach an input buffer to the string reader.

  str becomes the current read position (s->c_str) and end is stored as
  the input boundary (s->str_end). Nothing else in *s is modified.
*/
void json_string_set_str(json_string_t *s,
                         const uchar *str, const uchar *end)
{
  s->str_end= end;
  s->c_str= str;
}
22
23
/*
  Select the character set the reader decodes its input with.

  Clears the reader's error code and caches the charset's mb_wc
  conversion routine in s->wc.
*/
void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs)
{
  s->error= 0;
  s->cs= i_cs;
  s->wc= i_cs->cset->mb_wc;
}
30
31
/*
  Fully initialize a string reader: input buffer [str, end) bounds and
  the character set used to decode it.
*/
static void json_string_setup(json_string_t *s,
                              CHARSET_INFO *i_cs, const uchar *str,
                              const uchar *end)
{
  json_string_set_str(s, str, end);
  json_string_set_cs(s, i_cs);
}
39
40
/*
  Classes of characters as seen by the main JSON state machine.
  Every class below NR_C_CLASSES owns a column in the handler table;
  C_SPACE is skipped before dispatching and therefore sits after it.
*/
enum json_char_classes {
  C_EOS= 0,      /* input exhausted */
  C_LCURB,       /* '{' */
  C_RCURB,       /* '}' */
  C_LSQRB,       /* '[' */
  C_RSQRB,       /* ']' */
  C_COLON,       /* ':' */
  C_COMMA,       /* ',' */
  C_QUOTE,       /* '"' */
  C_DIGIT,       /* '-' and '0'..'9' */
  C_LOW_F,       /* 'f', first letter of "false" */
  C_LOW_N,       /* 'n', first letter of "null" */
  C_LOW_T,       /* 't', first letter of "true" */
  C_ETC,         /* any other character */
  C_ERR,         /* character that JSON does not allow */
  C_BAD,         /* byte sequence the charset handler cannot decode */
  NR_C_CLASSES,  /* number of classes that have handler functions */
  C_SPACE        /* whitespace; needs no handler, hence after the counter */
};
60
61
/*
  This array maps the first 128 Unicode code points to their classes;
  the index is the code point itself. The remaining Unicode characters
  should be mapped to C_ETC by the code that consults the table.
*/

static enum json_char_classes json_chr_map[128] = {
  /* 0x00..0x1F: control characters; only TAB, LF and CR are C_SPACE. */
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
  C_ERR,   C_SPACE, C_SPACE, C_ERR,   C_ERR,   C_SPACE, C_ERR,   C_ERR,
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,

  /* 0x20..0x3F: space, '"', ',', '-', the digits and ':'. */
  C_SPACE, C_ETC,   C_QUOTE, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_COMMA, C_DIGIT, C_ETC,   C_ETC,
  C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
  C_DIGIT, C_DIGIT, C_COLON, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,

  /* 0x40..0x5F: upper-case letters, '[' and ']'. */
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_LSQRB, C_ETC,   C_RSQRB, C_ETC,   C_ETC,

  /* 0x60..0x7F: lower-case letters ('f', 'n', 't' are special), '{', '}'. */
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_F, C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_N, C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_T, C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_LCURB, C_ETC,   C_RCURB, C_ETC,   C_ETC
};
88
89
90 /*
91 JSON parser actually has more states than the 'enum json_states'
92 declares. But the rest of the states aren't seen to the user so let's
93 specify them here to avoid confusion.
94 */
95
96 enum json_all_states {
97 JST_DONE= NR_JSON_USER_STATES, /* ok to finish */
98 JST_OBJ_CONT= NR_JSON_USER_STATES+1, /* object continues */
99 JST_ARRAY_CONT= NR_JSON_USER_STATES+2, /* array continues */
100 JST_READ_VALUE= NR_JSON_USER_STATES+3, /* value is being read */
101 NR_JSON_STATES= NR_JSON_USER_STATES+4
102 };
103
104
/* Signature of the handlers stored in the json_actions dispatch table. */
typedef int (*json_state_handler)(json_engine_t *);
106
107
108 /* The string is broken. */
unexpected_eos(json_engine_t * j)109 static int unexpected_eos(json_engine_t *j)
110 {
111 j->s.error= JE_EOS;
112 return 1;
113 }
114
115
116 /* This symbol here breaks the JSON syntax. */
syntax_error(json_engine_t * j)117 static int syntax_error(json_engine_t *j)
118 {
119 j->s.error= JE_SYN;
120 return 1;
121 }
122
123
124 /* Value of object. */
mark_object(json_engine_t * j)125 static int mark_object(json_engine_t *j)
126 {
127 j->state= JST_OBJ_START;
128 if (++j->stack_p < JSON_DEPTH_LIMIT)
129 {
130 j->stack[j->stack_p]= JST_OBJ_CONT;
131 return 0;
132 }
133 j->s.error= JE_DEPTH;
134 return 1;
135 }
136
137
138 /* Read value of object. */
read_obj(json_engine_t * j)139 static int read_obj(json_engine_t *j)
140 {
141 j->state= JST_OBJ_START;
142 j->value_type= JSON_VALUE_OBJECT;
143 j->value= j->value_begin;
144 if (++j->stack_p < JSON_DEPTH_LIMIT)
145 {
146 j->stack[j->stack_p]= JST_OBJ_CONT;
147 return 0;
148 }
149 j->s.error= JE_DEPTH;
150 return 1;
151 }
152
153
154 /* Value of array. */
mark_array(json_engine_t * j)155 static int mark_array(json_engine_t *j)
156 {
157 j->state= JST_ARRAY_START;
158 if (++j->stack_p < JSON_DEPTH_LIMIT)
159 {
160 j->stack[j->stack_p]= JST_ARRAY_CONT;
161 j->value= j->value_begin;
162 return 0;
163 }
164 j->s.error= JE_DEPTH;
165 return 1;
166 }
167
168 /* Read value of object. */
read_array(json_engine_t * j)169 static int read_array(json_engine_t *j)
170 {
171 j->state= JST_ARRAY_START;
172 j->value_type= JSON_VALUE_ARRAY;
173 j->value= j->value_begin;
174 if (++j->stack_p < JSON_DEPTH_LIMIT)
175 {
176 j->stack[j->stack_p]= JST_ARRAY_CONT;
177 return 0;
178 }
179 j->s.error= JE_DEPTH;
180 return 1;
181 }
182
183
184
/*
  Character classes inside a JSON string constant.
  They are mostly needed to parse escapes properly.
  The escapes available in JSON are:
    \"  - quotation mark
    \\  - backslash
    \b  - backspace        UNICODE 8
    \f  - formfeed         UNICODE 12
    \n  - newline          UNICODE 10
    \r  - carriage return  UNICODE 13
    \t  - horizontal tab   UNICODE 9
    \u{four-hex-digits}    - code in the UTF-16 encoding
*/
enum json_string_char_classes {
  /* Hexadecimal digits: the class value is the value of the digit. */
  S_0= 0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,
  S_8,    S_9, S_A, S_B, S_C, S_D, S_E, S_F,

  S_ETC= 36,    /* any other ordinary character */
  S_QUOTE= 37,  /* '"' */
  S_BKSL= 38,   /* '\' */
  S_ERR= 100    /* character not allowed inside a string */
};
220
221
/*
  This maps characters to their types inside a string constant.
  The index is the code point; the table only covers 0..127, so code
  that consults it must handle larger code points separately.
*/
static enum json_string_char_classes json_instr_chr_map[128] = {
  /* 0x00..0x1F: control characters are not allowed in a string. */
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,

  /* 0x20..0x3F: '"' and the decimal digits. */
  S_ETC,   S_ETC,   S_QUOTE, S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_0,     S_1,     S_2,     S_3,     S_4,     S_5,     S_6,     S_7,
  S_8,     S_9,     S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,

  /* 0x40..0x5F: hex digits 'A'..'F' and the backslash. */
  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_BKSL,  S_ETC,   S_ETC,   S_ETC,

  /* 0x60..0x7F: hex digits 'a'..'f'. */
  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC
};
244
245
/*
  Read four hexadecimal digits from the input and pack them into two
  bytes, most significant digit first.

  The digit values are added to dest[0] and dest[1], so the caller must
  pass them zeroed. Returns 0 on success. On failure the error code is
  stored in s->error and also returned: JE_EOS / JE_BAD_CHR when the
  next character cannot be read, JE_SYN when it is not a hex digit.
*/
static int read_4_hexdigits(json_string_t *s, uchar *dest)
{
  int pos;

  for (pos= 0; pos < 4; pos++)
  {
    int digit;
    int n_bytes= json_next_char(s);

    if (n_bytes <= 0)
      return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;

    if (s->c_next >= 128)
      return s->error= JE_SYN;

    digit= json_instr_chr_map[s->c_next];
    if (digit > S_F)
      return s->error= JE_SYN;

    s->c_str+= n_bytes;

    /* Even positions hold the high nibble of a byte, odd ones the low. */
    if (pos % 2)
      dest[pos / 2]+= digit;
    else
      dest[pos / 2]+= digit * 16;
  }

  return 0;
}
262
263
/*
  Decode the escape sequence that follows a backslash already consumed
  by the caller. The decoded character is left in s->c_next.

  \b \f \n \r \t are translated to their control codes. \uXXXX is
  decoded through my_utf16_uni(); when one code is not enough (the
  symbol is outside the Basic Multilingual Plane) a second \uXXXX is
  required to complete the UTF-16 surrogate pair. Any other character
  is kept as is, except the ASCII ones classified S_ERR, which give
  JE_ESCAPING with the read position restored.

  Returns 0 on success, non-zero on failure with s->error set.
*/
static int json_handle_esc(json_string_t *s)
{
  int n_bytes;
  uchar code[4]= {0,0,0,0};

  if ((n_bytes= json_next_char(s)) <= 0)
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;

  s->c_str+= n_bytes;

  switch (s->c_next)
  {
  case 'b': s->c_next= 8;  return 0;
  case 'f': s->c_next= 12; return 0;
  case 'n': s->c_next= 10; return 0;
  case 'r': s->c_next= 13; return 0;
  case 't': s->c_next= 9;  return 0;
  default:
    break;
  }

  if (s->c_next < 128 && json_instr_chr_map[s->c_next] == S_ERR)
  {
    /* Step back so the read position is at the offending character. */
    s->c_str-= n_bytes;
    return s->error= JE_ESCAPING;
  }

  if (s->c_next != 'u')
    return 0;

  /* First four-hex-digits code. */
  if (read_4_hexdigits(s, code))
    return 1;

  n_bytes= my_utf16_uni(0, &s->c_next, code, code+2);
  if (n_bytes == 2)
    return 0;

  if (n_bytes != MY_CS_TOOSMALL4)
    return s->error= JE_BAD_CHR;

  /* Two more bytes are needed: expect "\u" and another four digits. */
  if ((n_bytes= json_next_char(s)) <= 0)
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
  if (s->c_next != '\\')
    return s->error= JE_SYN;
  s->c_str+= n_bytes;

  if ((n_bytes= json_next_char(s)) <= 0)
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
  if (s->c_next != 'u')
    return s->error= JE_SYN;
  s->c_str+= n_bytes;

  if (read_4_hexdigits(s, code+2))
    return 1;

  n_bytes= my_utf16_uni(0, &s->c_next, code, code+4);
  if (n_bytes == 4)
    return 0;

  return s->error= JE_BAD_CHR;
}
338
339
/*
  Read one character of a string constant into js->c_next, decoding an
  escape sequence when the character is a backslash.

  Returns 0 on success. On failure returns non-zero with js->error set
  (JE_EOS or JE_BAD_CHR when the character itself cannot be read).
*/
int json_read_string_const_chr(json_string_t *js)
{
  int n_bytes= json_next_char(js);

  if (n_bytes <= 0)
  {
    js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
    return 1;
  }

  js->c_str+= n_bytes;
  if (js->c_next == '\\')
    return json_handle_esc(js);
  return 0;
}
352
353
/*
  Skip a string constant up to and including its closing quote.

  Sets j->value_escaped when a backslash escape is met. On success the
  state is restored from the top of the state stack and 0 is returned.
  On failure returns non-zero with j->s.error set.
*/
static int skip_str_constant(json_engine_t *j)
{
  for (;;)
  {
    int n_bytes= json_next_char(&j->s);

    if (n_bytes <= 0)
      return j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR;

    j->s.c_str+= n_bytes;

    /* Ordinary characters, including everything outside ASCII. */
    if (j->s.c_next >= 128 || json_instr_chr_map[j->s.c_next] <= S_ETC)
      continue;

    if (j->s.c_next == '"')
      break;

    if (j->s.c_next != '\\')
    {
      /* Symbol not allowed in JSON. */
      return j->s.error= JE_NOT_JSON_CHR;
    }

    j->value_escaped= 1;
    if (json_handle_esc(&j->s))
      return 1;
  }

  j->state= j->stack[j->stack_p];
  return 0;
}
384
385
386 /* Scalar string. */
v_string(json_engine_t * j)387 static int v_string(json_engine_t *j)
388 {
389 return skip_str_constant(j) || json_scan_next(j);
390 }
391
392
393 /* Read scalar string. */
read_strn(json_engine_t * j)394 static int read_strn(json_engine_t *j)
395 {
396 j->value= j->s.c_str;
397 j->value_type= JSON_VALUE_STRING;
398 j->value_escaped= 0;
399
400 if (skip_str_constant(j))
401 return 1;
402
403 j->state= j->stack[j->stack_p];
404 j->value_len= (int)(j->s.c_str - j->value) - 1;
405 return 0;
406 }
407
408
/*
  Numeric constants have a dedicated parser. It is built like the
  main JSON parser: characters are mapped to classes and a
  state-per-class table drives it. There are no handler functions
  though; the table directly gives the state the parser moves to.
*/
enum json_num_char_classes {
  N_MINUS= 0,
  N_PLUS,
  N_ZERO,
  N_DIGIT,
  N_POINT,
  N_E,
  N_END,
  N_EEND,
  N_ERR,
  N_NUM_CLASSES   /* number of classes above */
};
429
430
/*
  Maps the first 128 code points to the classes of the numeric parser;
  the index is the code point. N_END is given to TAB, LF, CR, space,
  ',', ']' and '}'; the remaining printable characters that cannot be
  part of a number are N_EEND and control characters are N_ERR.
*/
static enum json_num_char_classes json_num_chr_map[128] = {
  /* 0x00..0x1F */
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
  N_ERR,   N_END,   N_END,   N_ERR,   N_ERR,   N_END,   N_ERR,   N_ERR,
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,

  /* 0x20..0x3F: space, '+', ',', '-', '.', digits */
  N_END,   N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_PLUS,  N_END,   N_MINUS, N_POINT, N_EEND,
  N_ZERO,  N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT,
  N_DIGIT, N_DIGIT, N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,

  /* 0x40..0x5F: 'E' and ']' */
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_END,   N_EEND,  N_EEND,

  /* 0x60..0x7F: 'e' and '}' */
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_END,   N_EEND,  N_EEND,
};
452
453
/* States of the numeric constant parser. */
enum json_num_states {
  NS_OK= 0,       /* the number has ended */
  NS_GO,          /* initial state */
  NS_GO1,         /* after a leading '-' */
  NS_Z,           /* after a leading '0' */
  NS_Z1,          /* after a leading '-0' */
  NS_INT,         /* inside the integer part */
  NS_FRAC,        /* inside the fractional part */
  NS_EX,          /* the exponential part begins */
  NS_EX1,         /* the exponential part continues */
  NS_NUM_STATES   /* number of states above */
};
466
467
/*
  Transition table of the numeric parser:
  json_num_states[current state][character class] is either the next
  state (NS_*) or a JE_* error code.

  NOTE(review): the table is declared with N_NUM_CLASSES (9) columns but
  every row lists 8 values. As written, the 8th value (JE_BAD_CHR, under
  the "ERROR" heading) lands in the N_EEND column and the N_ERR column
  is implicitly zero, which equals NS_OK. Confirm this is intended.
*/
static int json_num_states[NS_NUM_STATES][N_NUM_CLASSES]=
{
/*         -        +       0        1..9    POINT    E       END_OK   ERROR */
/*OK*/   { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
/*GO*/   { NS_GO1,  JE_SYN, NS_Z,     NS_INT, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
/*GO1*/  { JE_SYN,  JE_SYN, NS_Z1,    NS_INT, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
/*ZERO*/ { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, NS_FRAC, JE_SYN, NS_OK,  JE_BAD_CHR },
/*ZE1*/  { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, NS_FRAC, JE_SYN, NS_OK,  JE_BAD_CHR },
/*INT*/  { JE_SYN,  JE_SYN, NS_INT,   NS_INT, NS_FRAC, NS_EX,  NS_OK,  JE_BAD_CHR },
/*FRAC*/ { JE_SYN,  JE_SYN, NS_FRAC,  NS_FRAC,JE_SYN,  NS_EX,  NS_OK,  JE_BAD_CHR },
/*EX*/   { NS_EX,   NS_EX,  NS_EX1,   NS_EX1, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
/*EX1*/  { JE_SYN,  JE_SYN, NS_EX1,   NS_EX1, JE_SYN,  JE_SYN, NS_OK,  JE_BAD_CHR }
};
481
482
/*
  Flags contributed by each state of the numeric parser. The flag of
  every state the parser passes through is OR-ed into j->num_flags,
  which records whether the number is negative, has a fractional part
  or has an exponent.
*/
static uint json_num_state_flags[NS_NUM_STATES]=
{
/*OK*/   0,
/*GO*/   0,
/*GO1*/  JSON_NUM_NEG,
/*ZERO*/ 0,
/*ZE1*/  0,
/*INT*/  0,
/*FRAC*/ JSON_NUM_FRAC_PART,
/*EX*/   JSON_NUM_EXP,
/*EX1*/  0,
};
495
496
/*
  Skip a numeric constant whose first character is already in
  j->s.c_next, collecting the JSON_NUM_* flags in j->num_flags.

  Scanning stops at the first character that does not lead to a
  positive state; that character is not consumed. At the end of the
  input the N_END transition of the current state decides whether the
  number is complete.

  Returns 0 and restores the state from the top of the state stack, or
  returns 1 with a negative j->s.error.

  NOTE(review): when a character leads to a negative (error) entry of
  the table, the loop is simply left and 0 is returned without setting
  j->s.error. Confirm that callers rely on the following character
  being rejected later.
*/
static int skip_num_constant(json_engine_t *j)
{
  int cur= json_num_states[NS_GO][json_num_chr_map[j->s.c_next]];
  int n_bytes;

  j->num_flags= 0;
  for (;;)
  {
    j->num_flags|= json_num_state_flags[cur];

    n_bytes= json_next_char(&j->s);
    if (n_bytes > 0 && j->s.c_next < 128)
    {
      cur= json_num_states[cur][json_num_chr_map[j->s.c_next]];
      if (cur <= 0)
        break;

      j->s.c_str+= n_bytes;
      continue;
    }

    /* No usable ASCII character: end of input or undecodable bytes. */
    j->s.error= json_eos(&j->s) ? json_num_states[cur][N_END] : JE_BAD_CHR;
    if (j->s.error < 0)
      return 1;
    break;
  }

  j->state= j->stack[j->stack_p];
  return 0;
}
526
527
528 /* Scalar numeric. */
v_number(json_engine_t * j)529 static int v_number(json_engine_t *j)
530 {
531 return skip_num_constant(j) || json_scan_next(j);
532 }
533
534
535 /* Read numeric constant. */
read_num(json_engine_t * j)536 static int read_num(json_engine_t *j)
537 {
538 j->value= j->value_begin;
539 if (skip_num_constant(j) == 0)
540 {
541 j->value_type= JSON_VALUE_NUMBER;
542 j->value_len= (int)(j->s.c_str - j->value_begin);
543 return 0;
544 }
545 return 1;
546 }
547
548
549 /* Check that the JSON string matches the argument and skip it. */
skip_string_verbatim(json_string_t * s,const char * str)550 static int skip_string_verbatim(json_string_t *s, const char *str)
551 {
552 int c_len;
553 while (*str)
554 {
555 if ((c_len= json_next_char(s)) > 0)
556 {
557 if (s->c_next == (my_wc_t) *(str++))
558 {
559 s->c_str+= c_len;
560 continue;
561 }
562 return s->error= JE_SYN;
563 }
564 return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
565 }
566
567 return 0;
568 }
569
570
571 /* Scalar false. */
v_false(json_engine_t * j)572 static int v_false(json_engine_t *j)
573 {
574 if (skip_string_verbatim(&j->s, "alse"))
575 return 1;
576 j->state= j->stack[j->stack_p];
577 return json_scan_next(j);
578 }
579
580
581 /* Scalar null. */
v_null(json_engine_t * j)582 static int v_null(json_engine_t *j)
583 {
584 if (skip_string_verbatim(&j->s, "ull"))
585 return 1;
586 j->state= j->stack[j->stack_p];
587 return json_scan_next(j);
588 }
589
590
591 /* Scalar true. */
v_true(json_engine_t * j)592 static int v_true(json_engine_t *j)
593 {
594 if (skip_string_verbatim(&j->s, "rue"))
595 return 1;
596 j->state= j->stack[j->stack_p];
597 return json_scan_next(j);
598 }
599
600
601 /* Read false. */
read_false(json_engine_t * j)602 static int read_false(json_engine_t *j)
603 {
604 j->value_type= JSON_VALUE_FALSE;
605 j->value= j->value_begin;
606 j->state= j->stack[j->stack_p];
607 j->value_len= 5;
608 return skip_string_verbatim(&j->s, "alse");
609 }
610
611
612 /* Read null. */
read_null(json_engine_t * j)613 static int read_null(json_engine_t *j)
614 {
615 j->value_type= JSON_VALUE_NULL;
616 j->value= j->value_begin;
617 j->state= j->stack[j->stack_p];
618 j->value_len= 4;
619 return skip_string_verbatim(&j->s, "ull");
620 }
621
622
623 /* Read true. */
read_true(json_engine_t * j)624 static int read_true(json_engine_t *j)
625 {
626 j->value_type= JSON_VALUE_TRUE;
627 j->value= j->value_begin;
628 j->state= j->stack[j->stack_p];
629 j->value_len= 4;
630 return skip_string_verbatim(&j->s, "rue");
631 }
632
633
634 /* Disallowed character. */
not_json_chr(json_engine_t * j)635 static int not_json_chr(json_engine_t *j)
636 {
637 j->s.error= JE_NOT_JSON_CHR;
638 return 1;
639 }
640
641
642 /* Bad character. */
bad_chr(json_engine_t * j)643 static int bad_chr(json_engine_t *j)
644 {
645 j->s.error= JE_BAD_CHR;
646 return 1;
647 }
648
649
650 /* Correct finish. */
done(json_engine_t * j)651 static int done(json_engine_t *j __attribute__((unused)))
652 {
653 return 1;
654 }
655
656
657 /* End of the object. */
end_object(json_engine_t * j)658 static int end_object(json_engine_t *j)
659 {
660 j->stack_p--;
661 j->state= JST_OBJ_END;
662 return 0;
663 }
664
665
666 /* End of the array. */
end_array(json_engine_t * j)667 static int end_array(json_engine_t *j)
668 {
669 j->stack_p--;
670 j->state= JST_ARRAY_END;
671 return 0;
672 }
673
674
675 /* Start reading key name. */
read_keyname(json_engine_t * j)676 static int read_keyname(json_engine_t *j)
677 {
678 j->state= JST_KEY;
679 return 0;
680 }
681
682
/*
  Skip whitespace and consume the first character that is not a space.

  *t_next receives the class of that character (C_EOS or C_BAD when
  nothing could be read; characters outside ASCII are C_ETC).
  *c_len receives the value returned by the last json_next_char() call,
  i.e. the length of the consumed character when one was read.
*/
static void get_first_nonspace(json_string_t *js, int *t_next, int *c_len)
{
  for (;;)
  {
    *c_len= json_next_char(js);
    if (*c_len <= 0)
    {
      *t_next= json_eos(js) ? C_EOS : C_BAD;
      return;
    }

    *t_next= (js->c_next < 128) ? json_chr_map[js->c_next] : C_ETC;
    js->c_str+= *c_len;

    if (*t_next != C_SPACE)
      return;
  }
}
696
697
698 /* Next key name. */
next_key(json_engine_t * j)699 static int next_key(json_engine_t *j)
700 {
701 int t_next, c_len;
702 get_first_nonspace(&j->s, &t_next, &c_len);
703
704 if (t_next == C_QUOTE)
705 {
706 j->state= JST_KEY;
707 return 0;
708 }
709
710 j->s.error= (t_next == C_EOS) ? JE_EOS :
711 ((t_next == C_BAD) ? JE_BAD_CHR :
712 JE_SYN);
713 return 1;
714 }
715
716
/*
  Forward declarations of the handlers that are referenced by the
  json_actions table but defined after it.
*/
static int skip_colon(json_engine_t *j);
static int skip_key(json_engine_t *j);
static int struct_end_cb(json_engine_t *j);
static int struct_end_qb(json_engine_t *j);
static int struct_end_cm(json_engine_t *j);
static int struct_end_eos(json_engine_t *j);
724
725
/* Handler: ',' inside an array, so the next item is a value. */
static int next_item(json_engine_t *j)
{
  j->state= JST_VALUE;

  return 0;
}
731
732
/*
  Handler: the first item of an array begins.

  The character that json_scan_next() has just consumed belongs to the
  value, so the read position is moved back by its length
  (j->sav_c_len) and the parser switches to JST_VALUE.
*/
static int array_item(json_engine_t *j)
{
  j->s.c_str-= j->sav_c_len;
  j->state= JST_VALUE;
  return 0;
}
739
740
/*
  The dispatch table of the parser: json_actions[state][character class]
  is the handler to run when the character of that class is met in
  that state. There is one column per class below NR_C_CLASSES, in the
  order of enum json_char_classes, so C_SPACE has no column. The row
  each initializer belongs to is named in its leading comment.
*/
static json_state_handler json_actions[NR_JSON_STATES][NR_C_CLASSES]=
/*
   EOS              {            }             [             ]
   :                ,            "             -0..9         f
   n                t              ETC          ERR           BAD
*/
{
  {/*VALUE*/
    unexpected_eos, mark_object, syntax_error, mark_array,   syntax_error,
    syntax_error,   syntax_error,v_string,     v_number,     v_false,
    v_null,         v_true,       syntax_error, not_json_chr, bad_chr},
  {/*KEY*/
    unexpected_eos, skip_key,    skip_key,     skip_key,     skip_key,
    skip_key,       skip_key,    skip_colon,   skip_key,     skip_key,
    skip_key,       skip_key,     skip_key,     not_json_chr, bad_chr},
  {/*OBJ_START*/
    unexpected_eos, syntax_error, end_object,  syntax_error, syntax_error,
    syntax_error,   syntax_error, read_keyname, syntax_error, syntax_error,
    syntax_error,   syntax_error,   syntax_error,    not_json_chr, bad_chr},
  {/*OBJ_END*/
    struct_end_eos, syntax_error, struct_end_cb, syntax_error, struct_end_qb,
    syntax_error,   struct_end_cm,syntax_error,  syntax_error, syntax_error,
    syntax_error,   syntax_error,  syntax_error,    not_json_chr, bad_chr},
  {/*ARRAY_START*/
    unexpected_eos, array_item,   syntax_error, array_item,   end_array,
    syntax_error,   syntax_error, array_item,  array_item,  array_item,
    array_item,     array_item,    syntax_error,    not_json_chr, bad_chr},
  {/*ARRAY_END*/
    struct_end_eos, syntax_error, struct_end_cb, syntax_error,  struct_end_qb,
    syntax_error,   struct_end_cm, syntax_error, syntax_error, syntax_error,
    syntax_error,   syntax_error,  syntax_error,    not_json_chr, bad_chr},
  {/*DONE*/
    done,           syntax_error, syntax_error, syntax_error, syntax_error,
    syntax_error,   syntax_error, syntax_error, syntax_error, syntax_error,
    syntax_error,   syntax_error, syntax_error, not_json_chr, bad_chr},
  {/*OBJ_CONT*/
    unexpected_eos, syntax_error, end_object,    syntax_error,   syntax_error,
    syntax_error,   next_key,     syntax_error,  syntax_error,   syntax_error,
    syntax_error,   syntax_error,    syntax_error,    not_json_chr, bad_chr},
  {/*ARRAY_CONT*/
    unexpected_eos, syntax_error, syntax_error,  syntax_error, end_array,
    syntax_error,   next_item,    syntax_error,  syntax_error, syntax_error,
    syntax_error,    syntax_error,    syntax_error,    not_json_chr, bad_chr},
  {/*READ_VALUE*/
    unexpected_eos, read_obj,     syntax_error,  read_array,    syntax_error,
    syntax_error,   syntax_error, read_strn,     read_num,      read_false,
    read_null,      read_true,    syntax_error,    not_json_chr, bad_chr},
};
789
790
791
/*
  Prepare the engine to scan the JSON text [str, end) in charset i_cs.

  The state stack is reset to a single JST_DONE entry and the parser
  starts in JST_VALUE. Always returns 0.
*/
int json_scan_start(json_engine_t *je,
                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
{
  json_string_setup(&je->s, i_cs, str, end);

  je->state= JST_VALUE;
  je->stack_p= 0;
  je->stack[0]= JST_DONE;

  return 0;
}
801
802
803 /* Skip colon and the value. */
skip_colon(json_engine_t * j)804 static int skip_colon(json_engine_t *j)
805 {
806 int t_next, c_len;
807
808 get_first_nonspace(&j->s, &t_next, &c_len);
809
810 if (t_next == C_COLON)
811 {
812 get_first_nonspace(&j->s, &t_next, &c_len);
813 return json_actions[JST_VALUE][t_next](j);
814 }
815
816 j->s.error= (t_next == C_EOS) ? JE_EOS :
817 ((t_next == C_BAD) ? JE_BAD_CHR:
818 JE_SYN);
819
820 return 1;
821 }
822
823
824 /* Skip colon and the value. */
skip_key(json_engine_t * j)825 static int skip_key(json_engine_t *j)
826 {
827 int t_next, c_len;
828
829 if (json_instr_chr_map[j->s.c_next] == S_BKSL &&
830 json_handle_esc(&j->s))
831 return 1;
832
833 while (json_read_keyname_chr(j) == 0) {}
834
835 if (j->s.error)
836 return 1;
837
838 get_first_nonspace(&j->s, &t_next, &c_len);
839 return json_actions[JST_VALUE][t_next](j);
840 }
841
842
843 /*
844 Handle EOS after the end of an object or array.
845 To do that we should pop the stack to see if
846 we are inside an object, or an array, and
847 run our 'state machine' accordingly.
848 */
struct_end_eos(json_engine_t * j)849 static int struct_end_eos(json_engine_t *j)
850 { return json_actions[j->stack[j->stack_p]][C_EOS](j); }
851
852
853 /*
854 Handle '}' after the end of an object or array.
855 To do that we should pop the stack to see if
856 we are inside an object, or an array, and
857 run our 'state machine' accordingly.
858 */
struct_end_cb(json_engine_t * j)859 static int struct_end_cb(json_engine_t *j)
860 { return json_actions[j->stack[j->stack_p]][C_RCURB](j); }
861
862
863 /*
864 Handle ']' after the end of an object or array.
865 To do that we should pop the stack to see if
866 we are inside an object, or an array, and
867 run our 'state machine' accordingly.
868 */
struct_end_qb(json_engine_t * j)869 static int struct_end_qb(json_engine_t *j)
870 { return json_actions[j->stack[j->stack_p]][C_RSQRB](j); }
871
872
873 /*
874 Handle ',' after the end of an object or array.
875 To do that we should pop the stack to see if
876 we are inside an object, or an array, and
877 run our 'state machine' accordingly.
878 */
struct_end_cm(json_engine_t * j)879 static int struct_end_cm(json_engine_t *j)
880 { return json_actions[j->stack[j->stack_p]][C_COMMA](j); }
881
882
/*
  Read the next character of a key name into j->s.c_next.

  Returns 0 when a character of the name was read (escape sequences are
  decoded). Returns 1 when the name has ended or an error happened;
  the two are told apart by j->s.error:
    - on the closing quote, spaces are skipped up to the ':' which is
      consumed, the state becomes JST_VALUE and no error is set;
    - otherwise j->s.error holds JE_SYN, JE_EOS, JE_BAD_CHR or
      JE_STRING_CONST (the latter with the read position moved back to
      the offending character).
*/
int json_read_keyname_chr(json_engine_t *j)
{
  int n_bytes, cls;

  n_bytes= json_next_char(&j->s);
  if (n_bytes > 0)
  {
    j->s.c_str+= n_bytes;

    if (j->s.c_next >= 128)
      return 0;

    cls= json_instr_chr_map[j->s.c_next];
    if (cls <= S_ETC)
      return 0;

    if (cls == S_BKSL)
      return json_handle_esc(&j->s);

    if (cls == S_ERR)
    {
      j->s.c_str-= n_bytes;
      j->s.error= JE_STRING_CONST;
      return 1;
    }

    if (cls == S_QUOTE)
    {
      /* Skip spaces until ':'. */
      while ((n_bytes= json_next_char(&j->s)) > 0)
      {
        if (j->s.c_next == ':')
        {
          j->s.c_str+= n_bytes;
          j->state= JST_VALUE;
          return 1;
        }

        if (j->s.c_next >= 128 || json_chr_map[j->s.c_next] != C_SPACE)
        {
          j->s.error= JE_SYN;
          return 1;
        }

        j->s.c_str+= n_bytes;
      }

      j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR;
      return 1;
    }
  }

  j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR;
  return 1;
}
930
931
/*
  Read the value at the current position.

  When the parser is in JST_KEY the key name is skipped first. Then
  spaces are skipped and the first character of the value is dispatched
  through the JST_READ_VALUE row of json_actions. j->value_begin is set
  to the start of that character and j->value_end to the read position
  after the handler has run.

  Returns the result of the handler, or 1 if the key name is broken.
*/
int json_read_value(json_engine_t *j)
{
  int cls, n_bytes, rc;

  j->value_type= JSON_VALUE_UNINITALIZED;

  if (j->state == JST_KEY)
  {
    for (;;)
    {
      if (json_read_keyname_chr(j) != 0)
        break;
    }

    if (j->s.error)
      return 1;
  }

  get_first_nonspace(&j->s, &cls, &n_bytes);

  j->value_begin= j->s.c_str - n_bytes;
  rc= json_actions[JST_READ_VALUE][cls](j);
  j->value_end= j->s.c_str;

  return rc;
}
952
953
/*
  Make one step of the scan: skip spaces, consume the next character
  and run the handler that json_actions gives for the current state
  and the class of that character. The length of the consumed
  character is kept in j->sav_c_len. Returns the handler's result.
*/
int json_scan_next(json_engine_t *j)
{
  int cls;
  json_state_handler handler;

  get_first_nonspace(&j->s, &cls, &j->sav_c_len);

  handler= json_actions[j->state][cls];
  return handler(j);
}
961
962
/* Classes of characters as seen by the JSON path parser. */
enum json_path_chr_classes {
  P_EOS= 0,        /* end of the path string */
  P_USD,           /* '$' */
  P_ASTER,         /* '*' */
  P_LSQRB,         /* '[' */
  P_RSQRB,         /* ']' */
  P_POINT,         /* '.' */
  P_ZERO,          /* '0' */
  P_DIGIT,         /* '1'..'9' */
  P_L,             /* 'l' (for "lax") */
  P_S,             /* 's' (for "strict") */
  P_SPACE,         /* whitespace */
  P_BKSL,          /* '\' */
  P_QUOTE,         /* '"' */
  P_ETC,           /* any other character */
  P_ERR,           /* character that JSON does not allow */
  P_BAD,           /* invalid character */
  N_PATH_CLASSES   /* number of classes above */
};
982
983
/*
  Maps the first 128 code points to the classes of the path parser;
  the index is the code point. The table only covers 0..127.

  NOTE(review): P_S is placed at offsets 82 and 114, which are the code
  points of 'R' and 'r', whereas the enum comment ties P_S to 's'
  ('S' is 83 and 's' is 115). Verify against the handling of the
  "strict" keyword in the path parser before changing it.
*/
static enum json_path_chr_classes json_path_chr_map[128] = {
  /* 0x00..0x1F: control characters; TAB, LF and CR are P_SPACE. */
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
  P_ERR,   P_SPACE, P_SPACE, P_ERR,   P_ERR,   P_SPACE, P_ERR,   P_ERR,
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,

  /* 0x20..0x3F: space, '"', '$', '*', '.' and the digits. */
  P_SPACE, P_ETC,   P_QUOTE, P_ETC,   P_USD,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ASTER, P_ETC,   P_ETC,   P_ETC,   P_POINT, P_ETC,
  P_ZERO,  P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT,
  P_DIGIT, P_DIGIT, P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,

  /* 0x40..0x5F: upper-case letters, '[', '\' and ']'. */
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_S,     P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_LSQRB, P_BKSL,  P_RSQRB, P_ETC,   P_ETC,

  /* 0x60..0x7F: lower-case letters. */
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_S,     P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC
};
1005
1006
/*
  States of the JSON path parser. Only the states listed before
  N_PATH_STATES have a row in the transitions table.
*/
enum json_path_states {
  PS_GO= 0,       /* initial state */
  PS_LAX,         /* parse the 'lax' keyword */
  PS_PT,          /* a new step of the path begins */
  PS_AR,          /* parse an array step */
  PS_SAR,         /* space after the '[' */
  PS_AWD,         /* array wildcard */
  PS_Z,           /* '0' (as an array item number) */
  PS_INT,         /* parse an integer (as an array item number) */
  PS_AS,          /* space */
  PS_KEY,         /* key */
  PS_KNM,         /* parse a key name */
  PS_KWD,         /* key wildcard */
  PS_AST,         /* asterisk */
  PS_DWD,         /* double wildcard */
  PS_KEYX,        /* key that started with a quote (") */
  PS_KNMX,        /* parse a quoted key name */
  N_PATH_STATES,  /* the states below are not in the transitions table */
  PS_SCT,         /* parse the 'strict' keyword */
  PS_EKY,         /* '.' after the key name, so the next step is a key */
  PS_EKYX,        /* closing '"' of a quoted key name */
  PS_EAR,         /* '[' after the key name, so the next step is an array */
  PS_ESC,         /* escape inside a key name */
  PS_ESCX,        /* escape inside a quoted key name */
  PS_OK,          /* the path ended normally */
  PS_KOK          /* EOS after a key name, so end the path normally */
};
1034
1035
/*
  Transition table of the path parser:
  json_path_transitions[current state][character class] is either the
  next state (PS_*) or a negative JE_* error code, which the path
  parser returns as the error. Some target states (those declared after
  N_PATH_STATES) have no row of their own; the path parser is expected
  to replace them with a regular state before the next lookup.
*/
static int json_path_transitions[N_PATH_STATES][N_PATH_CLASSES]=
{
/*
            EOS       $,      *       [       ]       .       0
            1..9    L       S       SPACE   \       "       ETC
            ERR              BAD
*/
/* GO  */ { JE_EOS, PS_PT,  JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
            JE_SYN, PS_LAX, PS_SCT, PS_GO,  JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* LAX */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
            JE_SYN, PS_LAX, JE_SYN, PS_GO,  JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* PT */  { PS_OK,  JE_SYN, PS_AST, PS_AR,  JE_SYN, PS_KEY, JE_SYN, JE_SYN,
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* AR */  { JE_EOS, JE_SYN, PS_AWD, JE_SYN, JE_SYN, JE_SYN, PS_Z,
            PS_INT, JE_SYN, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* SAR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, PS_PT,  JE_SYN, PS_Z,
            PS_INT, JE_SYN, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* AWD */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, JE_SYN,
            JE_SYN, JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* Z */   { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, JE_SYN,
            JE_SYN, JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* INT */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, PS_INT,
            PS_INT, JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* AS */  { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, JE_SYN, JE_SYN,
            JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* KEY */ { JE_EOS, PS_KNM, PS_KWD, JE_SYN, PS_KNM, JE_SYN, PS_KNM,
            PS_KNM, PS_KNM, PS_KNM, PS_KNM, JE_SYN, PS_KEYX, PS_KNM,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* KNM */ { PS_KOK, PS_KNM, PS_AST, PS_EAR, PS_KNM, PS_EKY, PS_KNM,
            PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_ESC, PS_KNM, PS_KNM,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* KWD */ { PS_OK,  JE_SYN, JE_SYN, PS_AR,  JE_SYN, PS_EKY, JE_SYN,
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* AST */ { JE_SYN, JE_SYN, PS_DWD, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* DWD */ { JE_SYN, JE_SYN, PS_AST, PS_AR,  JE_SYN, PS_KEY, JE_SYN, JE_SYN,
            JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* KEYX*/ { JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
            PS_KNMX,PS_KNMX, PS_KNMX, PS_KNMX, PS_ESCX, PS_EKYX, PS_KNMX,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
/* KNMX */{ JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
            PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,PS_ESCX, PS_EKYX, PS_KNMX,
            JE_NOT_JSON_CHR, JE_BAD_CHR},
};
1092
1093
/*
  Parse the textual JSON path [str, end) (charset i_cs) into p.

  The text is consumed one character at a time. Every character is mapped
  to a class (P_EOS / P_BAD when no character can be read, P_ETC for code
  points >= 128, json_path_chr_map[] otherwise) and the pair
  (current state, class) is looked up in json_path_transitions[][].
  Non-negative table entries are parser states, negative entries are
  error codes.

  Step 0 of p->steps[] is pre-filled as an array wildcard; the steps read
  from the text are stored starting at index 1, and p->last_step points at
  the last one filled in.

  Return value:
    0                    the path was parsed up to PS_OK
    negative table entry stored in p->s.error and returned as is
    JE_DEPTH             more than JSON_DEPTH_LIMIT steps (also in p->s.error)
    JE_SYN               the path ended while a '**' was still pending
    1                    skip_string_verbatim() or json_handle_esc() failed
*/
int json_path_setup(json_path_t *p,
                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
{
  int c_len, t_next, state= PS_GO;
  /* Set on PS_DWD; OR-ed into the type of the next step that is created. */
  enum json_path_step_types double_wildcard= JSON_PATH_KEY_NULL;

  json_string_setup(&p->s, i_cs, str, end);

  p->steps[0].type= JSON_PATH_ARRAY_WILD;
  p->last_step= p->steps;
  p->mode_strict= FALSE;
  p->types_used= JSON_PATH_KEY_NULL;

  do
  {
    /* Classify the next character of the path text. */
    if ((c_len= json_next_char(&p->s)) <= 0)
      t_next= json_eos(&p->s) ? P_EOS : P_BAD;
    else
      t_next= (p->s.c_next >= 128) ? P_ETC : json_path_chr_map[p->s.c_next];

    /* A negative entry is an error code rather than a state. */
    if ((state= json_path_transitions[state][t_next]) < 0)
      return p->s.error= state;

    /*
      Step over the character. The 'c_str - c_len' expressions below undo
      this advance, so key_end points at the character that ended the key.
    */
    p->s.c_str+= c_len;

    switch (state)
    {
    case PS_LAX:
      /* The remaining letters of the keyword must follow literally. */
      if ((p->s.error= skip_string_verbatim(&p->s, "ax")))
        return 1;
      p->mode_strict= FALSE;
      continue;
    case PS_SCT:
      if ((p->s.error= skip_string_verbatim(&p->s, "rict")))
        return 1;
      p->mode_strict= TRUE;
      /* From here on the parser continues as after the "lax" keyword. */
      state= PS_LAX;
      continue;
    case PS_KWD:
    case PS_AWD:
      /* Wildcard in place of a key name / array index. */
      p->last_step->type|= JSON_PATH_WILD;
      p->types_used|= JSON_PATH_WILD;
      continue;
    case PS_INT:
      /* Accumulate the decimal array index digit by digit. */
      p->last_step->n_item*= 10;
      p->last_step->n_item+= p->s.c_next - '0';
      continue;
    case PS_EKYX:
      /* End of a key handled by the KEYX/KNMX states. */
      p->last_step->key_end= p->s.c_str - c_len;
      state= PS_PT;
      continue;
    case PS_EKY:
      /* The current key ended and another key step starts right away. */
      p->last_step->key_end= p->s.c_str - c_len;
      state= PS_KEY;
      /* fall through */
    case PS_KEY:
      /* Open a new key step. */
      p->last_step++;
      if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
        return p->s.error= JE_DEPTH;
      p->types_used|= p->last_step->type= JSON_PATH_KEY | double_wildcard;
      double_wildcard= JSON_PATH_KEY_NULL;
      /* fall through */
    case PS_KEYX:
      /* The key text starts right after the character just consumed. */
      p->last_step->key= p->s.c_str;
      continue;
    case PS_EAR:
      /* The current key ended and an array step starts right away. */
      p->last_step->key_end= p->s.c_str - c_len;
      state= PS_AR;
      /* fall through */
    case PS_AR:
      /* Open a new array step; its index starts at 0. */
      p->last_step++;
      if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
        return p->s.error= JE_DEPTH;
      p->types_used|= p->last_step->type= JSON_PATH_ARRAY | double_wildcard;
      double_wildcard= JSON_PATH_KEY_NULL;
      p->last_step->n_item= 0;
      continue;
    case PS_ESC:
      /* Escape sequence inside a key name; afterwards stay in the name. */
      if (json_handle_esc(&p->s))
        return 1;
      state= PS_KNM;
      continue;
    case PS_ESCX:
      if (json_handle_esc(&p->s))
        return 1;
      state= PS_KNMX;
      continue;
    case PS_KOK:
      /* The path text ended while reading a key name: close the key. */
      p->last_step->key_end= p->s.c_str - c_len;
      state= PS_OK;
      break; /* 'break' as the loop supposed to end after that. */
    case PS_DWD:
      /* Remember the '**' until the next step is created. */
      double_wildcard= JSON_PATH_DOUBLE_WILD;
      continue;
    };
  } while (state != PS_OK);

  /* A '**' that was never attached to a following step is a syntax error. */
  return double_wildcard ? (p->s.error= JE_SYN) : 0;
}
1193
1194
/*
  Keep scanning until the engine's nesting level (j->stack_p) drops
  below 'level'.

  Returns 0 once the level is below 'level' (checked before the first
  scan step too), 1 if json_scan_next() stops with a non-zero result first.
*/
int json_skip_to_level(json_engine_t *j, int level)
{
  for (;;)
  {
    if (j->stack_p < level)
      return 0;
    if (json_scan_next(j) != 0)
      return 1;
  }
}
1204
1205
1206 /*
1207 works as json_skip_level() but also counts items on the current
1208 level skipped.
1209 */
json_skip_level_and_count(json_engine_t * j,int * n_items_skipped)1210 int json_skip_level_and_count(json_engine_t *j, int *n_items_skipped)
1211 {
1212 int level= j->stack_p;
1213
1214 *n_items_skipped= 0;
1215 while (json_scan_next(j) == 0)
1216 {
1217 if (j->stack_p < level)
1218 return 0;
1219 if (j->stack_p == level && j->state == JST_VALUE)
1220 (*n_items_skipped)++;
1221 }
1222
1223 return 1;
1224 }
1225
1226
/*
  Read the value at the current position and step over it.

  A scalar needs nothing more after json_read_value(); any other value
  is skipped with json_skip_level().

  Returns 1 if json_read_value() fails, otherwise 0 for a scalar or the
  result of json_skip_level().
*/
int json_skip_key(json_engine_t *j)
{
  if (json_read_value(j))
    return 1;

  return json_value_scalar(j) ? 0 : json_skip_level(j);
}
1237
1238
/*
  Value stored in array_counters[] by handle_match() for path steps it
  steps over; json_find_path() checks for it when an object ends.
*/
#define SKIPPED_STEP_MARK ((uint) ~0)
1240
/*
  Current step of the path matches the JSON construction.
  Now we should either stop the search or go to the next
  step of the path.

  Must only be called while *p_cur_step is not the last step.
  The matched value is read first, then:
    - scalar value: leading plain '[0]' steps (type exactly
      JSON_PATH_ARRAY, n_item 0) that follow are stepped over; if that
      exhausts the path, the read position is rewound to the start of
      the value and 1 is returned. Otherwise 0 is returned and
      *p_cur_step is left unchanged.
    - object value followed by plain '[0]' steps: those steps are marked
      with SKIPPED_STEP_MARK in array_counters[]; if that exhausts the
      path, the read position is rewound, the stack level is decremented
      and 1 is returned.
    - otherwise the counter of the next step is reset; if the value's
      type is not the one the next step needs, the value is skipped,
      else *p_cur_step is advanced.

  Returns 1 when json_read_value() fails or when the path got exhausted
  as described above, the result of json_skip_level() when the value is
  skipped, and 0 otherwise.
*/
static int handle_match(json_engine_t *je, json_path_t *p,
                        json_path_step_t **p_cur_step, uint *array_counters)
{
  json_path_step_t *next_step= *p_cur_step + 1;

  DBUG_ASSERT(*p_cur_step < p->last_step);

  if (json_read_value(je))
    return 1;

  if (json_value_scalar(je))
  {
    /* Step over the plain '[0]' steps that follow. */
    while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0)
    {
      if (++next_step > p->last_step)
      {
        /* Nothing but '[0]' steps was left: rewind to the value start. */
        je->s.c_str= je->value_begin;
        return 1;
      }
    }
    return 0;
  }

  if (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0 &&
      je->value_type & JSON_VALUE_OBJECT)
  {
    do
    {
      /* Mark the step as skipped so JST_OBJ_END handling can unwind it. */
      array_counters[next_step - p->steps]= SKIPPED_STEP_MARK;
      if (++next_step > p->last_step)
      {
        /* Path exhausted: rewind to the value start and undo the level. */
        je->s.c_str= je->value_begin;
        je->stack_p--;
        return 1;
      }
    } while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0);
  }


  /* Item counting for the next step starts from zero. */
  array_counters[next_step - p->steps]= 0;

  /* Value type compared against the KEY/ARRAY bits of the step type. */
  if ((int) je->value_type !=
      (int) (next_step->type & JSON_PATH_KEY_OR_ARRAY))
    return json_skip_level(je);

  *p_cur_step= next_step;
  return 0;
}
1294
1295
1296 /*
1297 Check if the name of the current JSON key matches
1298 the step of the path.
1299 */
json_key_matches(json_engine_t * je,json_string_t * k)1300 int json_key_matches(json_engine_t *je, json_string_t *k)
1301 {
1302 while (json_read_keyname_chr(je) == 0)
1303 {
1304 if (json_read_string_const_chr(k) ||
1305 je->s.c_next != k->c_next)
1306 return 0;
1307 }
1308
1309 return json_read_string_const_chr(k);
1310 }
1311
1312
/*
  Continue scanning 'je' looking for the place described by path 'p'.

  *p_cur_step is the path step currently being matched; it is advanced by
  handle_match() and moved back when an object or array ends.
  array_counters[] holds, per path step, the number of array items seen
  so far on that step (or SKIPPED_STEP_MARK).

  Returns 1 when json_scan_next() stops before anything is found.
  Otherwise returns je->s.error: this happens when the last step matched,
  when handle_match() returned non-zero, or when json_skip_key() failed.
*/
int json_find_path(json_engine_t *je,
                   json_path_t *p, json_path_step_t **p_cur_step,
                   uint *array_counters)
{
  json_string_t key_name;

  json_string_set_cs(&key_name, p->s.cs);

  do
  {
    json_path_step_t *cur_step= *p_cur_step;
    switch (je->state)
    {
    case JST_KEY:
      DBUG_ASSERT(cur_step->type & JSON_PATH_KEY);
      if (!(cur_step->type & JSON_PATH_WILD))
      {
        /* Non-wildcard step: the key name has to match. */
        json_string_set_str(&key_name, cur_step->key, cur_step->key_end);
        if (!json_key_matches(je, &key_name))
        {
          /* Different key: step over its value and keep scanning. */
          if (json_skip_key(je))
            goto exit;
          continue;
        }
      }
      if (cur_step == p->last_step ||
          handle_match(je, p, p_cur_step, array_counters))
        goto exit;
      break;
    case JST_VALUE:
      DBUG_ASSERT(cur_step->type & JSON_PATH_ARRAY);
      /* The item counter is incremented whenever it is compared. */
      if (cur_step->type & JSON_PATH_WILD ||
          cur_step->n_item == array_counters[cur_step - p->steps]++)
      {
        /* Array item matches. */
        if (cur_step == p->last_step ||
            handle_match(je, p, p_cur_step, array_counters))
          goto exit;
      }
      else
        /* NOTE(review): the result of this call is not checked here. */
        json_skip_array_item(je);
      break;
    case JST_OBJ_END:
      /* Go one step back, and further back over steps marked as skipped. */
      do
      {
        (*p_cur_step)--;
      } while (*p_cur_step > p->steps &&
               array_counters[*p_cur_step - p->steps] == SKIPPED_STEP_MARK);
      break;
    case JST_ARRAY_END:
      (*p_cur_step)--;
      break;
    default:
      DBUG_ASSERT(0);
      break;
    };
  } while (json_scan_next(je) == 0);

  /* No luck. */
  return 1;

exit:
  return je->s.error;
}
1377
1378
/*
  Start a search for several paths at once.

  Stores the path array, its size and the caller's path_depths[] array
  in 'state', sets the current depth to 0 and returns the result of the
  first json_find_paths_next() call.
*/
int json_find_paths_first(json_engine_t *je, json_find_paths_t *state,
                          uint n_paths, json_path_t *paths, uint *path_depths)
{
  state->paths= paths;
  state->n_paths= n_paths;
  state->path_depths= path_depths;
  state->cur_depth= 0;

  return json_find_paths_next(je, state);
}
1388
1389
/*
  Continue matching the paths in 'state' against the scan of 'je'.

  state->cur_depth follows the nesting of the document (incremented on
  object/array start, decremented on their end). state->path_depths[i]
  is compared with cur_depth to tell whether path i is still a candidate
  at the current level.

  Returns 1 when json_scan_next() stops without a hit. Otherwise returns
  je->s.error: this happens when path_found was set for the current
  key/item, or when json_skip_level() failed.

  NOTE(review): the "last step" tests below compare cur_step (which is
  steps + cur_depth) with last_step + cur_depth, so they can only be true
  when last_step == steps. Confirm whether a comparison with last_step
  alone was intended.
  NOTE(review): state->array_counters[] is only incremented in this
  function, never reset; verify where it is initialized.
*/
int json_find_paths_next(json_engine_t *je, json_find_paths_t *state)
{
  uint p_c;
  int path_found, no_match_found;
  do
  {
    switch (je->state)
    {
    case JST_KEY:
      path_found= FALSE;
      no_match_found= TRUE;
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        json_path_step_t *cur_step;
        if (state->path_depths[p_c] <
              state->cur_depth /* Path already failed. */ ||
            !((cur_step= state->paths[p_c].steps + state->cur_depth)->type &
              JSON_PATH_KEY))
          continue;

        if (!(cur_step->type & JSON_PATH_WILD))
        {
          json_string_t key_name;
          json_string_setup(&key_name, state->paths[p_c].s.cs,
                            cur_step->key, cur_step->key_end);
          if (!json_key_matches(je, &key_name))
            continue;
        }
        if (cur_step == state->paths[p_c].last_step + state->cur_depth)
          path_found= TRUE;
        else
        {
          /* The path goes deeper: keep it alive for the next level. */
          no_match_found= FALSE;
          state->path_depths[p_c]= state->cur_depth + 1;
        }
      }
      if (path_found)
        /* Return the result. */
        goto exit;
      if (no_match_found)
      {
        /* No possible paths left to check. Just skip the level. */
        if (json_skip_level(je))
          goto exit;
      }

      break;
    case JST_VALUE:
      path_found= FALSE;
      no_match_found= TRUE;
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        json_path_step_t *cur_step;
        if (state->path_depths[p_c]< state->cur_depth /* Path already failed. */ ||
            !((cur_step= state->paths[p_c].steps + state->cur_depth)->type &
              JSON_PATH_ARRAY))
          continue;
        if (cur_step->type & JSON_PATH_WILD ||
            cur_step->n_item == state->array_counters[state->cur_depth])
        {
          /* Array item matches. */
          if (cur_step == state->paths[p_c].last_step + state->cur_depth)
            path_found= TRUE;
          else
          {
            no_match_found= FALSE;
            state->path_depths[p_c]= state->cur_depth + 1;
          }
        }
      }

      if (path_found)
        goto exit;

      if (no_match_found)
        /* NOTE(review): the result of this call is not checked here. */
        json_skip_array_item(je);

      state->array_counters[state->cur_depth]++;
      break;
    case JST_OBJ_START:
    case JST_ARRAY_START:
      /* Paths whose step at this depth has the matching kind go deeper. */
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        if (state->path_depths[p_c] < state->cur_depth)
          /* Path already failed. */
          continue;
        if (state->paths[p_c].steps[state->cur_depth].type &
            ((je->state == JST_OBJ_START) ? JSON_PATH_KEY : JSON_PATH_ARRAY))
          state->path_depths[p_c]++;
      }
      state->cur_depth++;
      break;
    case JST_OBJ_END:
    case JST_ARRAY_END:
      /* Leaving a level: paths that reached it go one level back. */
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        if (state->path_depths[p_c] < state->cur_depth)
          continue;
        state->path_depths[p_c]--;
      }
      state->cur_depth--;
      break;
    default:
      DBUG_ASSERT(0);
      break;
    };
  } while (json_scan_next(je) == 0);

  /* No luck. */
  return 1;

exit:
  return je->s.error;
}
1504
1505
/*
  Write the bytes [ascii, ascii_end) into the buffer [json, json_end),
  converting each byte with the wc_mb() handler of json_cs.

  Returns the number of bytes written to the buffer. If wc_mb() returns
  a non-positive value for some byte, that value is returned instead.
*/
int json_append_ascii(CHARSET_INFO *json_cs,
                      uchar *json, uchar *json_end,
                      const uchar *ascii, const uchar *ascii_end)
{
  uchar *out= json;
  const uchar *in;

  for (in= ascii; in < ascii_end; in++)
  {
    int written= json_cs->cset->wc_mb(json_cs, (my_wc_t) *in, out, json_end);

    if (written <= 0)
      return written;                     /* Error return. */

    out+= written;
  }

  return (int)(out - json);
}
1528
1529
/*
  Read the JSON string constant [json_str, json_end) (charset json_cs)
  with json_read_string_const_chr() and write the characters it yields
  into [res, res_end) in charset res_cs.

  A character for which wc_mb() reports MY_CS_ILUNI is replaced by '?'.

  Returns the number of bytes written, or -1 when a character could not
  be stored in the result buffer or when reading stopped with an error
  other than JE_EOS.
*/
int json_unescape(CHARSET_INFO *json_cs,
                  const uchar *json_str, const uchar *json_end,
                  CHARSET_INFO *res_cs, uchar *res, uchar *res_end)
{
  json_string_t src;
  uchar *out= res;

  json_string_setup(&src, json_cs, json_str, json_end);

  while (json_read_string_const_chr(&src) == 0)
  {
    int n= res_cs->cset->wc_mb(res_cs, src.c_next, out, res_end);

    /*
      Result charset doesn't support the json's character.
      Let's replace it with the '?' symbol.
    */
    if (n <= 0 && n == MY_CS_ILUNI)
      n= res_cs->cset->wc_mb(res_cs, '?', out, res_end);

    if (n <= 0)
      return -1;                          /* Result buffer is too small. */

    out+= n;
  }

  if (src.error != JE_EOS)
    return -1;

  return (int)(out - res);
}
1564
1565
/*
  How a character has to be written when producing JSON text.
  Except for ESC_, each value is the character that follows the
  backslash in the escaped form (for ESC_BS the character itself is
  repeated after the backslash).
*/
enum json_esc_char_classes {
  ESC_= 0,    /* No need to escape. */
  ESC_U= 'u', /* Character not allowed in JSON. Always escape as \uXXXX. */
  ESC_B= 'b', /* Backspace. Escape as \b */
  ESC_F= 'f', /* Formfeed. Escape as \f */
  ESC_N= 'n', /* Newline. Escape as \n */
  ESC_R= 'r', /* Return. Escape as \r */
  ESC_T= 't', /* Tab. Escape as \t */
  ESC_BS= '\\'  /* Backslash or '"'. Escape by the \\ prefix. */
};
1577
1578
/*
  This specifies how we should escape the character.
  Indexed by code point; covers 0x00..0x5F only, eight entries per row.
  json_escape() treats code points >= 0x60 as not needing an escape.
*/
static enum json_esc_char_classes json_escape_chr_map[0x60] = {
  /* 0x00..0x1F: control characters */
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
  ESC_B,   ESC_T,   ESC_N,   ESC_U,   ESC_F,   ESC_R,   ESC_U,   ESC_U,
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,

  /* 0x20..0x3F: only 0x22 (the double quote) is escaped */
  ESC_,    ESC_,    ESC_BS,  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,

  /* 0x40..0x5F: only 0x5C (the backslash) is escaped */
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_BS,  ESC_,    ESC_,    ESC_,
};
1596
1597
/*
  Upper-case hex digit for a value 0..15. Exactly 16 characters in a
  16-byte array, so there is no terminating NUL: index it, don't print it.
*/
static const char hexconv[16] = "0123456789ABCDEF";
1599
1600
/*
  Convert the string [str, str_end) (charset str_cs) into the body of a
  JSON string constant written to [json, json_end) in charset json_cs.

  Characters are escaped according to json_escape_chr_map[]:
    - '"' and '\' get a backslash prefix,
    - backspace, tab, newline, formfeed and return become \b \t \n \f \r,
    - the other characters below 0x20, and any character that json_cs
      cannot represent, are written as \u followed by the hex digits of
      the character's UTF-16 encoding (4 digits, or 8 digits when the
      UTF-16 form takes 4 bytes).
  Everything else is copied as is.

  Returns the number of bytes written to the json buffer, or -1 when
  the source contains a symbol mb_wc() cannot read, the json buffer is
  depleted, or a character that has to be \u-escaped has no UTF-16 form.
*/
int json_escape(CHARSET_INFO *str_cs,
                const uchar *str, const uchar *str_end,
                CHARSET_INFO *json_cs, uchar *json, uchar *json_end)
{
  const uchar *json_start= json;

  while (str < str_end)
  {
    my_wc_t c_chr;
    int c_len;
    if ((c_len= str_cs->cset->mb_wc(str_cs, &c_chr, str, str_end)) > 0)
    {
      enum json_esc_char_classes c_class;

      str+= c_len;
      if (c_chr >= 0x60 || (c_class= json_escape_chr_map[c_chr]) == ESC_)
      {
        /* No escaping needed: try to store the character itself. */
        if ((c_len= json_cs->cset->wc_mb(json_cs, c_chr, json, json_end)) > 0)
        {
          json+= c_len;
          continue;
        }
        if (c_len < 0)
        {
          /* JSON buffer is depleted. */
          return -1;
        }

        /* JSON charset cannot convert this character. */
        c_class= ESC_U;
      }

      /*
        Write the backslash and the character after it: the character
        itself for ESC_BS, the class letter otherwise.
      */
      if ((c_len= json_cs->cset->wc_mb(json_cs, '\\', json, json_end)) <= 0 ||
          (c_len= json_cs->cset->wc_mb(json_cs,
                                       (c_class == ESC_BS) ? c_chr : c_class,
                                       json+= c_len, json_end)) <= 0)
      {
        /* JSON buffer is depleted. */
        return -1;
      }
      json+= c_len;

      if (c_class != ESC_U)
        continue;

      {
        /* We have to use \uXXXX escaping. */
        uchar utf16buf[4];
        uchar code_str[8];
        int u_len= my_uni_utf16(0, c_chr, utf16buf, utf16buf + 4);

        if (u_len <= 0)
        {
          /*
            The character has no UTF-16 form. Stop here instead of
            hex-encoding a buffer the conversion did not fill.
          */
          return -1;
        }

        code_str[0]= hexconv[utf16buf[0] >> 4];
        code_str[1]= hexconv[utf16buf[0] & 15];
        code_str[2]= hexconv[utf16buf[1] >> 4];
        code_str[3]= hexconv[utf16buf[1] & 15];

        if (u_len > 2)
        {
          /* 4-byte UTF-16 form: four more hex digits. */
          code_str[4]= hexconv[utf16buf[2] >> 4];
          code_str[5]= hexconv[utf16buf[2] & 15];
          code_str[6]= hexconv[utf16buf[3] >> 4];
          code_str[7]= hexconv[utf16buf[3] & 15];
        }

        /* Two hex digits per UTF-16 byte. */
        if ((c_len= json_append_ascii(json_cs, json, json_end,
                                      code_str, code_str+u_len*2)) > 0)
        {
          json+= c_len;
          continue;
        }
        /* JSON buffer is depleted. */
        return -1;
      }
    }
    else /* c_len <= 0, an illegal symbol. */
      return -1;
  }

  return (int)(json - json_start);
}
1681
1682
/*
  Prepare for walking a JSON document with json_get_path_next().

  Marks the path as "not started" by placing last_step one element before
  steps[], which json_get_path_next() tests with 'last_step < steps', and
  starts the scan of [str, end) in charset i_cs. Always returns 0.
*/
int json_get_path_start(json_engine_t *je, CHARSET_INFO *i_cs,
                        const uchar *str, const uchar *end,
                        json_path_t *p)
{
  json_path_step_t *before_first= p->steps - 1;

  p->last_step= before_first;
  json_scan_start(je, i_cs, str, end);

  return 0;
}
1691
1692
/*
  Move to the next value of the document and update 'p' so that it
  describes where that value is.

  On the first call after json_get_path_start() (last_step < steps) the
  top-level value is read and step 0 is set up as an array wildcard with
  index 0. On later calls the path is adjusted for the value read by the
  previous call, the scan continues, and the next value found is read.

  Returns 0 when a value has been read, 1 when json_read_value() or
  json_scan_next() stops with a non-zero result or key reading left an
  error in je->s.error.
*/
int json_get_path_next(json_engine_t *je, json_path_t *p)
{
  if (p->last_step < p->steps)
  {
    /* First call: read the top-level value. */
    if (json_read_value(je))
      return 1;

    p->last_step= p->steps;
    p->steps[0].type= JSON_PATH_ARRAY_WILD;
    p->steps[0].n_item= 0;
    return 0;
  }
  else
  {
    if (json_value_scalar(je))
    {
      /* A scalar array item is done: the next item has the next index. */
      if (p->last_step->type & JSON_PATH_ARRAY)
        p->last_step->n_item++;
    }
    else
    {
      /* The previous value is an object or array: add a step for it. */
      p->last_step++;
      p->last_step->type= (enum json_path_step_types) je->value_type;
      p->last_step->n_item= 0;
    }

    if (json_scan_next(je))
      return 1;
  }

  do
  {
    switch (je->state)
    {
    case JST_KEY:
      /* Record where the key name starts and ends while reading it. */
      p->last_step->key= je->s.c_str;
      do
      {
        p->last_step->key_end= je->s.c_str;
      } while (json_read_keyname_chr(je) == 0);
      if (je->s.error)
        return 1;
      /* Now we have je.state == JST_VALUE, so let's handle it. */

      /* fall through */
    case JST_VALUE:
      if (json_read_value(je))
        return 1;
      return 0;
    case JST_OBJ_END:
    case JST_ARRAY_END:
      /* Leave the level; a finished array item bumps the parent's index. */
      p->last_step--;
      if (p->last_step->type & JSON_PATH_ARRAY)
        p->last_step->n_item++;
      break;
    default:
      break;
    }
  } while (json_scan_next(je) == 0);

  return 1;
}
1755
1756
/*
  Compare the steps [a, a_end] of a path that may contain wildcards with
  the steps [b, b_end] of a path that must not contain any (asserted).
  Both ranges are inclusive.

  A non-wildcard '[0]' step in 'a' is also accepted where 'b' has a key
  step (step_fits_autowrap), and, unless vt is JSON_VALUE_ARRAY, trailing
  '[0]' steps of 'a' are accepted after 'b' has run out.
  A step of 'a' carrying JSON_PATH_DOUBLE_WILD may consume any number of
  'b' steps; this is resolved by recursive calls.

  Return value:
     0   the paths match
     1   'a' is exhausted while 'b' still has steps
    -1   a step of 'a' without a double wildcard did not match
    -2   'b' is exhausted while 'a' still has steps that cannot be skipped
  For double-wildcard steps the result of the recursive calls is returned.
*/
int json_path_parts_compare(
    const json_path_step_t *a, const json_path_step_t *a_end,
    const json_path_step_t *b, const json_path_step_t *b_end,
    enum json_value_types vt)
{
  int res, res2;

  while (a <= a_end)
  {
    if (b > b_end)
    {
      /* 'b' ran out: only plain '[0]' steps of 'a' may be left over. */
      while (vt != JSON_VALUE_ARRAY &&
             (a->type & JSON_PATH_ARRAY_WILD) == JSON_PATH_ARRAY &&
             a->n_item == 0)
      {
        if (++a > a_end)
          return 0;
      }
      return -2;
    }

    DBUG_ASSERT((b->type & (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD)) == 0);


    if (a->type & JSON_PATH_ARRAY)
    {
      if (b->type & JSON_PATH_ARRAY)
      {
        if ((a->type & JSON_PATH_WILD) || a->n_item == b->n_item)
          goto step_fits;
        goto step_failed;
      }
      /* 'a' is an array step, 'b' is not: only a plain '[0]' is accepted. */
      if ((a->type & JSON_PATH_WILD) == 0 && a->n_item == 0)
        goto step_fits_autowrap;
      goto step_failed;
    }
    else /* JSON_PATH_KEY */
    {
      if (!(b->type & JSON_PATH_KEY))
        goto step_failed;

      /* Keys are compared byte by byte, after comparing their lengths. */
      if (!(a->type & JSON_PATH_WILD) &&
          (a->key_end - a->key != b->key_end - b->key ||
           memcmp(a->key, b->key, a->key_end - a->key) != 0))
        goto step_failed;

      goto step_fits;
    }
step_failed:
    /* With a double wildcard the same 'a' step is tried on the next 'b'. */
    if (!(a->type & JSON_PATH_DOUBLE_WILD))
      return -1;
    b++;
    continue;

step_fits:
    b++;
    if (!(a->type & JSON_PATH_DOUBLE_WILD))
    {
      a++;
      continue;
    }

    /* Double wild handling needs recursions. */
    /* First try with this 'a' step consumed ... */
    res= json_path_parts_compare(a+1, a_end, b, b_end, vt);
    if (res == 0)
      return 0;

    /* ... then with the same 'a' step matched further down in 'b'. */
    res2= json_path_parts_compare(a, a_end, b, b_end, vt);

    return (res2 >= 0) ? res2 : res;

step_fits_autowrap:
    /* The '[0]' step of 'a' is consumed, the 'b' step is not. */
    if (!(a->type & JSON_PATH_DOUBLE_WILD))
    {
      a++;
      continue;
    }

    /* Double wild handling needs recursions. */
    res= json_path_parts_compare(a+1, a_end, b+1, b_end, vt);
    if (res == 0)
      return 0;

    res2= json_path_parts_compare(a, a_end, b+1, b_end, vt);

    return (res2 >= 0) ? res2 : res;

  }

  /* 'a' is exhausted: 0 if 'b' is exhausted too, 1 otherwise. */
  return b <= b_end;
}
1848
1849
json_path_compare(const json_path_t * a,const json_path_t * b,enum json_value_types vt)1850 int json_path_compare(const json_path_t *a, const json_path_t *b,
1851 enum json_value_types vt)
1852 {
1853 return json_path_parts_compare(a->steps+1, a->last_step,
1854 b->steps+1, b->last_step, vt);
1855 }
1856
1857
/*
  Read the value at the current position and report where it is.

  *value is set to the start of the value. *value_len is the engine's
  value_len for a scalar; for an object or array the whole level is
  skipped and the length is measured up to the resulting read position.

  Returns the value type converted to enum json_types, or JSV_BAD_JSON
  when json_read_value() or json_skip_level() fails.
*/
static enum json_types smart_read_value(json_engine_t *je,
                                        const char **value, int *value_len)
{
  /* The cast in the return statement relies on these equalities. */
  compile_time_assert((int) JSON_VALUE_OBJECT == (int) JSV_OBJECT);
  compile_time_assert((int) JSON_VALUE_ARRAY == (int) JSV_ARRAY);
  compile_time_assert((int) JSON_VALUE_STRING == (int) JSV_STRING);
  compile_time_assert((int) JSON_VALUE_NUMBER == (int) JSV_NUMBER);
  compile_time_assert((int) JSON_VALUE_TRUE == (int) JSV_TRUE);
  compile_time_assert((int) JSON_VALUE_FALSE == (int) JSV_FALSE);
  compile_time_assert((int) JSON_VALUE_NULL == (int) JSV_NULL);

  if (json_read_value(je))
    return JSV_BAD_JSON;

  *value= (char *) je->value;

  if (!json_value_scalar(je))
  {
    if (json_skip_level(je))
      return JSV_BAD_JSON;

    *value_len= (int) ((char *) je->s.c_str - *value);
  }
  else
    *value_len= je->value_len;

  return (enum json_types) je->value_type;
}
1889
1890
json_type(const char * js,const char * js_end,const char ** value,int * value_len)1891 enum json_types json_type(const char *js, const char *js_end,
1892 const char **value, int *value_len)
1893 {
1894 json_engine_t je;
1895
1896 json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1897 (const uchar *) js_end);
1898
1899 return smart_read_value(&je, value, value_len);
1900 }
1901
1902
json_get_array_item(const char * js,const char * js_end,int n_item,const char ** value,int * value_len)1903 enum json_types json_get_array_item(const char *js, const char *js_end,
1904 int n_item,
1905 const char **value, int *value_len)
1906 {
1907 json_engine_t je;
1908 int c_item= 0;
1909
1910 json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1911 (const uchar *) js_end);
1912
1913 if (json_read_value(&je) ||
1914 je.value_type != JSON_VALUE_ARRAY)
1915 goto err_return;
1916
1917 while (!json_scan_next(&je))
1918 {
1919 switch (je.state)
1920 {
1921 case JST_VALUE:
1922 if (c_item == n_item)
1923 return smart_read_value(&je, value, value_len);
1924
1925 if (json_skip_key(&je))
1926 goto err_return;
1927
1928 c_item++;
1929 break;
1930
1931 case JST_ARRAY_END:
1932 *value= (const char *) (je.s.c_str - je.sav_c_len);
1933 *value_len= c_item;
1934 return JSV_NOTHING;
1935 }
1936 }
1937
1938 err_return:
1939 return JSV_BAD_JSON;
1940 }
1941
1942
1943 /** Simple json lookup for a value by the key.
1944
1945 Expects JSON object.
1946 Only scans the 'first level' of the object, not
1947 the nested structures.
1948
1949 @param js [in] json object to search in
1950 @param js_end [in] end of json string
1951 @param key [in] key to search for
1952 @param key_end [in] - " -
1953 @param value_start [out] pointer into js (value or closing })
1954 @param value_len [out] length of the value found or number of keys
1955
1956 @retval the type of the key value
1957 @retval JSV_BAD_JSON - syntax error found reading JSON.
1958 or not JSON object.
1959 @retval JSV_NOTHING - no such key found.
1960 */
json_get_object_key(const char * js,const char * js_end,const char * key,const char ** value,int * value_len)1961 enum json_types json_get_object_key(const char *js, const char *js_end,
1962 const char *key,
1963 const char **value, int *value_len)
1964 {
1965 const char *key_end= key + strlen(key);
1966 json_engine_t je;
1967 json_string_t key_name;
1968 int n_keys= 0;
1969
1970 json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);
1971
1972 json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1973 (const uchar *) js_end);
1974
1975 if (json_read_value(&je) ||
1976 je.value_type != JSON_VALUE_OBJECT)
1977 goto err_return;
1978
1979 while (!json_scan_next(&je))
1980 {
1981 switch (je.state)
1982 {
1983 case JST_KEY:
1984 n_keys++;
1985 json_string_set_str(&key_name, (const uchar *) key,
1986 (const uchar *) key_end);
1987 if (json_key_matches(&je, &key_name))
1988 return smart_read_value(&je, value, value_len);
1989
1990 if (json_skip_key(&je))
1991 goto err_return;
1992
1993 break;
1994
1995 case JST_OBJ_END:
1996 *value= (const char *) (je.s.c_str - je.sav_c_len);
1997 *value_len= n_keys;
1998 return JSV_NOTHING;
1999 }
2000 }
2001
2002 err_return:
2003 return JSV_BAD_JSON;
2004 }
2005
2006
/*
  Placeholder: no lookup is performed. All arguments are ignored,
  none of the output parameters is written and JSV_NOTHING is
  always returned.
*/
enum json_types json_get_object_nkey(const char *js __attribute__((unused)),
                                     const char *js_end __attribute__((unused)),
                                     int nkey __attribute__((unused)),
                                     const char **keyname __attribute__((unused)),
                                     const char **keyname_end __attribute__((unused)),
                                     const char **value __attribute__((unused)),
                                     int *value_len __attribute__((unused)))
{
  return JSV_NOTHING;
}
2017
2018
2019 /** Check if json is valid (well-formed)
2020
2021 @retval 0 - success, json is well-formed
2022 @retval 1 - error, json is invalid
2023 */
json_valid(const char * js,size_t js_len,CHARSET_INFO * cs)2024 int json_valid(const char *js, size_t js_len, CHARSET_INFO *cs)
2025 {
2026 json_engine_t je;
2027 json_scan_start(&je, cs, (const uchar *) js, (const uchar *) js + js_len);
2028 while (json_scan_next(&je) == 0) /* no-op */ ;
2029 return je.s.error == 0;
2030 }
2031
2032
2033 /*
2034 Expects the JSON object as an js argument, and the key name.
2035 Looks for this key in the object and returns
2036 the location of all the text related to it.
2037 The text includes the comma, separating this key.
2038
2039 comma_pos - the hint where the comma is. It is important
2040 if you plan to replace the key rather than just cut.
2041 1 - comma is on the left
2042 2 - comma is on the right.
2043 0 - no comma at all (the object has just this single key)
2044
2045 if no such key found *key_start is set to NULL.
2046 */
json_locate_key(const char * js,const char * js_end,const char * kname,const char ** key_start,const char ** key_end,int * comma_pos)2047 int json_locate_key(const char *js, const char *js_end,
2048 const char *kname,
2049 const char **key_start, const char **key_end,
2050 int *comma_pos)
2051 {
2052 const char *kname_end= kname + strlen(kname);
2053 json_engine_t je;
2054 json_string_t key_name;
2055 int t_next, c_len, match_result;
2056
2057 json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);
2058
2059 json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
2060 (const uchar *) js_end);
2061
2062 if (json_read_value(&je) ||
2063 je.value_type != JSON_VALUE_OBJECT)
2064 goto err_return;
2065
2066 *key_start= (const char *) je.s.c_str;
2067 *comma_pos= 0;
2068
2069 while (!json_scan_next(&je))
2070 {
2071 switch (je.state)
2072 {
2073 case JST_KEY:
2074 json_string_set_str(&key_name, (const uchar *) kname,
2075 (const uchar *) kname_end);
2076 match_result= json_key_matches(&je, &key_name);
2077 if (json_skip_key(&je))
2078 goto err_return;
2079 get_first_nonspace(&je.s, &t_next, &c_len);
2080 je.s.c_str-= c_len;
2081
2082 if (match_result)
2083 {
2084 *key_end= (const char *) je.s.c_str;
2085
2086 if (*comma_pos == 1)
2087 return 0;
2088
2089 DBUG_ASSERT(*comma_pos == 0);
2090
2091 if (t_next == C_COMMA)
2092 {
2093 *key_end+= c_len;
2094 *comma_pos= 2;
2095 }
2096 else if (t_next == C_RCURB)
2097 *comma_pos= 0;
2098 else
2099 goto err_return;
2100 return 0;
2101 }
2102
2103 *key_start= (const char *) je.s.c_str;
2104 *comma_pos= 1;
2105 break;
2106
2107 case JST_OBJ_END:
2108 *key_start= NULL;
2109 return 0;
2110 }
2111 }
2112
2113 err_return:
2114 return 1;
2115
2116 }
2117