1 #include <my_global.h>
2 #include <string.h>
3 #include <m_ctype.h>
4 #include "json_lib.h"
5
/*
  JSON escaping lets the user specify characters by their UTF-16 codes,
  so we need the UTF-16 charset capabilities. Let's import them from
  the utf16 charset.
*/
11 int my_utf16_uni(CHARSET_INFO *cs,
12 my_wc_t *pwc, const uchar *s, const uchar *e);
13 int my_uni_utf16(CHARSET_INFO *cs, my_wc_t wc, uchar *s, uchar *e);
14
15
/*
  Point the string reader at the text to be scanned.

  @param s    reader to update
  @param str  stored as the current read position (s->c_str)
  @param end  stored as the end of the text (s->str_end)
*/
void json_string_set_str(json_string_t *s,
                         const uchar *str, const uchar *end)
{
  s->str_end= end;
  s->c_str= str;
}
22
23
/*
  Bind a character set to the string reader and clear its error code.

  @param s     reader to update
  @param i_cs  character set of the text; its mb_wc handler is cached
               in s->wc and used to decode characters
*/
void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs)
{
  s->wc= i_cs->cset->mb_wc;
  s->cs= i_cs;
  s->error= 0;
}
30
31
/*
  Fully initialize a string reader: text boundaries, character set,
  and a cleared error code.
*/
static void json_string_setup(json_string_t *s,
                              CHARSET_INFO *i_cs, const uchar *str,
                              const uchar *end)
{
  json_string_set_str(s, str, end);
  json_string_set_cs(s, i_cs);
}
39
40
/*
  Character classes seen by the main JSON scanner.
  The classes that precede NR_C_CLASSES are the ones that have their own
  handler functions; C_SPACE is placed after the counter because
  whitespace needs no handler.
*/
enum json_char_classes {
  C_EOS,          /* end of string */
  C_LCURB,        /* { */
  C_RCURB,        /* } */
  C_LSQRB,        /* [ */
  C_RSQRB,        /* ] */
  C_COLON,        /* : */
  C_COMMA,        /* , */
  C_QUOTE,        /* " */
  C_DIGIT,        /* -0123456789 */
  C_LOW_F,        /* 'f' (for "false") */
  C_LOW_N,        /* 'n' (for "null") */
  C_LOW_T,        /* 't' (for "true") */
  C_ETC,          /* everything else */
  C_ERR,          /* character disallowed in JSON */
  C_BAD,          /* invalid character, the charset handler cannot read it */
  NR_C_CLASSES,   /* number of classes that are handled with functions */
  C_SPACE         /* whitespace: tab, LF, CR and ' ' */
};


/*
  Class of each of the first 128 Unicode code points.
  Code points >= 128 are not in the table and must be treated as C_ETC.
*/
static enum json_char_classes json_chr_map[128] = {
  /* 0x00 .. 0x1F: control characters; only TAB, LF and CR are allowed */
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
  C_ERR,   C_SPACE, C_SPACE, C_ERR,   C_ERR,   C_SPACE, C_ERR,   C_ERR,
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,

  /* 0x20 .. 0x3F: space, '"', ',', '-', digits, ':' */
  C_SPACE, C_ETC,   C_QUOTE, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_COMMA, C_DIGIT, C_ETC,   C_ETC,
  C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
  C_DIGIT, C_DIGIT, C_COLON, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,

  /* 0x40 .. 0x5F: upper-case letters, '[' and ']' */
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_LSQRB, C_ETC,   C_RSQRB, C_ETC,   C_ETC,

  /* 0x60 .. 0x7F: lower-case letters ('f', 'n', 't'), '{' and '}' */
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_F, C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_N, C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_T, C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_LCURB, C_ETC,   C_RCURB, C_ETC,   C_ETC
};
88
89
90 /*
91 JSON parser actually has more states than the 'enum json_states'
92 declares. But the rest of the states aren't seen to the user so let's
93 specify them here to avoid confusion.
94 */
95
96 enum json_all_states {
97 JST_DONE= NR_JSON_USER_STATES, /* ok to finish */
98 JST_OBJ_CONT= NR_JSON_USER_STATES+1, /* object continues */
99 JST_ARRAY_CONT= NR_JSON_USER_STATES+2, /* array continues */
100 JST_READ_VALUE= NR_JSON_USER_STATES+3, /* value is being read */
101 NR_JSON_STATES= NR_JSON_USER_STATES+4
102 };
103
104
105 typedef int (*json_state_handler)(json_engine_t *);
106
107
108 /* The string is broken. */
unexpected_eos(json_engine_t * j)109 static int unexpected_eos(json_engine_t *j)
110 {
111 j->s.error= JE_EOS;
112 return 1;
113 }
114
115
116 /* This symbol here breaks the JSON syntax. */
syntax_error(json_engine_t * j)117 static int syntax_error(json_engine_t *j)
118 {
119 j->s.error= JE_SYN;
120 return 1;
121 }
122
123
124 /* Value of object. */
mark_object(json_engine_t * j)125 static int mark_object(json_engine_t *j)
126 {
127 j->state= JST_OBJ_START;
128 if (++j->stack_p < JSON_DEPTH_LIMIT)
129 {
130 j->stack[j->stack_p]= JST_OBJ_CONT;
131 return 0;
132 }
133 j->s.error= JE_DEPTH;
134 return 1;
135 }
136
137
138 /* Read value of object. */
read_obj(json_engine_t * j)139 static int read_obj(json_engine_t *j)
140 {
141 j->state= JST_OBJ_START;
142 j->value_type= JSON_VALUE_OBJECT;
143 j->value= j->value_begin;
144 if (++j->stack_p < JSON_DEPTH_LIMIT)
145 {
146 j->stack[j->stack_p]= JST_OBJ_CONT;
147 return 0;
148 }
149 j->s.error= JE_DEPTH;
150 return 1;
151 }
152
153
154 /* Value of array. */
mark_array(json_engine_t * j)155 static int mark_array(json_engine_t *j)
156 {
157 j->state= JST_ARRAY_START;
158 if (++j->stack_p < JSON_DEPTH_LIMIT)
159 {
160 j->stack[j->stack_p]= JST_ARRAY_CONT;
161 j->value= j->value_begin;
162 return 0;
163 }
164 j->s.error= JE_DEPTH;
165 return 1;
166 }
167
168 /* Read value of object. */
read_array(json_engine_t * j)169 static int read_array(json_engine_t *j)
170 {
171 j->state= JST_ARRAY_START;
172 j->value_type= JSON_VALUE_ARRAY;
173 j->value= j->value_begin;
174 if (++j->stack_p < JSON_DEPTH_LIMIT)
175 {
176 j->stack[j->stack_p]= JST_ARRAY_CONT;
177 return 0;
178 }
179 j->s.error= JE_DEPTH;
180 return 1;
181 }
182
183
184
/*
  Character classes inside a JSON string constant.
  They are mostly needed to parse escaping properly.
  The escapes available in JSON are:
    \"  - quotation mark
    \\  - backslash
    \b  - backspace        UNICODE 8
    \f  - formfeed         UNICODE 12
    \n  - newline          UNICODE 10
    \r  - carriage return  UNICODE 13
    \t  - horizontal tab   UNICODE 9
    \u{four-hex-digits}    - a UTF-16 code unit

  The classes S_0 .. S_F are numerically equal to the value of the
  hexadecimal digit they stand for.
*/
enum json_string_char_classes {
  S_0= 0, S_1, S_2, S_3, S_4, S_5, S_6, S_7,
  S_8,    S_9, S_A, S_B, S_C, S_D, S_E, S_F,
  S_ETC= 36,    /* rest of characters */
  S_QUOTE,      /* " */
  S_BKSL,       /* \ */
  S_ERR= 100    /* disallowed */
};


/* Class of each of the first 128 code points inside a string constant. */
static enum json_string_char_classes json_instr_chr_map[128] = {
  /* 0x00 .. 0x1F: control characters are not allowed inside strings */
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,

  /* 0x20 .. 0x3F: '"' and the decimal digits */
  S_ETC,   S_ETC,   S_QUOTE, S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_0,     S_1,     S_2,     S_3,     S_4,     S_5,     S_6,     S_7,
  S_8,     S_9,     S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,

  /* 0x40 .. 0x5F: 'A'..'F' and the backslash */
  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_BKSL,  S_ETC,   S_ETC,   S_ETC,

  /* 0x60 .. 0x7F: 'a'..'f' */
  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC
};
244
245
/*
  Read four hexadecimal digits from the string and pack them into two
  bytes, most significant digit first.

  @param s     string reader; advanced past every digit that is accepted
  @param dest  two bytes the digits are ADDED into, so the caller must
               have zeroed them beforehand

  @return 0 on success. Otherwise the error code that was also stored in
          s->error: JE_EOS / JE_BAD_CHR when a character cannot be read,
          JE_SYN when a character is not a hexadecimal digit.
*/
static int read_4_hexdigits(json_string_t *s, uchar *dest)
{
  int pos;

  for (pos= 0; pos < 4; pos++)
  {
    int digit;
    int len= json_next_char(s);

    if (len <= 0)
      return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;

    if (s->c_next >= 128)
      return s->error= JE_SYN;

    /* The classes S_0..S_F equal the numeric value of the digit. */
    digit= json_instr_chr_map[s->c_next];
    if (digit > S_F)
      return s->error= JE_SYN;

    s->c_str+= len;
    if (pos % 2)
      dest[pos / 2]+= digit;        /* low nibble */
    else
      dest[pos / 2]+= digit * 16;   /* high nibble */
  }
  return 0;
}
262
263
/*
  Decode an escape sequence; the backslash itself is already consumed.

  On success s->c_next holds the character the escape stands for:
    - \b \f \n \r \t are translated to their control codes,
    - \uXXXX (optionally followed by a second \uXXXX) is decoded with
      my_utf16_uni(),
    - any other character that is not disallowed inside a string
      stands for itself.

  @return 0 on success, non-zero on error with s->error set
          (JE_EOS, JE_BAD_CHR, JE_ESCAPING or JE_SYN).
*/
static int json_handle_esc(json_string_t *s)
{
  int len;
  uchar utf16[4]= {0, 0, 0, 0};

  if ((len= json_next_char(s)) <= 0)
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
  s->c_str+= len;

  switch (s->c_next)
  {
  case 'b': s->c_next= 8;  return 0;
  case 'f': s->c_next= 12; return 0;
  case 'n': s->c_next= 10; return 0;
  case 'r': s->c_next= 13; return 0;
  case 't': s->c_next= 9;  return 0;
  default:
    break;
  }

  if (s->c_next < 128 && json_instr_chr_map[s->c_next] == S_ERR)
  {
    /* Leave the reader pointing at the offending character. */
    s->c_str-= len;
    return s->error= JE_ESCAPING;
  }

  if (s->c_next != 'u')
    return 0;

  /* Read the four-hex-digits code. */
  if (read_4_hexdigits(s, utf16))
    return 1;

  len= my_utf16_uni(0, &s->c_next, utf16, utf16 + 2);
  if (len == 2)
    return 0;

  /*
    Anything but MY_CS_TOOSMALL4 is an invalid code. MY_CS_TOOSMALL4
    means two more bytes are needed, i.e. the symbol is outside the
    Basic Multilingual Plane and a second "\uXXXX" has to follow to
    complete the UTF-16 surrogate pair.
  */
  if (len != MY_CS_TOOSMALL4)
    return s->error= JE_BAD_CHR;

  if ((len= json_next_char(s)) <= 0)
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
  if (s->c_next != '\\')
    return s->error= JE_SYN;
  s->c_str+= len;

  if ((len= json_next_char(s)) <= 0)
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
  if (s->c_next != 'u')
    return s->error= JE_SYN;
  s->c_str+= len;

  if (read_4_hexdigits(s, utf16 + 2))
    return 1;

  if (my_utf16_uni(0, &s->c_next, utf16, utf16 + 4) == 4)
    return 0;

  return s->error= JE_BAD_CHR;
}
338
339
/*
  Read one character of a string constant, decoding an escape sequence
  if the character is a backslash. The result is left in js->c_next.

  @return 0 on success, non-zero on error with js->error set.
*/
int json_read_string_const_chr(json_string_t *js)
{
  int len= json_next_char(js);

  if (len <= 0)
  {
    js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
    return 1;
  }

  js->c_str+= len;
  if (js->c_next != '\\')
    return 0;

  return json_handle_esc(js);
}
352
353
/*
  Skip a string constant up to and including its closing quote.
  The opening quote must have been consumed already.
  Sets j->value_escaped if a backslash escape is met, and on success
  restores the parser state from the top of the state stack.

  @return 0 on success, non-zero on error with j->s.error set.
*/
static int skip_str_constant(json_engine_t *j)
{
  json_string_t *js= &j->s;

  for (;;)
  {
    int len= json_next_char(js);

    if (len <= 0)
      return js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;

    js->c_str+= len;

    /* Ordinary characters (and hex digits) are simply skipped. */
    if (js->c_next >= 128 || json_instr_chr_map[js->c_next] <= S_ETC)
      continue;

    if (js->c_next == '"')
      break;

    if (js->c_next != '\\')
      return js->error= JE_NOT_JSON_CHR;  /* Symbol not allowed in JSON. */

    j->value_escaped= 1;
    if (json_handle_esc(js))
      return 1;
  }

  j->state= j->stack[j->stack_p];
  return 0;
}
384
385
386 /* Scalar string. */
v_string(json_engine_t * j)387 static int v_string(json_engine_t *j)
388 {
389 return skip_str_constant(j) || json_scan_next(j);
390 }
391
392
393 /* Read scalar string. */
read_strn(json_engine_t * j)394 static int read_strn(json_engine_t *j)
395 {
396 j->value= j->s.c_str;
397 j->value_type= JSON_VALUE_STRING;
398 j->value_escaped= 0;
399
400 if (skip_str_constant(j))
401 return 1;
402
403 j->state= j->stack[j->stack_p];
404 j->value_len= (int)(j->s.c_str - j->value) - 1;
405 return 0;
406 }
407
408
/*
  There is a dedicated parser for numeric constants. It is built like
  the main JSON parser: character classes, a character-to-class map and
  a state-per-class table. The difference is that no handler functions
  are created; the table just names the state the parser moves to.
*/
enum json_num_char_classes {
  N_MINUS,        /* - */
  N_PLUS,         /* + */
  N_ZERO,         /* 0 */
  N_DIGIT,        /* 1..9 */
  N_POINT,        /* . */
  N_E,            /* e, E */
  N_END,          /* whitespace , ] } */
  N_EEND,         /* any other character from 0x20 up */
  N_ERR,          /* control characters other than TAB, LF, CR */
  N_NUM_CLASSES
};


/* Class of each of the first 128 code points inside a number. */
static enum json_num_char_classes json_num_chr_map[128] = {
  /* 0x00 .. 0x1F */
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
  N_ERR,   N_END,   N_END,   N_ERR,   N_ERR,   N_END,   N_ERR,   N_ERR,
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,

  /* 0x20 .. 0x3F: space, '+', ',', '-', '.', digits */
  N_END,   N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_PLUS,  N_END,   N_MINUS, N_POINT, N_EEND,
  N_ZERO,  N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT,
  N_DIGIT, N_DIGIT, N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,

  /* 0x40 .. 0x5F: 'E' and ']' */
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_END,   N_EEND,  N_EEND,

  /* 0x60 .. 0x7F: 'e' and '}' */
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_END,   N_EEND,  N_EEND
};
452
453
454 enum json_num_states {
455 NS_OK, /* Number ended. */
456 NS_GO, /* Initial state. */
457 NS_GO1, /* If the number starts with '-'. */
458 NS_Z, /* If the number starts with '0'. */
459 NS_Z1, /* If the numbers starts with '-0'. */
460 NS_INT, /* Integer part. */
461 NS_FRAC,/* Fractional part. */
462 NS_EX, /* Exponential part begins. */
463 NS_EX1, /* Exponential part continues. */
464 NS_NUM_STATES
465 };
466
467
468 static int json_num_states[NS_NUM_STATES][N_NUM_CLASSES]=
469 {
470 /* - + 0 1..9 POINT E END_OK ERROR */
471 /*OK*/ { JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_BAD_CHR },
472 /*GO*/ { NS_GO1, JE_SYN, NS_Z, NS_INT, JE_SYN, JE_SYN, JE_SYN, JE_BAD_CHR },
473 /*GO1*/ { JE_SYN, JE_SYN, NS_Z1, NS_INT, JE_SYN, JE_SYN, JE_SYN, JE_BAD_CHR },
474 /*ZERO*/ { JE_SYN, JE_SYN, JE_SYN, JE_SYN, NS_FRAC, JE_SYN, NS_OK, JE_BAD_CHR },
475 /*ZE1*/ { JE_SYN, JE_SYN, JE_SYN, JE_SYN, NS_FRAC, JE_SYN, NS_OK, JE_BAD_CHR },
476 /*INT*/ { JE_SYN, JE_SYN, NS_INT, NS_INT, NS_FRAC, NS_EX, NS_OK, JE_BAD_CHR },
477 /*FRAC*/ { JE_SYN, JE_SYN, NS_FRAC, NS_FRAC,JE_SYN, NS_EX, NS_OK, JE_BAD_CHR },
478 /*EX*/ { NS_EX, NS_EX, NS_EX1, NS_EX1, JE_SYN, JE_SYN, JE_SYN, JE_BAD_CHR },
479 /*EX1*/ { JE_SYN, JE_SYN, NS_EX1, NS_EX1, JE_SYN, JE_SYN, NS_OK, JE_BAD_CHR }
480 };
481
482
483 static uint json_num_state_flags[NS_NUM_STATES]=
484 {
485 /*OK*/ 0,
486 /*GO*/ 0,
487 /*GO1*/ JSON_NUM_NEG,
488 /*ZERO*/ 0,
489 /*ZE1*/ 0,
490 /*INT*/ 0,
491 /*FRAC*/ JSON_NUM_FRAC_PART,
492 /*EX*/ JSON_NUM_EXP,
493 /*EX1*/ 0,
494 };
495
496
/*
  Skip a numeric constant whose first character is already in
  j->s.c_next, collecting the JSON_NUM_* flags into j->num_flags.

  Scanning stops, without consuming the character, as soon as the
  transition table yields a value that is not a positive state; that
  character is left for the caller to handle. An error is reported from
  here only when a character cannot be read or is >= 128.

  @return 0 when the number was skipped (parser state restored from the
          stack), 1 on error with j->s.error set.
*/
static int skip_num_constant(json_engine_t *j)
{
  json_string_t *js= &j->s;
  int len;
  int state= json_num_states[NS_GO][json_num_chr_map[js->c_next]];

  j->num_flags= 0;
  for (;;)
  {
    j->num_flags|= json_num_state_flags[state];

    len= json_next_char(js);
    if (len <= 0 || js->c_next >= 128)
    {
      /* At the end of the text the number must be in a finishable state. */
      js->error= json_eos(js) ? json_num_states[state][N_END] : JE_BAD_CHR;
      if (js->error < 0)
        return 1;
      break;
    }

    state= json_num_states[state][json_num_chr_map[js->c_next]];
    if (state <= 0)
      break;

    js->c_str+= len;
  }

  j->state= j->stack[j->stack_p];
  return 0;
}
526
527
528 /* Scalar numeric. */
v_number(json_engine_t * j)529 static int v_number(json_engine_t *j)
530 {
531 return skip_num_constant(j) || json_scan_next(j);
532 }
533
534
535 /* Read numeric constant. */
read_num(json_engine_t * j)536 static int read_num(json_engine_t *j)
537 {
538 j->value= j->value_begin;
539 if (skip_num_constant(j) == 0)
540 {
541 j->value_type= JSON_VALUE_NUMBER;
542 j->value_len= (int)(j->s.c_str - j->value_begin);
543 return 0;
544 }
545 return 1;
546 }
547
548
549 /* Check that the JSON string matches the argument and skip it. */
skip_string_verbatim(json_string_t * s,const char * str)550 static int skip_string_verbatim(json_string_t *s, const char *str)
551 {
552 int c_len;
553 while (*str)
554 {
555 if ((c_len= json_next_char(s)) > 0)
556 {
557 if (s->c_next == (my_wc_t) *(str++))
558 {
559 s->c_str+= c_len;
560 continue;
561 }
562 return s->error= JE_SYN;
563 }
564 return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
565 }
566
567 return 0;
568 }
569
570
571 /* Scalar false. */
v_false(json_engine_t * j)572 static int v_false(json_engine_t *j)
573 {
574 if (skip_string_verbatim(&j->s, "alse"))
575 return 1;
576 j->state= j->stack[j->stack_p];
577 return json_scan_next(j);
578 }
579
580
581 /* Scalar null. */
v_null(json_engine_t * j)582 static int v_null(json_engine_t *j)
583 {
584 if (skip_string_verbatim(&j->s, "ull"))
585 return 1;
586 j->state= j->stack[j->stack_p];
587 return json_scan_next(j);
588 }
589
590
591 /* Scalar true. */
v_true(json_engine_t * j)592 static int v_true(json_engine_t *j)
593 {
594 if (skip_string_verbatim(&j->s, "rue"))
595 return 1;
596 j->state= j->stack[j->stack_p];
597 return json_scan_next(j);
598 }
599
600
601 /* Read false. */
read_false(json_engine_t * j)602 static int read_false(json_engine_t *j)
603 {
604 j->value_type= JSON_VALUE_FALSE;
605 j->value= j->value_begin;
606 j->state= j->stack[j->stack_p];
607 j->value_len= 5;
608 return skip_string_verbatim(&j->s, "alse");
609 }
610
611
612 /* Read null. */
read_null(json_engine_t * j)613 static int read_null(json_engine_t *j)
614 {
615 j->value_type= JSON_VALUE_NULL;
616 j->value= j->value_begin;
617 j->state= j->stack[j->stack_p];
618 j->value_len= 4;
619 return skip_string_verbatim(&j->s, "ull");
620 }
621
622
623 /* Read true. */
read_true(json_engine_t * j)624 static int read_true(json_engine_t *j)
625 {
626 j->value_type= JSON_VALUE_TRUE;
627 j->value= j->value_begin;
628 j->state= j->stack[j->stack_p];
629 j->value_len= 4;
630 return skip_string_verbatim(&j->s, "rue");
631 }
632
633
634 /* Disallowed character. */
not_json_chr(json_engine_t * j)635 static int not_json_chr(json_engine_t *j)
636 {
637 j->s.error= JE_NOT_JSON_CHR;
638 return 1;
639 }
640
641
642 /* Bad character. */
bad_chr(json_engine_t * j)643 static int bad_chr(json_engine_t *j)
644 {
645 j->s.error= JE_BAD_CHR;
646 return 1;
647 }
648
649
650 /* Correct finish. */
done(json_engine_t * j)651 static int done(json_engine_t *j __attribute__((unused)))
652 {
653 return 1;
654 }
655
656
657 /* End of the object. */
end_object(json_engine_t * j)658 static int end_object(json_engine_t *j)
659 {
660 j->stack_p--;
661 j->state= JST_OBJ_END;
662 return 0;
663 }
664
665
666 /* End of the array. */
end_array(json_engine_t * j)667 static int end_array(json_engine_t *j)
668 {
669 j->stack_p--;
670 j->state= JST_ARRAY_END;
671 return 0;
672 }
673
674
675 /* Start reading key name. */
read_keyname(json_engine_t * j)676 static int read_keyname(json_engine_t *j)
677 {
678 j->state= JST_KEY;
679 return 0;
680 }
681
682
/*
  Read characters until one that is not whitespace is found.

  @param js      string reader; advanced past the whitespace and past
                 the character that is returned
  @param t_next  out: class of that character (enum json_char_classes),
                 or C_EOS / C_BAD when no character could be read
  @param c_len   out: result of the last json_next_char() call, i.e. the
                 byte length of the returned character when positive
*/
static void get_first_nonspace(json_string_t *js, int *t_next, int *c_len)
{
  for (;;)
  {
    *c_len= json_next_char(js);
    if (*c_len <= 0)
    {
      *t_next= json_eos(js) ? C_EOS : C_BAD;
      return;
    }

    *t_next= (js->c_next >= 128) ? C_ETC : json_chr_map[js->c_next];
    js->c_str+= *c_len;

    if (*t_next != C_SPACE)
      return;
  }
}
696
697
698 /* Next key name. */
next_key(json_engine_t * j)699 static int next_key(json_engine_t *j)
700 {
701 int t_next, c_len;
702 get_first_nonspace(&j->s, &t_next, &c_len);
703
704 if (t_next == C_QUOTE)
705 {
706 j->state= JST_KEY;
707 return 0;
708 }
709
710 j->s.error= (t_next == C_EOS) ? JE_EOS :
711 ((t_next == C_BAD) ? JE_BAD_CHR :
712 JE_SYN);
713 return 1;
714 }
715
716
717 /* Forward declarations. */
718 static int skip_colon(json_engine_t *j);
719 static int skip_key(json_engine_t *j);
720 static int struct_end_cb(json_engine_t *j);
721 static int struct_end_qb(json_engine_t *j);
722 static int struct_end_cm(json_engine_t *j);
723 static int struct_end_eos(json_engine_t *j);
724
725
/* A ',' inside an array: the next thing to scan is a value. */
static int next_item(json_engine_t *j)
{
  j->state= JST_VALUE;
  return 0;
}
731
732
/*
  First item of an array. The character that was just read belongs to
  the item, so the reader steps back over it (j->sav_c_len bytes) and it
  gets scanned again as the beginning of a value.
*/
static int array_item(json_engine_t *j)
{
  j->s.c_str-= j->sav_c_len;
  j->state= JST_VALUE;
  return 0;
}
739
740
741 static json_state_handler json_actions[NR_JSON_STATES][NR_C_CLASSES]=
742 /*
743 EOS { } [ ]
744 : , " -0..9 f
745 n t ETC ERR BAD
746 */
747 {
748 {/*VALUE*/
749 unexpected_eos, mark_object, syntax_error, mark_array, syntax_error,
750 syntax_error, syntax_error,v_string, v_number, v_false,
751 v_null, v_true, syntax_error, not_json_chr, bad_chr},
752 {/*KEY*/
753 unexpected_eos, skip_key, skip_key, skip_key, skip_key,
754 skip_key, skip_key, skip_colon, skip_key, skip_key,
755 skip_key, skip_key, skip_key, not_json_chr, bad_chr},
756 {/*OBJ_START*/
757 unexpected_eos, syntax_error, end_object, syntax_error, syntax_error,
758 syntax_error, syntax_error, read_keyname, syntax_error, syntax_error,
759 syntax_error, syntax_error, syntax_error, not_json_chr, bad_chr},
760 {/*OBJ_END*/
761 struct_end_eos, syntax_error, struct_end_cb, syntax_error, struct_end_qb,
762 syntax_error, struct_end_cm,syntax_error, syntax_error, syntax_error,
763 syntax_error, syntax_error, syntax_error, not_json_chr, bad_chr},
764 {/*ARRAY_START*/
765 unexpected_eos, array_item, syntax_error, array_item, end_array,
766 syntax_error, syntax_error, array_item, array_item, array_item,
767 array_item, array_item, syntax_error, not_json_chr, bad_chr},
768 {/*ARRAY_END*/
769 struct_end_eos, syntax_error, struct_end_cb, syntax_error, struct_end_qb,
770 syntax_error, struct_end_cm, syntax_error, syntax_error, syntax_error,
771 syntax_error, syntax_error, syntax_error, not_json_chr, bad_chr},
772 {/*DONE*/
773 done, syntax_error, syntax_error, syntax_error, syntax_error,
774 syntax_error, syntax_error, syntax_error, syntax_error, syntax_error,
775 syntax_error, syntax_error, syntax_error, not_json_chr, bad_chr},
776 {/*OBJ_CONT*/
777 unexpected_eos, syntax_error, end_object, syntax_error, syntax_error,
778 syntax_error, next_key, syntax_error, syntax_error, syntax_error,
779 syntax_error, syntax_error, syntax_error, not_json_chr, bad_chr},
780 {/*ARRAY_CONT*/
781 unexpected_eos, syntax_error, syntax_error, syntax_error, end_array,
782 syntax_error, next_item, syntax_error, syntax_error, syntax_error,
783 syntax_error, syntax_error, syntax_error, not_json_chr, bad_chr},
784 {/*READ_VALUE*/
785 unexpected_eos, read_obj, syntax_error, read_array, syntax_error,
786 syntax_error, syntax_error, read_strn, read_num, read_false,
787 read_null, read_true, syntax_error, not_json_chr, bad_chr},
788 };
789
790
791
/*
  Prepare the engine for scanning a JSON text.

  @param je    engine to initialize
  @param i_cs  character set of the text
  @param str   start of the text
  @param end   end of the text

  The state stack gets JST_DONE at its bottom and the scan starts in
  JST_VALUE.

  @return always 0
*/
int json_scan_start(json_engine_t *je,
                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
{
  json_string_setup(&je->s, i_cs, str, end);

  je->state= JST_VALUE;
  je->stack_p= 0;
  je->stack[0]= JST_DONE;
  return 0;
}
801
802
803 /* Skip colon and the value. */
skip_colon(json_engine_t * j)804 static int skip_colon(json_engine_t *j)
805 {
806 int t_next, c_len;
807
808 get_first_nonspace(&j->s, &t_next, &c_len);
809
810 if (t_next == C_COLON)
811 {
812 get_first_nonspace(&j->s, &t_next, &c_len);
813 return json_actions[JST_VALUE][t_next](j);
814 }
815
816 j->s.error= (t_next == C_EOS) ? JE_EOS :
817 ((t_next == C_BAD) ? JE_BAD_CHR:
818 JE_SYN);
819
820 return 1;
821 }
822
823
824 /* Skip colon and the value. */
skip_key(json_engine_t * j)825 static int skip_key(json_engine_t *j)
826 {
827 int t_next, c_len;
828
829 if (json_instr_chr_map[j->s.c_next] == S_BKSL &&
830 json_handle_esc(&j->s))
831 return 1;
832
833 while (json_read_keyname_chr(j) == 0) {}
834
835 if (j->s.error)
836 return 1;
837
838 get_first_nonspace(&j->s, &t_next, &c_len);
839 return json_actions[JST_VALUE][t_next](j);
840 }
841
842
843 /*
844 Handle EOS after the end of an object or array.
845 To do that we should pop the stack to see if
846 we are inside an object, or an array, and
847 run our 'state machine' accordingly.
848 */
struct_end_eos(json_engine_t * j)849 static int struct_end_eos(json_engine_t *j)
850 { return json_actions[j->stack[j->stack_p]][C_EOS](j); }
851
852
853 /*
854 Handle '}' after the end of an object or array.
855 To do that we should pop the stack to see if
856 we are inside an object, or an array, and
857 run our 'state machine' accordingly.
858 */
struct_end_cb(json_engine_t * j)859 static int struct_end_cb(json_engine_t *j)
860 { return json_actions[j->stack[j->stack_p]][C_RCURB](j); }
861
862
863 /*
864 Handle ']' after the end of an object or array.
865 To do that we should pop the stack to see if
866 we are inside an object, or an array, and
867 run our 'state machine' accordingly.
868 */
struct_end_qb(json_engine_t * j)869 static int struct_end_qb(json_engine_t *j)
870 { return json_actions[j->stack[j->stack_p]][C_RSQRB](j); }
871
872
873 /*
874 Handle ',' after the end of an object or array.
875 To do that we should pop the stack to see if
876 we are inside an object, or an array, and
877 run our 'state machine' accordingly.
878 */
struct_end_cm(json_engine_t * j)879 static int struct_end_cm(json_engine_t *j)
880 { return json_actions[j->stack[j->stack_p]][C_COMMA](j); }
881
882
/*
  Read the next character of a key name into j->s.c_next.

  @return 0 when a character of the key was read (escape sequences are
          decoded); call again for the next one.
          Non-zero when the key is over or an error happened, so the
          caller must check j->s.error to tell these apart. When the
          closing quote is met, the whitespace and the ':' after it are
          consumed too and the state becomes JST_VALUE.
*/
int json_read_keyname_chr(json_engine_t *j)
{
  json_string_t *js= &j->s;
  int len, cls;

  if ((len= json_next_char(js)) <= 0)
  {
    js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
    return 1;
  }

  js->c_str+= len;
  if (js->c_next >= 128 || (cls= json_instr_chr_map[js->c_next]) <= S_ETC)
    return 0;

  switch (cls)
  {
  case S_BKSL:
    return json_handle_esc(js);
  case S_ERR:
    /* Leave the reader pointing at the offending character. */
    js->c_str-= len;
    js->error= JE_STRING_CONST;
    return 1;
  case S_QUOTE:
    break;
  default:
    js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
    return 1;
  }

  /* Closing quote: skip spaces until ':'. */
  for (;;)
  {
    if ((len= json_next_char(js)) <= 0)
    {
      js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
      return 1;
    }

    if (js->c_next == ':')
    {
      js->c_str+= len;
      j->state= JST_VALUE;
      return 1;
    }

    if (js->c_next >= 128 || json_chr_map[js->c_next] != C_SPACE)
    {
      js->error= JE_SYN;
      return 1;
    }

    js->c_str+= len;
  }
}
930
931
/*
  Read the value at the current position. If the engine is still inside
  a key name (JST_KEY), the rest of the key and the ':' are skipped first.

  j->value_begin is set to the first non-space character of the value,
  j->value_end to the reader position after the JST_READ_VALUE handler
  has run.

  @return 0 on success, non-zero on error with j->s.error set.
*/
int json_read_value(json_engine_t *j)
{
  int t_next, c_len, res;

  if (j->state == JST_KEY)
  {
    for (;;)
    {
      if (json_read_keyname_chr(j) != 0)
        break;
    }

    if (j->s.error)
      return 1;
  }

  get_first_nonspace(&j->s, &t_next, &c_len);
  j->value_begin= j->s.c_str - c_len;

  res= json_actions[JST_READ_VALUE][t_next](j);

  j->value_end= j->s.c_str;
  return res;
}
951
952
/*
  Make one step of the scan: read the next non-space character and run
  the handler registered for its class in the current state. The length
  of that character is kept in j->sav_c_len.

  @return the handler's result: 0 to go on, non-zero to stop.
*/
int json_scan_next(json_engine_t *j)
{
  int t_next;
  json_state_handler handler;

  get_first_nonspace(&j->s, &t_next, &j->sav_c_len);

  handler= json_actions[j->state][t_next];
  return handler(j);
}
960
961
/* Character classes seen by the JSON path parser. */
enum json_path_chr_classes {
  P_EOS,    /* end of string */
  P_USD,    /* $ */
  P_ASTER,  /* * */
  P_LSQRB,  /* [ */
  P_RSQRB,  /* ] */
  P_POINT,  /* . */
  P_ZERO,   /* 0 */
  P_DIGIT,  /* 123456789 */
  P_L,      /* l (for "lax") */
  P_S,      /* s (for "strict") */
  P_SPACE,  /* space */
  P_BKSL,   /* \ */
  P_QUOTE,  /* " */
  P_ETC,    /* everything else */
  P_ERR,    /* character disallowed in JSON */
  P_BAD,    /* invalid character */
  N_PATH_CLASSES
};


/*
  Class of each of the first 128 code points inside a JSON path.

  P_S is mapped to 'S' (0x53) and 's' (0x73), as the enum documents.
  It used to sit one slot too early, on 'R' / 'r', so the letter 's'
  could never start the "strict" keyword.

  NOTE(review): json_path_setup() skips the tail of the keyword after
  its first letter; that tail has to be "trict" for "strict" to be
  accepted as a whole.
*/
static enum json_path_chr_classes json_path_chr_map[128] = {
  /* 0x00 .. 0x1F: control characters; TAB, LF and CR count as space */
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
  P_ERR,   P_SPACE, P_SPACE, P_ERR,   P_ERR,   P_SPACE, P_ERR,   P_ERR,
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,

  /* 0x20 .. 0x3F: space, '"', '$', '*', '.', digits */
  P_SPACE, P_ETC,   P_QUOTE, P_ETC,   P_USD,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ASTER, P_ETC,   P_ETC,   P_ETC,   P_POINT, P_ETC,
  P_ZERO,  P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT,
  P_DIGIT, P_DIGIT, P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,

  /* 0x40 .. 0x5F: 'L', 'S', '[', '\', ']' */
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_S,     P_ETC,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_LSQRB, P_BKSL,  P_RSQRB, P_ETC,   P_ETC,

  /* 0x60 .. 0x7F: 'l', 's' */
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_S,     P_ETC,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC
};
1004
1005
1006 enum json_path_states {
1007 PS_GO, /* Initial state. */
1008 PS_LAX, /* Parse the 'lax' keyword. */
1009 PS_PT, /* New path's step begins. */
1010 PS_AR, /* Parse array step. */
1011 PS_SAR, /* space after the '['. */
1012 PS_AWD, /* Array wildcard. */
1013 PS_Z, /* '0' (as an array item number). */
1014 PS_INT, /* Parse integer (as an array item number). */
1015 PS_AS, /* Space. */
1016 PS_KEY, /* Key. */
1017 PS_KNM, /* Parse key name. */
1018 PS_KWD, /* Key wildcard. */
1019 PS_AST, /* Asterisk. */
1020 PS_DWD, /* Double wildcard. */
1021 PS_KEYX, /* Key started with quote ("). */
1022 PS_KNMX, /* Parse quoted key name. */
1023 N_PATH_STATES, /* Below are states that aren't in the transitions table. */
1024 PS_SCT, /* Parse the 'strict' keyword. */
1025 PS_EKY, /* '.' after the keyname so next step is the key. */
1026 PS_EKYX, /* Closing " for the quoted keyname. */
1027 PS_EAR, /* '[' after the keyname so next step is the array. */
1028 PS_ESC, /* Escaping in the keyname. */
1029 PS_ESCX, /* Escaping in the quoted keyname. */
1030 PS_OK, /* Path normally ended. */
1031 PS_KOK /* EOS after the keyname so end the path normally. */
1032 };
1033
1034
1035 static int json_path_transitions[N_PATH_STATES][N_PATH_CLASSES]=
1036 {
1037 /*
1038 EOS $, * [ ] . 0
1039 1..9 L S SPACE \ " ETC
1040 ERR BAD
1041 */
1042 /* GO */ { JE_EOS, PS_PT, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1043 JE_SYN, PS_LAX, PS_SCT, PS_GO, JE_SYN, JE_SYN, JE_SYN,
1044 JE_NOT_JSON_CHR, JE_BAD_CHR},
1045 /* LAX */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1046 JE_SYN, PS_LAX, JE_SYN, PS_GO, JE_SYN, JE_SYN, JE_SYN,
1047 JE_NOT_JSON_CHR, JE_BAD_CHR},
1048 /* PT */ { PS_OK, JE_SYN, PS_AST, PS_AR, JE_SYN, PS_KEY, JE_SYN, JE_SYN,
1049 JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1050 JE_NOT_JSON_CHR, JE_BAD_CHR},
1051 /* AR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, JE_SYN, JE_SYN, PS_Z,
1052 PS_INT, JE_SYN, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
1053 JE_NOT_JSON_CHR, JE_BAD_CHR},
1054 /* SAR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, PS_PT, JE_SYN, PS_Z,
1055 PS_INT, JE_SYN, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
1056 JE_NOT_JSON_CHR, JE_BAD_CHR},
1057 /* AWD */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1058 JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1059 JE_NOT_JSON_CHR, JE_BAD_CHR},
1060 /* Z */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
1061 JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1062 JE_NOT_JSON_CHR, JE_BAD_CHR},
1063 /* INT */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, PS_INT,
1064 PS_INT, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1065 JE_NOT_JSON_CHR, JE_BAD_CHR},
1066 /* AS */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN, JE_SYN,
1067 JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
1068 JE_NOT_JSON_CHR, JE_BAD_CHR},
1069 /* KEY */ { JE_EOS, PS_KNM, PS_KWD, JE_SYN, PS_KNM, JE_SYN, PS_KNM,
1070 PS_KNM, PS_KNM, PS_KNM, PS_KNM, JE_SYN, PS_KEYX, PS_KNM,
1071 JE_NOT_JSON_CHR, JE_BAD_CHR},
1072 /* KNM */ { PS_KOK, PS_KNM, PS_AST, PS_EAR, PS_KNM, PS_EKY, PS_KNM,
1073 PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_ESC, PS_KNM, PS_KNM,
1074 JE_NOT_JSON_CHR, JE_BAD_CHR},
1075 /* KWD */ { PS_OK, JE_SYN, JE_SYN, PS_AR, JE_SYN, PS_EKY, JE_SYN,
1076 JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1077 JE_NOT_JSON_CHR, JE_BAD_CHR},
1078 /* AST */ { JE_SYN, JE_SYN, PS_DWD, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1079 JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1080 JE_NOT_JSON_CHR, JE_BAD_CHR},
1081 /* DWD */ { JE_SYN, JE_SYN, PS_AST, PS_AR, JE_SYN, PS_KEY, JE_SYN, JE_SYN,
1082 JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1083 JE_NOT_JSON_CHR, JE_BAD_CHR},
1084 /* KEYX*/ { JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
1085 PS_KNMX,PS_KNMX, PS_KNMX, PS_KNMX, PS_ESCX, PS_EKYX, PS_KNMX,
1086 JE_NOT_JSON_CHR, JE_BAD_CHR},
1087 /* KNMX */{ JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
1088 PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,PS_ESCX, PS_EKYX, PS_KNMX,
1089 JE_NOT_JSON_CHR, JE_BAD_CHR},
1090 };
1091
1092
/*
  Parse the text of a JSON path in [str, end) and fill the steps of 'p'.

  The text is read one character at a time. Each character is mapped to
  a class (P_EOS / P_BAD when nothing could be read, P_ETC for code points
  >= 128, json_path_chr_map[] otherwise) and fed to the
  json_path_transitions[][] table. Negative table entries are error codes.

  p->steps[0] is always set to JSON_PATH_ARRAY_WILD; every key or array
  step found in the text appends one more element after it.

  RETURN
    0     the path was parsed successfully
    != 0  failure. For errors detected here p->s.error holds the code
          (the negative state, JE_DEPTH or JE_SYN); for a failed
          json_handle_esc() the error is presumably set by that helper.
*/
int json_path_setup(json_path_t *p,
                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
{
  int c_len, t_next, state= PS_GO;
  /* Set when a double wildcard was seen and not yet attached to a step. */
  enum json_path_step_types double_wildcard= JSON_PATH_KEY_NULL;

  json_string_setup(&p->s, i_cs, str, end);

  p->steps[0].type= JSON_PATH_ARRAY_WILD;
  p->last_step= p->steps;
  p->mode_strict= FALSE;
  p->types_used= JSON_PATH_KEY_NULL;

  do
  {
    /* Classify the next character; c_len <= 0 means nothing was read. */
    if ((c_len= json_next_char(&p->s)) <= 0)
      t_next= json_eos(&p->s) ? P_EOS : P_BAD;
    else
      t_next= (p->s.c_next >= 128) ? P_ETC : json_path_chr_map[p->s.c_next];

    if ((state= json_path_transitions[state][t_next]) < 0)
      return p->s.error= state;

    /*
      Step over the character just classified. The cases below use
      'p->s.c_str - c_len' to get back to the position where it started.
    */
    p->s.c_str+= c_len;

    switch (state)
    {
    case PS_LAX:
      /* The rest of the mode keyword must follow literally. */
      if ((p->s.error= skip_string_verbatim(&p->s, "ax")))
        return 1;
      p->mode_strict= FALSE;
      continue;
    case PS_SCT:
      if ((p->s.error= skip_string_verbatim(&p->s, "rict")))
        return 1;
      p->mode_strict= TRUE;
      /* After the keyword both modes continue from the same state. */
      state= PS_LAX;
      continue;
    case PS_KWD:
    case PS_AWD:
      /* Wildcard for the current key/array step. */
      p->last_step->type|= JSON_PATH_WILD;
      p->types_used|= JSON_PATH_WILD;
      continue;
    case PS_INT:
      /* Accumulate one more decimal digit of the array index. */
      p->last_step->n_item*= 10;
      p->last_step->n_item+= p->s.c_next - '0';
      continue;
    case PS_EKYX:
      /* The key ends right before the character that was just read. */
      p->last_step->key_end= p->s.c_str - c_len;
      state= PS_PT;
      continue;
    case PS_EKY:
      /* The current key ends here and a new key step begins. */
      p->last_step->key_end= p->s.c_str - c_len;
      state= PS_KEY;
      /* fall through */
    case PS_KEY:
      p->last_step++;
      if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
        return p->s.error= JE_DEPTH;
      /* A pending double wildcard is attached to the step just opened. */
      p->types_used|= p->last_step->type= JSON_PATH_KEY | double_wildcard;
      double_wildcard= JSON_PATH_KEY_NULL;
      /* fall through */
    case PS_KEYX:
      /* The key name starts right after the character that was just read. */
      p->last_step->key= p->s.c_str;
      continue;
    case PS_EAR:
      /* The current key ends here and an array step begins. */
      p->last_step->key_end= p->s.c_str - c_len;
      state= PS_AR;
      /* fall through */
    case PS_AR:
      p->last_step++;
      if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
        return p->s.error= JE_DEPTH;
      p->types_used|= p->last_step->type= JSON_PATH_ARRAY | double_wildcard;
      double_wildcard= JSON_PATH_KEY_NULL;
      p->last_step->n_item= 0;
      continue;
    case PS_ESC:
      if (json_handle_esc(&p->s))
        return 1;
      state= PS_KNM;
      continue;
    case PS_ESCX:
      if (json_handle_esc(&p->s))
        return 1;
      state= PS_KNMX;
      continue;
    case PS_KOK:
      /*
        End of the path text inside a key name. c_len is <= 0 here, the
        subtraction undoes the adjustment of c_str made above.
      */
      p->last_step->key_end= p->s.c_str - c_len;
      state= PS_OK;
      break; /* 'break' as the loop supposed to end after that. */
    case PS_DWD:
      double_wildcard= JSON_PATH_DOUBLE_WILD;
      continue;
    };
  } while (state != PS_OK);

  /* A double wildcard that was not followed by a step is a syntax error. */
  return double_wildcard ? (p->s.error= JE_SYN) : 0;
}
1192
1193
/*
  Advance the scanner until its stack depth drops below 'level'.

  RETURN
    0  j->stack_p became less than 'level' (checked before every scan step,
       so nothing is scanned if that is already the case)
    1  json_scan_next() returned non-zero before that happened
*/
int json_skip_to_level(json_engine_t *j, int level)
{
  for (;;)
  {
    if (j->stack_p < level)
      return 0;

    if (json_scan_next(j) != 0)
      return 1;
  }
}
1203
1204
1205 /*
1206 works as json_skip_level() but also counts items on the current
1207 level skipped.
1208 */
json_skip_level_and_count(json_engine_t * j,int * n_items_skipped)1209 int json_skip_level_and_count(json_engine_t *j, int *n_items_skipped)
1210 {
1211 int level= j->stack_p;
1212
1213 *n_items_skipped= 0;
1214 while (json_scan_next(j) == 0)
1215 {
1216 if (j->stack_p < level)
1217 return 0;
1218 if (j->stack_p == level && j->state == JST_VALUE)
1219 (*n_items_skipped)++;
1220 }
1221
1222 return 1;
1223 }
1224
1225
/*
  Skip the value that belongs to the current key.

  The value is read first; a scalar needs nothing more, anything else
  is skipped with json_skip_level().

  RETURN
    0      the value was skipped
    != 0   json_read_value() or json_skip_level() failed
*/
int json_skip_key(json_engine_t *j)
{
  if (json_read_value(j))
    return 1;

  return json_value_scalar(j) ? 0 : json_skip_level(j);
}
1236
1237
/*
  Value stored in array_counters[] for a path step that was skipped
  by handle_match(); json_find_path() checks for it on JST_OBJ_END.
*/
#define SKIPPED_STEP_MARK ((uint) ~0)

/*
  Current step of the path matches the JSON construction.
  Now we should either stop the search or go to the next
  step of the path.

  The value at the current position is read. Steps of the exact type
  JSON_PATH_ARRAY (no wildcard flags) with n_item == 0 that follow the
  current step may be skipped when the value is a scalar or an object;
  if this exhausts the path, the scanner is rewound to the beginning of
  the value.

  RETURN
    0      continue the search; *p_cur_step was advanced only when the
           value type fits the next step
    != 0   the caller should stop: either json_read_value() /
           json_skip_level() failed, or the path is exhausted after
           skipping the trailing [0] steps
*/
static int handle_match(json_engine_t *je, json_path_t *p,
                        json_path_step_t **p_cur_step, uint *array_counters)
{
  json_path_step_t *next_step= *p_cur_step + 1;

  DBUG_ASSERT(*p_cur_step < p->last_step);

  if (json_read_value(je))
    return 1;

  if (json_value_scalar(je))
  {
    /* A scalar can only satisfy trailing [0] steps. */
    while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0)
    {
      if (++next_step > p->last_step)
      {
        /* Path exhausted: rewind to the start of the value and stop. */
        je->s.c_str= je->value_begin;
        return 1;
      }
    }
    return 0;
  }

  if (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0 &&
      je->value_type & JSON_VALUE_OBJECT)
  {
    /* An object where [0] steps are expected: mark those steps as skipped. */
    do
    {
      array_counters[next_step - p->steps]= SKIPPED_STEP_MARK;
      if (++next_step > p->last_step)
      {
        /* Path exhausted: rewind to the start of the value and stop. */
        je->s.c_str= je->value_begin;
        je->stack_p--;
        return 1;
      }
    } while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0);
  }


  /* Start counting items for the step we are about to descend into. */
  array_counters[next_step - p->steps]= 0;

  /* The container type does not fit the next step: skip the container. */
  if ((int) je->value_type !=
      (int) (next_step->type & JSON_PATH_KEY_OR_ARRAY))
    return json_skip_level(je);

  *p_cur_step= next_step;
  return 0;
}
1293
1294
1295 /*
1296 Check if the name of the current JSON key matches
1297 the step of the path.
1298 */
json_key_matches(json_engine_t * je,json_string_t * k)1299 int json_key_matches(json_engine_t *je, json_string_t *k)
1300 {
1301 while (json_read_keyname_chr(je) == 0)
1302 {
1303 if (json_read_string_const_chr(k) ||
1304 je->s.c_next != k->c_next)
1305 return 0;
1306 }
1307
1308 return json_read_string_const_chr(k);
1309 }
1310
1311
/*
  Scan the JSON document of 'je' looking for the location described
  by the path 'p'.

  *p_cur_step is the path step currently being matched; it is moved
  forward by handle_match() and backward when an object or array ends.
  array_counters[] holds one item counter per path step (indexed by the
  step's offset in p->steps).

  RETURN
    je->s.error  when the loop was left through 'exit' (last step matched,
                 handle_match() asked to stop, or json_skip_key() failed)
    1            json_scan_next() returned non-zero without a match
*/
int json_find_path(json_engine_t *je,
                   json_path_t *p, json_path_step_t **p_cur_step,
                   uint *array_counters)
{
  json_string_t key_name;

  json_string_set_cs(&key_name, p->s.cs);

  do
  {
    json_path_step_t *cur_step= *p_cur_step;
    switch (je->state)
    {
    case JST_KEY:
      DBUG_ASSERT(cur_step->type & JSON_PATH_KEY);
      if (!(cur_step->type & JSON_PATH_WILD))
      {
        json_string_set_str(&key_name, cur_step->key, cur_step->key_end);
        if (!json_key_matches(je, &key_name))
        {
          /* Different key: skip its value and go on with the next one. */
          if (json_skip_key(je))
            goto exit;
          continue;
        }
      }
      if (cur_step == p->last_step ||
          handle_match(je, p, p_cur_step, array_counters))
        goto exit;
      break;
    case JST_VALUE:
      DBUG_ASSERT(cur_step->type & JSON_PATH_ARRAY);
      /* The counter of this step is incremented for every item checked. */
      if (cur_step->type & JSON_PATH_WILD ||
          cur_step->n_item == array_counters[cur_step - p->steps]++)
      {
        /* Array item matches. */
        if (cur_step == p->last_step ||
            handle_match(je, p, p_cur_step, array_counters))
          goto exit;
      }
      else
        json_skip_array_item(je); /* NOTE(review): result is not checked. */
      break;
    case JST_OBJ_END:
      /* Go one step back, and further back over steps marked as skipped. */
      do
      {
        (*p_cur_step)--;
      } while (*p_cur_step > p->steps &&
               array_counters[*p_cur_step - p->steps] == SKIPPED_STEP_MARK);
      break;
    case JST_ARRAY_END:
      (*p_cur_step)--;
      break;
    default:
      DBUG_ASSERT(0);
      break;
    };
  } while (json_scan_next(je) == 0);

  /* No luck. */
  return 1;

exit:
  return je->s.error;
}
1376
1377
/*
  Start a search for several paths at once.

  Stores the path array, its size and the per-path depth array in 'state',
  resets the current depth to 0 and runs the first search step.
  state->array_counters is not initialized here.

  RETURN
    the result of json_find_paths_next()
*/
int json_find_paths_first(json_engine_t *je, json_find_paths_t *state,
                          uint n_paths, json_path_t *paths, uint *path_depths)
{
  state->n_paths= n_paths;
  state->paths= paths;
  state->cur_depth= 0;
  state->path_depths= path_depths;
  return json_find_paths_next(je, state);
}
1387
1388
/*
  Continue the multi-path search prepared by json_find_paths_first().

  The document is scanned once; at every key or array item each path
  that is still alive at the current depth (path_depths[i] >= cur_depth)
  is checked against the step at index state->cur_depth.

  A path is reported as found when the matching step is the path's last
  step, i.e. cur_step == paths[i].last_step, the same test
  json_find_path() uses. (The step pointer used to be compared with
  'last_step + cur_depth', which can only be true for a path that
  consists of the root step alone.)

  NOTE(review): state->array_counters[] is only read and incremented
  here; nothing in this function or in json_find_paths_first() resets a
  counter when a new array starts - confirm that the caller does it.

  RETURN
    je->s.error  when the loop was left through 'exit' (a path matched,
                 or json_skip_level() failed)
    1            json_scan_next() returned non-zero without a match
*/
int json_find_paths_next(json_engine_t *je, json_find_paths_t *state)
{
  uint p_c;
  int path_found, no_match_found;
  do
  {
    switch (je->state)
    {
    case JST_KEY:
      path_found= FALSE;
      no_match_found= TRUE;
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        json_path_step_t *cur_step;
        if (state->path_depths[p_c] <
              state->cur_depth /* Path already failed. */ ||
            !((cur_step= state->paths[p_c].steps + state->cur_depth)->type &
              JSON_PATH_KEY))
          continue;

        if (!(cur_step->type & JSON_PATH_WILD))
        {
          json_string_t key_name;
          json_string_setup(&key_name, state->paths[p_c].s.cs,
                            cur_step->key, cur_step->key_end);
          if (!json_key_matches(je, &key_name))
            continue;
        }
        /* The key fits this path: is it the path's final step? */
        if (cur_step == state->paths[p_c].last_step)
          path_found= TRUE;
        else
        {
          no_match_found= FALSE;
          state->path_depths[p_c]= state->cur_depth + 1;
        }
      }
      if (path_found)
        /* Return the result. */
        goto exit;
      if (no_match_found)
      {
        /* No possible paths left to check. Just skip the level. */
        if (json_skip_level(je))
          goto exit;
      }

      break;
    case JST_VALUE:
      path_found= FALSE;
      no_match_found= TRUE;
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        json_path_step_t *cur_step;
        if (state->path_depths[p_c]< state->cur_depth /* Path already failed. */ ||
            !((cur_step= state->paths[p_c].steps + state->cur_depth)->type &
              JSON_PATH_ARRAY))
          continue;
        if (cur_step->type & JSON_PATH_WILD ||
            cur_step->n_item == state->array_counters[state->cur_depth])
        {
          /* Array item matches: is it the path's final step? */
          if (cur_step == state->paths[p_c].last_step)
            path_found= TRUE;
          else
          {
            no_match_found= FALSE;
            state->path_depths[p_c]= state->cur_depth + 1;
          }
        }
      }

      if (path_found)
        goto exit;

      if (no_match_found)
        json_skip_array_item(je);

      /* One more item of the array at this depth has been processed. */
      state->array_counters[state->cur_depth]++;
      break;
    case JST_OBJ_START:
    case JST_ARRAY_START:
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        if (state->path_depths[p_c] < state->cur_depth)
          /* Path already failed. */
          continue;
        /* Paths whose step type fits the container go one level deeper. */
        if (state->paths[p_c].steps[state->cur_depth].type &
            ((je->state == JST_OBJ_START) ? JSON_PATH_KEY : JSON_PATH_ARRAY))
          state->path_depths[p_c]++;
      }
      state->cur_depth++;
      break;
    case JST_OBJ_END:
    case JST_ARRAY_END:
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        if (state->path_depths[p_c] < state->cur_depth)
          continue;
        state->path_depths[p_c]--;
      }
      state->cur_depth--;
      break;
    default:
      DBUG_ASSERT(0);
      break;
    };
  } while (json_scan_next(je) == 0);

  /* No luck. */
  return 1;

exit:
  return je->s.error;
}
1503
1504
/*
  Convert the bytes [ascii, ascii_end) to the charset json_cs, one
  character per input byte, and store them in [json, json_end).

  RETURN
    >= 0  number of bytes written (0 for an empty input)
    <= 0  the value returned by the charset's wc_mb() for the first
          character that could not be stored
*/
int json_append_ascii(CHARSET_INFO *json_cs,
                      uchar *json, uchar *json_end,
                      const uchar *ascii, const uchar *ascii_end)
{
  uchar *out= json;
  const uchar *in;

  for (in= ascii; in < ascii_end; in++)
  {
    int n_bytes= json_cs->cset->wc_mb(json_cs, (my_wc_t) *in, out, json_end);

    if (n_bytes <= 0)
      return n_bytes;           /* Error return. */

    out+= n_bytes;
  }

  return (int) (out - json);
}
1527
1528
/*
  Read the characters of the JSON string [json_str, json_end) (charset
  json_cs) with json_read_string_const_chr() and store them, converted to
  res_cs, in [res, res_end).

  A character that res_cs cannot represent (wc_mb() returns MY_CS_ILUNI)
  is replaced with '?'.

  RETURN
    >= 0  number of bytes written to 'res'
    -1    a character could not be stored, or reading stopped with an
          error other than JE_EOS
*/
int json_unescape(CHARSET_INFO *json_cs,
                  const uchar *json_str, const uchar *json_end,
                  CHARSET_INFO *res_cs, uchar *res, uchar *res_end)
{
  json_string_t src;
  uchar *out= res;

  json_string_setup(&src, json_cs, json_str, json_end);

  while (json_read_string_const_chr(&src) == 0)
  {
    int n_bytes= res_cs->cset->wc_mb(res_cs, src.c_next, out, res_end);

    if (n_bytes <= 0)
    {
      /* Anything but "character not supported" is fatal. */
      if (n_bytes != MY_CS_ILUNI)
        return -1;

      /* Substitute the unsupported character with the '?' symbol. */
      n_bytes= res_cs->cset->wc_mb(res_cs, '?', out, res_end);
      if (n_bytes <= 0)
        return -1;
    }

    out+= n_bytes;
  }

  if (src.error != JE_EOS)
    return -1;

  return (int) (out - res);
}
1563
1564
/*
  When we need to replace a character with the escaping.
  Except for ESC_ and ESC_BS the value of a class is the character that
  json_escape() writes after the backslash.
*/
enum json_esc_char_classes {
  ESC_= 0,    /* No need to escape. */
  ESC_U= 'u', /* Character not allowed in JSON. Always escape as \uXXXX. */
  ESC_B= 'b', /* Backspace. Escape as \b */
  ESC_F= 'f', /* Formfeed. Escape as \f */
  ESC_N= 'n', /* Newline. Escape as \n */
  ESC_R= 'r', /* Return. Escape as \r */
  ESC_T= 't', /* Tab. Escape as \t */
  ESC_BS= '\\'  /* Backslash or '"'. Escape by the \\ prefix. */
};
1576
1577
/*
  This specifies how we should escape the character.
  The table covers the code points 0x00..0x5F only; json_escape() never
  looks up code points >= 0x60.
*/
static enum json_esc_char_classes json_escape_chr_map[0x60] = {
  /* 0x00..0x1F: 0x08 backspace, 0x09 tab, 0x0A newline, 0x0C formfeed,
     0x0D return have short escapes, the rest use the 'u' form. */
  ESC_U,  ESC_U,  ESC_U,  ESC_U,  ESC_U,  ESC_U,  ESC_U,  ESC_U,
  ESC_B,  ESC_T,  ESC_N,  ESC_U,  ESC_F,  ESC_R,  ESC_U,  ESC_U,
  ESC_U,  ESC_U,  ESC_U,  ESC_U,  ESC_U,  ESC_U,  ESC_U,  ESC_U,
  ESC_U,  ESC_U,  ESC_U,  ESC_U,  ESC_U,  ESC_U,  ESC_U,  ESC_U,

  /* 0x20..0x3F: only the double quote (0x22) is escaped. */
  ESC_,   ESC_,   ESC_BS, ESC_,   ESC_,   ESC_,   ESC_,   ESC_,
  ESC_,   ESC_,   ESC_,   ESC_,   ESC_,   ESC_,   ESC_,   ESC_,
  ESC_,   ESC_,   ESC_,   ESC_,   ESC_,   ESC_,   ESC_,   ESC_,
  ESC_,   ESC_,   ESC_,   ESC_,   ESC_,   ESC_,   ESC_,   ESC_,

  /* 0x40..0x5F: only the backslash (0x5C) is escaped. */
  ESC_,   ESC_,   ESC_,   ESC_,   ESC_,   ESC_,   ESC_,   ESC_,
  ESC_,   ESC_,   ESC_,   ESC_,   ESC_,   ESC_,   ESC_,   ESC_,
  ESC_,   ESC_,   ESC_,   ESC_,   ESC_,   ESC_,   ESC_,   ESC_,
  ESC_,   ESC_,   ESC_,   ESC_,   ESC_BS, ESC_,   ESC_,   ESC_,
};
1595
1596
/*
  Hexadecimal digits indexed by nibble value (upper case).
  The array has exactly 16 elements, so it is not NUL-terminated.
*/
static const char hexconv[16] = "0123456789ABCDEF";
1598
1599
/*
  Convert the string [str, str_end) (charset str_cs) into the contents of
  a JSON string in charset json_cs, stored in [json, json_end).

  Code points below 0x60 are escaped as json_escape_chr_map[] says:
  a two-character escape, a backslash prefix for the backslash and the
  double quote, or the 'u' form. Other characters are copied as is when
  json_cs can represent them and written in the 'u' form otherwise.

  The 'u' form carries the UTF-16 code units of the character. A character
  outside the BMP needs a surrogate pair, which JSON requires to be written
  as two separate escapes (backslash, 'u', 4 hex digits - twice), not as
  8 hex digits after a single 'u'.

  RETURN
    >= 0  number of bytes written to 'json'
    -1    invalid character in 'str', a character that has no UTF-16 form,
          or the 'json' buffer is too small
*/
int json_escape(CHARSET_INFO *str_cs,
                const uchar *str, const uchar *str_end,
                CHARSET_INFO *json_cs, uchar *json, uchar *json_end)
{
  const uchar *json_start= json;

  while (str < str_end)
  {
    my_wc_t c_chr;
    int c_len;
    if ((c_len= str_cs->cset->mb_wc(str_cs, &c_chr, str, str_end)) > 0)
    {
      enum json_esc_char_classes c_class;

      str+= c_len;
      if (c_chr >= 0x60 || (c_class= json_escape_chr_map[c_chr]) == ESC_)
      {
        /* No escaping needed, try to store the character itself. */
        if ((c_len= json_cs->cset->wc_mb(json_cs, c_chr, json, json_end)) > 0)
        {
          json+= c_len;
          continue;
        }
        if (c_len < 0)
        {
          /* JSON buffer is depleted. */
          return -1;
        }

        /* JSON charset cannot convert this character. */
        c_class= ESC_U;
      }

      /*
        Write the backslash and the character that follows it: the
        character itself for ESC_BS, the class value otherwise.
      */
      if ((c_len= json_cs->cset->wc_mb(json_cs, '\\', json, json_end)) <= 0 ||
          (c_len= json_cs->cset->wc_mb(json_cs,
                                       (c_class == ESC_BS) ? c_chr : c_class,
                                       json+= c_len, json_end)) <= 0)
      {
        /* JSON buffer is depleted. */
        return -1;
      }
      json+= c_len;

      if (c_class != ESC_U)
        continue;

      {
        /*
          The backslash and 'u' are already written. Append the 4 hex
          digits of the first UTF-16 unit and, for a surrogate pair,
          a second complete escape for the low surrogate.
        */
        uchar utf16buf[4];
        uchar code_str[10];
        int code_len= 4;
        int u_len= my_uni_utf16(0, c_chr, utf16buf, utf16buf + 4);

        if (u_len < 2)
        {
          /* No UTF-16 form for this code point; utf16buf is not filled. */
          return -1;
        }

        code_str[0]= hexconv[utf16buf[0] >> 4];
        code_str[1]= hexconv[utf16buf[0] & 15];
        code_str[2]= hexconv[utf16buf[1] >> 4];
        code_str[3]= hexconv[utf16buf[1] & 15];

        if (u_len > 2)
        {
          code_str[4]= '\\';
          code_str[5]= 'u';
          code_str[6]= hexconv[utf16buf[2] >> 4];
          code_str[7]= hexconv[utf16buf[2] & 15];
          code_str[8]= hexconv[utf16buf[3] >> 4];
          code_str[9]= hexconv[utf16buf[3] & 15];
          code_len= 10;
        }

        if ((c_len= json_append_ascii(json_cs, json, json_end,
                                      code_str, code_str + code_len)) > 0)
        {
          json+= c_len;
          continue;
        }
        /* JSON buffer is depleted. */
        return -1;
      }
    }
    else /* c_len <= 0, an illegal or incomplete symbol. */
      return -1;
  }

  return (int)(json - json_start);
}
1680
1681
/*
  Prepare to enumerate the values of the JSON text [str, end) together
  with their paths (see json_get_path_next()).

  Starts the scan of 'je' and puts p->last_step one element before
  p->steps, which json_get_path_next() recognizes as "nothing read yet".

  RETURN
    0  always
*/
int json_get_path_start(json_engine_t *je, CHARSET_INFO *i_cs,
                        const uchar *str, const uchar *end,
                        json_path_t *p)
{
  json_scan_start(je, i_cs, str, end);
  p->last_step= p->steps - 1;
  return 0;
}
1690
1691
/*
  Move to the next value of the document and keep 'p' in sync, so that
  p->steps .. p->last_step describe where that value is located.

  The first call after json_get_path_start() reads the top-level value
  and sets step 0 to JSON_PATH_ARRAY_WILD. Later calls first account for
  the value returned previously (advance the array index after a scalar,
  or open a new step for an object/array) and then scan forward to the
  next key or array item.

  RETURN
    0  a value was read
    1  json_read_value() / json_scan_next() returned non-zero, or a key
       name could not be read
*/
int json_get_path_next(json_engine_t *je, json_path_t *p)
{
  if (p->last_step < p->steps)
  {
    /* First call: nothing has been read yet. */
    if (json_read_value(je))
      return 1;

    p->last_step= p->steps;
    p->steps[0].type= JSON_PATH_ARRAY_WILD;
    p->steps[0].n_item= 0;
    return 0;
  }
  else
  {
    if (json_value_scalar(je))
    {
      /* The previous value was a scalar array item: next index. */
      if (p->last_step->type & JSON_PATH_ARRAY)
        p->last_step->n_item++;
    }
    else
    {
      /*
        The previous value was an object or an array: open a step for it.
        The value type is used directly as the step type (presumably the
        two enums share these values - verify in json_lib.h).
      */
      p->last_step++;
      p->last_step->type= (enum json_path_step_types) je->value_type;
      p->last_step->n_item= 0;
    }

    if (json_scan_next(je))
      return 1;
  }

  do
  {
    switch (je->state)
    {
    case JST_KEY:
      p->last_step->key= je->s.c_str;
      /*
        key_end is refreshed before every read, so after the loop it
        holds the position where the last (unsuccessful) read started.
      */
      do
      {
        p->last_step->key_end= je->s.c_str;
      } while (json_read_keyname_chr(je) == 0);
      if (je->s.error)
        return 1;
      /* Now we have je.state == JST_VALUE, so let's handle it. */

      /* fall through */
    case JST_VALUE:
      if (json_read_value(je))
        return 1;
      return 0;
    case JST_OBJ_END:
    case JST_ARRAY_END:
      /* Close the step of the finished container. */
      p->last_step--;
      if (p->last_step->type & JSON_PATH_ARRAY)
        p->last_step->n_item++;
      break;
    default:
      break;
    }
  } while (json_scan_next(je) == 0);

  return 1;
}
1754
1755
/*
  Compare the steps a .. a_end (may contain wildcards) with the steps
  b .. b_end (asserted to contain no wildcards). Both ranges are
  inclusive.

  'vt' is consulted only when 'b' runs out first: unless it is
  JSON_VALUE_ARRAY, remaining steps of 'a' that are non-wild array steps
  with index 0 are ignored.

  A step of 'a' carrying JSON_PATH_DOUBLE_WILD may swallow any number of
  steps of 'b'; this is resolved by recursion.

  RETURN
    0   the ranges match
    1   'a' is exhausted while 'b' still has steps
    -1  a step of 'a' (without double wildcard) does not fit the step of 'b'
    -2  'b' is exhausted while 'a' still has steps that cannot be ignored
    After a double-wildcard recursion the result of the second attempt is
    returned when it is >= 0, otherwise the result of the first one.
*/
int json_path_parts_compare(
    const json_path_step_t *a, const json_path_step_t *a_end,
    const json_path_step_t *b, const json_path_step_t *b_end,
    enum json_value_types vt)
{
  int res, res2;

  while (a <= a_end)
  {
    if (b > b_end)
    {
      /* 'b' is over: only trailing non-wild [0] steps of 'a' are tolerated. */
      while (vt != JSON_VALUE_ARRAY &&
             (a->type & JSON_PATH_ARRAY_WILD) == JSON_PATH_ARRAY &&
             a->n_item == 0)
      {
        if (++a > a_end)
          return 0;
      }
      return -2;
    }

    DBUG_ASSERT((b->type & (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD)) == 0);


    if (a->type & JSON_PATH_ARRAY)
    {
      if (b->type & JSON_PATH_ARRAY)
      {
        if ((a->type & JSON_PATH_WILD) || a->n_item == b->n_item)
          goto step_fits;
        goto step_failed;
      }
      /* A non-wild [0] step of 'a' may be matched without consuming 'b'. */
      if ((a->type & JSON_PATH_WILD) == 0 && a->n_item == 0)
        goto step_fits_autowrap;
      goto step_failed;
    }
    else /* JSON_PATH_KEY */
    {
      if (!(b->type & JSON_PATH_KEY))
        goto step_failed;

      /* Key names are compared byte by byte. */
      if (!(a->type & JSON_PATH_WILD) &&
          (a->key_end - a->key != b->key_end - b->key ||
           memcmp(a->key, b->key, a->key_end - a->key) != 0))
        goto step_failed;

      goto step_fits;
    }
step_failed:
    if (!(a->type & JSON_PATH_DOUBLE_WILD))
      return -1;
    /* The double wildcard swallows this step of 'b'; retry with the next. */
    b++;
    continue;

step_fits:
    b++;
    if (!(a->type & JSON_PATH_DOUBLE_WILD))
    {
      a++;
      continue;
    }

    /* Double wild handling needs recursions. */
    /* First try: the step of 'a' is done with, continue with a+1. */
    res= json_path_parts_compare(a+1, a_end, b, b_end, vt);
    if (res == 0)
      return 0;

    /* Second try: let the same step of 'a' match further down in 'b'. */
    res2= json_path_parts_compare(a, a_end, b, b_end, vt);

    return (res2 >= 0) ? res2 : res;

step_fits_autowrap:
    if (!(a->type & JSON_PATH_DOUBLE_WILD))
    {
      a++;
      continue;
    }

    /* Double wild handling needs recursions. */
    res= json_path_parts_compare(a+1, a_end, b+1, b_end, vt);
    if (res == 0)
      return 0;

    res2= json_path_parts_compare(a, a_end, b+1, b_end, vt);

    return (res2 >= 0) ? res2 : res;

  }

  /* 'a' is exhausted: full match only if 'b' is exhausted as well. */
  return b <= b_end;
}
1847
1848
/*
  Compare two parsed paths with json_path_parts_compare().
  Step 0 of both paths is left out of the comparison.

  RETURN
    the result of json_path_parts_compare()
*/
int json_path_compare(const json_path_t *a, const json_path_t *b,
                      enum json_value_types vt)
{
  return json_path_parts_compare(a->steps+1, a->last_step,
                                 b->steps+1, b->last_step, vt);
}
1855