1 #include <my_global.h>
2 #include <string.h>
3 #include <m_ctype.h>
4 #include "json_lib.h"
5 
/*
  JSON escaping lets the user specify characters by their UTF-16 codes
  (the \uXXXX form). So we're going to need the UTF-16 charset
  capabilities. Let's import them from the utf16 charset.

  Both functions are only declared here; their definitions are not in
  this file.
*/
int my_utf16_uni(CHARSET_INFO *cs,
                 my_wc_t *pwc, const uchar *s, const uchar *e);
int my_uni_utf16(CHARSET_INFO *cs, my_wc_t wc, uchar *s, uchar *e);
14 
15 
/*
  Point the string reader at a new piece of text.
  'str' becomes the current read position (c_str) and 'end' is stored
  as the end marker (str_end). Nothing else in 's' is touched.
*/
void json_string_set_str(json_string_t *s,
                         const uchar *str, const uchar *end)
{
  s->str_end= end;
  s->c_str= str;
}
22 
23 
/*
  Bind the string reader to a character set.
  Remembers the charset, caches its mb_wc routine in s->wc and
  resets the error field to 0.
*/
void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs)
{
  s->wc= i_cs->cset->mb_wc;
  s->cs= i_cs;
  s->error= 0;
}
30 
31 
/*
  Fully initialize a string reader: set both the text to read
  (str .. end) and the character set used to decode it.
*/
static void json_string_setup(json_string_t *s,
                              CHARSET_INFO *i_cs, const uchar *str,
                              const uchar *end)
{
  /* The two setters write disjoint fields, so their order is irrelevant. */
  json_string_set_str(s, str, end);
  json_string_set_cs(s, i_cs);
}
39 
40 
/*
  Character classes recognized by the main JSON scanner.
  Classes below NR_C_CLASSES are used as column indexes of the
  json_actions[][] table; C_SPACE is placed after the counter because
  spaces are skipped without a dedicated handler.
*/
enum json_char_classes {
  C_EOS=   0,        /* end of string */
  C_LCURB= 1,        /* { */
  C_RCURB= 2,        /* } */
  C_LSQRB= 3,        /* [ */
  C_RSQRB= 4,        /* ] */
  C_COLON= 5,        /* : */
  C_COMMA= 6,        /* , */
  C_QUOTE= 7,        /* " */
  C_DIGIT= 8,        /* -0123456789 */
  C_LOW_F= 9,        /* 'f' (for "false") */
  C_LOW_N= 10,       /* 'n' (for "null") */
  C_LOW_T= 11,       /* 't' (for "true") */
  C_ETC=   12,       /* everything else */
  C_ERR=   13,       /* character disallowed in JSON */
  C_BAD=   14,       /* invalid character, charset handler cannot read it */
  NR_C_CLASSES= 15,  /* Number of classes that are handled with functions. */
  C_SPACE= 16        /* space. Needs no specific handler, so it is after the counter. */
};
60 
61 
/*
  This array maps first 128 Unicode Code Points into classes.
  The remaining Unicode characters should be mapped to C_ETC.

  Each row holds eight consecutive code points; the comment at the end
  of a row gives the range it covers. Only TAB (0x09), LF (0x0A),
  CR (0x0D) and the blank (0x20) are classified as C_SPACE; every other
  code point below 0x20 is C_ERR.
*/

static enum json_char_classes json_chr_map[128] = {
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   /* 0x00-0x07 */
  C_ERR,   C_SPACE, C_SPACE, C_ERR,   C_ERR,   C_SPACE, C_ERR,   C_ERR,   /* 0x08-0x0F */
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   /* 0x10-0x17 */
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   /* 0x18-0x1F */

  C_SPACE, C_ETC,   C_QUOTE, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   /* 0x20-0x27 */
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_COMMA, C_DIGIT, C_ETC,   C_ETC,   /* 0x28-0x2F */
  C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, /* 0x30-0x37 */
  C_DIGIT, C_DIGIT, C_COLON, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   /* 0x38-0x3F */

  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   /* 0x40-0x47 */
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   /* 0x48-0x4F */
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   /* 0x50-0x57 */
  C_ETC,   C_ETC,   C_ETC,   C_LSQRB, C_ETC,   C_RSQRB, C_ETC,   C_ETC,   /* 0x58-0x5F */

  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_F, C_ETC,   /* 0x60-0x67 */
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_N, C_ETC,   /* 0x68-0x6F */
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_T, C_ETC,   C_ETC,   C_ETC,   /* 0x70-0x77 */
  C_ETC,   C_ETC,   C_ETC,   C_LCURB, C_ETC,   C_RCURB, C_ETC,   C_ETC    /* 0x78-0x7F */
};
88 
89 
90 /*
91   JSON parser actually has more states than the 'enum json_states'
92   declares. But the rest of the states aren't seen to the user so let's
93   specify them here to avoid confusion.
94 */
95 
96 enum json_all_states {
97   JST_DONE= NR_JSON_USER_STATES,         /* ok to finish     */
98   JST_OBJ_CONT= NR_JSON_USER_STATES+1,   /* object continues */
99   JST_ARRAY_CONT= NR_JSON_USER_STATES+2, /* array continues  */
100   JST_READ_VALUE= NR_JSON_USER_STATES+3, /* value is being read */
101   NR_JSON_STATES= NR_JSON_USER_STATES+4
102 };
103 
104 
/*
  Type of the functions stored in the json_actions[][] table: each one
  receives the engine and returns an int status.
*/
typedef int (*json_state_handler)(json_engine_t *);
106 
107 
108 /* The string is broken. */
unexpected_eos(json_engine_t * j)109 static int unexpected_eos(json_engine_t *j)
110 {
111   j->s.error= JE_EOS;
112   return 1;
113 }
114 
115 
116 /* This symbol here breaks the JSON syntax. */
syntax_error(json_engine_t * j)117 static int syntax_error(json_engine_t *j)
118 {
119   j->s.error= JE_SYN;
120   return 1;
121 }
122 
123 
124 /* Value of object. */
mark_object(json_engine_t * j)125 static int mark_object(json_engine_t *j)
126 {
127   j->state= JST_OBJ_START;
128   if (++j->stack_p < JSON_DEPTH_LIMIT)
129   {
130     j->stack[j->stack_p]= JST_OBJ_CONT;
131     return 0;
132   }
133   j->s.error= JE_DEPTH;
134   return 1;
135 }
136 
137 
138 /* Read value of object. */
read_obj(json_engine_t * j)139 static int read_obj(json_engine_t *j)
140 {
141   j->state= JST_OBJ_START;
142   j->value_type= JSON_VALUE_OBJECT;
143   j->value= j->value_begin;
144   if (++j->stack_p < JSON_DEPTH_LIMIT)
145   {
146     j->stack[j->stack_p]= JST_OBJ_CONT;
147     return 0;
148   }
149   j->s.error= JE_DEPTH;
150   return 1;
151 }
152 
153 
154 /* Value of array. */
mark_array(json_engine_t * j)155 static int mark_array(json_engine_t *j)
156 {
157   j->state= JST_ARRAY_START;
158   if (++j->stack_p < JSON_DEPTH_LIMIT)
159   {
160     j->stack[j->stack_p]= JST_ARRAY_CONT;
161     j->value= j->value_begin;
162     return 0;
163   }
164   j->s.error= JE_DEPTH;
165   return 1;
166 }
167 
168 /* Read value of object. */
read_array(json_engine_t * j)169 static int read_array(json_engine_t *j)
170 {
171   j->state= JST_ARRAY_START;
172   j->value_type= JSON_VALUE_ARRAY;
173   j->value= j->value_begin;
174   if (++j->stack_p < JSON_DEPTH_LIMIT)
175   {
176     j->stack[j->stack_p]= JST_ARRAY_CONT;
177     return 0;
178   }
179   j->s.error= JE_DEPTH;
180   return 1;
181 }
182 
183 
184 
/*
  Character classes inside a JSON string constant.
  We mostly need these to parse escaping properly.
  Escapings available in JSON are:
  \" - quotation mark
  \\ - backslash
  \b - backspace UNICODE 8
  \f - formfeed UNICODE 12
  \n - newline UNICODE 10
  \r - carriage return UNICODE 13
  \t - horizontal tab UNICODE 9
  \u{four-hex-digits} - code of the character in UTF-16

  The classes S_0 .. S_F are numerically equal to the value of the
  hexadecimal digit they stand for, which read_4_hexdigits() relies on.
*/
enum json_string_char_classes {
  S_0= 0,
  S_1,
  S_2,
  S_3,
  S_4,
  S_5,
  S_6,
  S_7,
  S_8,
  S_9,
  S_A,          /* 10 */
  S_B,
  S_C,
  S_D,
  S_E,
  S_F,          /* 15 */
  S_ETC= 36,    /* rest of characters. */
  S_QUOTE,      /* 37, " */
  S_BKSL,       /* 38, \ */
  S_ERR= 100    /* disallowed */
};
220 
221 
/*
  This maps characters to their types inside a string constant.
  Indexed by code point, so callers must make sure the index is below
  128 before using it. Hex digits (in either letter case) map to their
  numeric value S_0 .. S_F; every code point below 0x20 is S_ERR.
  Each row holds eight consecutive code points.
*/
static enum json_string_char_classes json_instr_chr_map[128] = {
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   /* 0x00-0x07 */
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   /* 0x08-0x0F */
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   /* 0x10-0x17 */
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   /* 0x18-0x1F */

  S_ETC,   S_ETC,   S_QUOTE, S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   /* 0x20-0x27 */
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   /* 0x28-0x2F */
  S_0,     S_1,     S_2,     S_3,     S_4,     S_5,     S_6,     S_7,     /* '0'-'7' */
  S_8,     S_9,     S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   /* '8'-'?' */

  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,   /* '@'-'G' */
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   /* 'H'-'O' */
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   /* 'P'-'W' */
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_BKSL,  S_ETC,   S_ETC,   S_ETC,   /* 'X'-'_' */

  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,   /* '`'-'g' */
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   /* 'h'-'o' */
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   /* 'p'-'w' */
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC    /* 'x'-0x7F */
};
244 
245 
/*
  Read exactly four hexadecimal digits from 's' and pack them into
  dest[0] and dest[1], high nibble first.

  The digit values are ADDED to dest[], so the caller is expected to
  pass zeroed bytes. Every accepted digit advances s->c_str.

  Returns 0 on success. On failure stores the error code in s->error
  and returns it: JE_EOS / JE_BAD_CHR when a character cannot be read,
  JE_SYN when the character is not a hexadecimal digit.
*/
static int read_4_hexdigits(json_string_t *s, uchar *dest)
{
  int pos;

  for (pos= 0; pos < 4; pos++)
  {
    int digit;
    int n_bytes= json_next_char(s);

    if (n_bytes <= 0)
      return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;

    if (s->c_next >= 128)
      return s->error= JE_SYN;

    /* Classes S_0 .. S_F equal the digit's value; anything above is no digit. */
    digit= json_instr_chr_map[s->c_next];
    if (digit > S_F)
      return s->error= JE_SYN;

    s->c_str+= n_bytes;

    if (pos % 2)
      dest[pos/2]+= digit;        /* low nibble  */
    else
      dest[pos/2]+= digit*16;     /* high nibble */
  }
  return 0;
}
262 
263 
/*
  Decode the escape sequence that follows a backslash which has already
  been consumed. On success the decoded character is left in s->c_next
  and s->c_str points past the sequence.

  - b, f, n, r, t are replaced with the control code they denote.
  - A character classified as S_ERR is rejected with JE_ESCAPING and
    the read position is moved back onto it.
  - 'u' introduces four hexadecimal digits holding a UTF-16 code unit.
    If that unit alone is not a complete character (my_utf16_uni()
    reports MY_CS_TOOSMALL4), a second "\uXXXX" must follow and the
    two units are decoded together.
  - Any other character is kept as it is.

  Returns 0 on success and a non-zero value on failure, with s->error
  set.
*/
static int json_handle_esc(json_string_t *s)
{
  int n_bytes;
  uchar utf16[4]= {0,0,0,0};

  if ((n_bytes= json_next_char(s)) <= 0)
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
  s->c_str+= n_bytes;

  switch (s->c_next)
  {
    case 'b': s->c_next= 8;  return 0;
    case 'f': s->c_next= 12; return 0;
    case 'n': s->c_next= 10; return 0;
    case 'r': s->c_next= 13; return 0;
    case 't': s->c_next= 9;  return 0;
    default:
      break;
  }

  if (s->c_next < 128 && json_instr_chr_map[s->c_next] == S_ERR)
  {
    s->c_str-= n_bytes;
    return s->error= JE_ESCAPING;
  }

  if (s->c_next != 'u')
    return 0;

  /* First (possibly the only) UTF-16 code unit. */
  if (read_4_hexdigits(s, utf16))
    return 1;

  n_bytes= my_utf16_uni(0, &s->c_next, utf16, utf16+2);
  if (n_bytes == 2)
    return 0;
  if (n_bytes != MY_CS_TOOSMALL4)
    return s->error= JE_BAD_CHR;

  /* Two more bytes are needed: expect a second "\u" with four digits. */
  if ((n_bytes= json_next_char(s)) <= 0)
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
  if (s->c_next != '\\')
    return s->error= JE_SYN;
  s->c_str+= n_bytes;

  if ((n_bytes= json_next_char(s)) <= 0)
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
  if (s->c_next != 'u')
    return s->error= JE_SYN;
  s->c_str+= n_bytes;

  if (read_4_hexdigits(s, utf16+2))
    return 1;

  if (my_utf16_uni(0, &s->c_next, utf16, utf16+4) != 4)
    return s->error= JE_BAD_CHR;

  return 0;
}
338 
339 
/*
  Read one character of a string constant into js->c_next, decoding an
  escape sequence if the character is a backslash.

  Returns 0 when a character was read. Otherwise js->error is set
  (JE_EOS / JE_BAD_CHR, or whatever json_handle_esc() reported) and a
  non-zero value is returned.
*/
int json_read_string_const_chr(json_string_t *js)
{
  int n_bytes= json_next_char(js);

  if (n_bytes <= 0)
  {
    js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
    return 1;
  }

  js->c_str+= n_bytes;
  if (js->c_next != '\\')
    return 0;

  return json_handle_esc(js);
}
352 
353 
/*
  Skip the body of a string constant up to and including the closing
  quote. The opening quote is expected to have been consumed already.

  Sets j->value_escaped to 1 when a backslash escape is met. On success
  the state is restored from the top of the stack and 0 is returned.
  On failure j->s.error is set and a non-zero value is returned.
*/
static int skip_str_constant(json_engine_t *j)
{
  json_string_t *js= &j->s;
  int n_bytes;

  while ((n_bytes= json_next_char(js)) > 0)
  {
    js->c_str+= n_bytes;

    /* Ordinary character (including hex digits and all non-ASCII). */
    if (js->c_next >= 128 || json_instr_chr_map[js->c_next] <= S_ETC)
      continue;

    if (js->c_next == '"')
    {
      j->state= j->stack[j->stack_p];
      return 0;
    }

    /* Symbol not allowed in JSON. */
    if (js->c_next != '\\')
      return js->error= JE_NOT_JSON_CHR;

    j->value_escaped= 1;
    if (json_handle_esc(js))
      return 1;
  }

  return js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
}
384 
385 
386 /* Scalar string. */
v_string(json_engine_t * j)387 static int v_string(json_engine_t *j)
388 {
389   return skip_str_constant(j) || json_scan_next(j);
390 }
391 
392 
393 /* Read scalar string. */
read_strn(json_engine_t * j)394 static int read_strn(json_engine_t *j)
395 {
396   j->value= j->s.c_str;
397   j->value_type= JSON_VALUE_STRING;
398   j->value_escaped= 0;
399 
400   if (skip_str_constant(j))
401     return 1;
402 
403   j->state= j->stack[j->stack_p];
404   j->value_len= (int)(j->s.c_str - j->value) - 1;
405   return 0;
406 }
407 
408 
/*
  There is a dedicated parser for numeric constants. It is built like
  the main JSON parser: characters are mapped to classes and a
  state-per-class table drives the parsing. The difference is that no
  handler functions are attached to the classes; the table just tells
  which state the parser gets into.
*/
enum json_num_char_classes {
  N_MINUS= 0,
  N_PLUS=  1,
  N_ZERO=  2,
  N_DIGIT= 3,
  N_POINT= 4,
  N_E=     5,
  N_END=   6,
  N_EEND=  7,
  N_ERR=   8,
  N_NUM_CLASSES= 9
};
429 
430 
/*
  Maps the first 128 code points to the classes of the numeric parser.
  Callers must make sure the index is below 128.
  N_END is given to TAB, LF, CR, blank, ',', ']' and '}'.
  Each row holds eight consecutive code points.
*/
static enum json_num_char_classes json_num_chr_map[128] = {
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   /* 0x00-0x07 */
  N_ERR,   N_END,   N_END,   N_ERR,   N_ERR,   N_END,   N_ERR,   N_ERR,   /* 0x08-0x0F */
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   /* 0x10-0x17 */
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   /* 0x18-0x1F */

  N_END,   N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  /* 0x20-0x27 */
  N_EEND,  N_EEND,  N_EEND,  N_PLUS,  N_END,   N_MINUS, N_POINT, N_EEND,  /* 0x28-0x2F */
  N_ZERO,  N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, /* '0'-'7' */
  N_DIGIT, N_DIGIT, N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  /* '8'-'?' */

  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,  /* '@'-'G' */
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  /* 'H'-'O' */
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  /* 'P'-'W' */
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_END,   N_EEND,  N_EEND,  /* 'X'-'_' */

  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,  /* '`'-'g' */
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  /* 'h'-'o' */
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  /* 'p'-'w' */
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,   N_END,   N_EEND,  N_EEND, /* 'x'-0x7F */
};
452 
453 
/*
  States of the numeric-constant parser; they index the rows of the
  json_num_states[][] and json_num_state_flags[] tables.
*/
enum json_num_states {
  NS_OK=   0,  /* Number ended. */
  NS_GO=   1,  /* Initial state. */
  NS_GO1=  2,  /* If the number starts with '-'. */
  NS_Z=    3,  /* If the number starts with '0'. */
  NS_Z1=   4,  /* If the number starts with '-0'. */
  NS_INT=  5,  /* Integer part. */
  NS_FRAC= 6,  /* Fractional part. */
  NS_EX=   7,  /* Exponential part begins. */
  NS_EX1=  8,  /* Exponential part continues. */
  NS_NUM_STATES= 9
};
466 
467 
/*
  Transition table of the numeric parser: json_num_states[state][class]
  is either the next state (NS_*) or an error code (JE_*).
  skip_num_constant() keeps consuming characters while the entry is
  greater than zero.

  NOTE(review): every row lists eight values while N_NUM_CLASSES is
  nine. The listed values fill the columns N_MINUS .. N_EEND, so the
  column captioned ERROR below is the N_EEND one, and the N_ERR column
  is left to the implicit zero initialization (numerically NS_OK).
  Confirm that this is intended.
*/
static int json_num_states[NS_NUM_STATES][N_NUM_CLASSES]=
{
/*         -        +       0        1..9    POINT    E       END_OK   ERROR */
/*OK*/   { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
/*GO*/   { NS_GO1,  JE_SYN, NS_Z,     NS_INT, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
/*GO1*/  { JE_SYN,  JE_SYN, NS_Z1,    NS_INT, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
/*ZERO*/ { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, NS_FRAC, JE_SYN, NS_OK,  JE_BAD_CHR },
/*ZE1*/  { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, NS_FRAC, JE_SYN, NS_OK,  JE_BAD_CHR },
/*INT*/  { JE_SYN,  JE_SYN, NS_INT,   NS_INT, NS_FRAC, NS_EX,  NS_OK,  JE_BAD_CHR },
/*FRAC*/ { JE_SYN,  JE_SYN, NS_FRAC,  NS_FRAC,JE_SYN,  NS_EX,  NS_OK,  JE_BAD_CHR },
/*EX*/   { NS_EX,   NS_EX,  NS_EX1,   NS_EX1, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
/*EX1*/  { JE_SYN,  JE_SYN, NS_EX1,   NS_EX1, JE_SYN,  JE_SYN, NS_OK,  JE_BAD_CHR }
};
481 
482 
/*
  Flags associated with the states of the numeric parser.
  skip_num_constant() ORs the entry of every state it passes through
  into j->num_flags, so after parsing the flags tell whether the number
  had a minus sign, a fractional part or an exponent.
*/
static uint json_num_state_flags[NS_NUM_STATES]=
{
/*OK*/   0,
/*GO*/   0,
/*GO1*/  JSON_NUM_NEG,
/*ZERO*/ 0,
/*ZE1*/  0,
/*INT*/  0,
/*FRAC*/ JSON_NUM_FRAC_PART,
/*EX*/   JSON_NUM_EXP,
/*EX1*/  0,
};
495 
496 
/*
  Skip a numeric constant. Its first character is expected to be in
  j->s.c_next already (and to be below 128, as it indexes the map).

  Runs the json_num_states[][] machine, collecting the flags of the
  visited states in j->num_flags. Characters are consumed while the
  machine stays in a state greater than zero; the character that stops
  it is not consumed.

  Returns 1 with j->s.error set (a negative value) when the text ends
  or becomes unreadable in a state where the number may not end.
  Otherwise restores the state from the top of the stack and returns 0.
*/
static int skip_num_constant(json_engine_t *j)
{
  json_string_t *js= &j->s;
  int cur= json_num_states[NS_GO][json_num_chr_map[js->c_next]];
  int n_bytes;

  j->num_flags= 0;
  for (;;)
  {
    j->num_flags|= json_num_state_flags[cur];

    n_bytes= json_next_char(js);
    if (n_bytes <= 0 || js->c_next >= 128)
    {
      /* Nothing more to classify: decide by the reason we stopped. */
      js->error= json_eos(js) ? json_num_states[cur][N_END] : JE_BAD_CHR;
      if (js->error < 0)
        return 1;
      break;
    }

    cur= json_num_states[cur][json_num_chr_map[js->c_next]];
    if (cur <= 0)
      break;

    js->c_str+= n_bytes;
  }

  j->state= j->stack[j->stack_p];
  return 0;
}
526 
527 
528 /* Scalar numeric. */
v_number(json_engine_t * j)529 static int v_number(json_engine_t *j)
530 {
531   return skip_num_constant(j) || json_scan_next(j);
532 }
533 
534 
535 /* Read numeric constant. */
read_num(json_engine_t * j)536 static int read_num(json_engine_t *j)
537 {
538   j->value= j->value_begin;
539   if (skip_num_constant(j) == 0)
540   {
541     j->value_type= JSON_VALUE_NUMBER;
542     j->value_len= (int)(j->s.c_str - j->value_begin);
543     return 0;
544   }
545   return 1;
546 }
547 
548 
549 /* Check that the JSON string matches the argument and skip it. */
skip_string_verbatim(json_string_t * s,const char * str)550 static int skip_string_verbatim(json_string_t *s, const char *str)
551 {
552   int c_len;
553   while (*str)
554   {
555     if ((c_len= json_next_char(s)) > 0)
556     {
557       if (s->c_next == (my_wc_t) *(str++))
558       {
559         s->c_str+= c_len;
560         continue;
561       }
562       return s->error= JE_SYN;
563     }
564     return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
565   }
566 
567   return 0;
568 }
569 
570 
571 /* Scalar false. */
v_false(json_engine_t * j)572 static int v_false(json_engine_t *j)
573 {
574   if (skip_string_verbatim(&j->s, "alse"))
575    return 1;
576   j->state= j->stack[j->stack_p];
577   return json_scan_next(j);
578 }
579 
580 
581 /* Scalar null. */
v_null(json_engine_t * j)582 static int v_null(json_engine_t *j)
583 {
584   if (skip_string_verbatim(&j->s, "ull"))
585    return 1;
586   j->state= j->stack[j->stack_p];
587   return json_scan_next(j);
588 }
589 
590 
591 /* Scalar true. */
v_true(json_engine_t * j)592 static int v_true(json_engine_t *j)
593 {
594   if (skip_string_verbatim(&j->s, "rue"))
595    return 1;
596   j->state= j->stack[j->stack_p];
597   return json_scan_next(j);
598 }
599 
600 
601 /* Read false. */
read_false(json_engine_t * j)602 static int read_false(json_engine_t *j)
603 {
604   j->value_type= JSON_VALUE_FALSE;
605   j->value= j->value_begin;
606   j->state= j->stack[j->stack_p];
607   j->value_len= 5;
608   return skip_string_verbatim(&j->s, "alse");
609 }
610 
611 
612 /* Read null. */
read_null(json_engine_t * j)613 static int read_null(json_engine_t *j)
614 {
615   j->value_type= JSON_VALUE_NULL;
616   j->value= j->value_begin;
617   j->state= j->stack[j->stack_p];
618   j->value_len= 4;
619   return skip_string_verbatim(&j->s, "ull");
620 }
621 
622 
623 /* Read true. */
read_true(json_engine_t * j)624 static int read_true(json_engine_t *j)
625 {
626   j->value_type= JSON_VALUE_TRUE;
627   j->value= j->value_begin;
628   j->state= j->stack[j->stack_p];
629   j->value_len= 4;
630   return skip_string_verbatim(&j->s, "rue");
631 }
632 
633 
634 /* Disallowed character. */
not_json_chr(json_engine_t * j)635 static int not_json_chr(json_engine_t *j)
636 {
637   j->s.error= JE_NOT_JSON_CHR;
638   return 1;
639 }
640 
641 
642 /* Bad character. */
bad_chr(json_engine_t * j)643 static int bad_chr(json_engine_t *j)
644 {
645   j->s.error= JE_BAD_CHR;
646   return 1;
647 }
648 
649 
650 /* Correct finish. */
done(json_engine_t * j)651 static int done(json_engine_t *j  __attribute__((unused)))
652 {
653   return 1;
654 }
655 
656 
657 /* End of the object. */
end_object(json_engine_t * j)658 static int end_object(json_engine_t *j)
659 {
660   j->stack_p--;
661   j->state= JST_OBJ_END;
662   return 0;
663 }
664 
665 
666 /* End of the array. */
end_array(json_engine_t * j)667 static int end_array(json_engine_t *j)
668 {
669   j->stack_p--;
670   j->state= JST_ARRAY_END;
671   return 0;
672 }
673 
674 
675 /* Start reading key name. */
read_keyname(json_engine_t * j)676 static int read_keyname(json_engine_t *j)
677 {
678   j->state= JST_KEY;
679   return 0;
680 }
681 
682 
/*
  Read characters until one that is not a space is found, and report
  its class.

  On return *t_next holds the class of that character (C_ETC for any
  code point of 128 and above) and *c_len its length as reported by
  json_next_char(); the character has been consumed. If no character
  could be read, *t_next is C_EOS or C_BAD, *c_len is the non-positive
  result of json_next_char() and the position is not moved.
*/
static void get_first_nonspace(json_string_t *js, int *t_next, int *c_len)
{
  for (;;)
  {
    *c_len= json_next_char(js);
    if (*c_len <= 0)
    {
      *t_next= json_eos(js) ? C_EOS : C_BAD;
      return;
    }

    js->c_str+= *c_len;
    *t_next= (js->c_next < 128) ? json_chr_map[js->c_next] : C_ETC;
    if (*t_next != C_SPACE)
      return;
  }
}
696 
697 
698 /* Next key name. */
next_key(json_engine_t * j)699 static int next_key(json_engine_t *j)
700 {
701   int t_next, c_len;
702   get_first_nonspace(&j->s, &t_next, &c_len);
703 
704   if (t_next == C_QUOTE)
705   {
706     j->state= JST_KEY;
707     return 0;
708   }
709 
710   j->s.error= (t_next == C_EOS)  ? JE_EOS :
711               ((t_next == C_BAD) ? JE_BAD_CHR :
712                                    JE_SYN);
713   return 1;
714 }
715 
716 
/*
  Forward declarations of the handlers that the json_actions[][] table
  refers to but that are defined after it.
*/
static int skip_colon(json_engine_t *j);
static int skip_key(json_engine_t *j);
static int struct_end_cb(json_engine_t *j);
static int struct_end_qb(json_engine_t *j);
static int struct_end_cm(json_engine_t *j);
static int struct_end_eos(json_engine_t *j);
724 
725 
/*
  Next array item: called after a ',' inside an array.
  Only switches the engine to JST_VALUE; always returns 0.
*/
static int next_item(json_engine_t *j)
{
  j->state= JST_VALUE;

  return 0;
}
731 
732 
/*
  First item of an array. The character that starts the item has been
  consumed already, so the read position is moved back by the saved
  character length to let the value be read from its beginning.
  Switches the engine to JST_VALUE; always returns 0.
*/
static int array_item(json_engine_t *j)
{
  j->s.c_str-= j->sav_c_len;
  j->state= JST_VALUE;
  return 0;
}
739 
740 
/*
  The main state machine: json_actions[state][class] is the handler to
  call when a character of the given class is met in the given state.
  Rows follow the numbering of the states (the user-visible ones first,
  then those of 'enum json_all_states'), columns follow
  'enum json_char_classes' up to NR_C_CLASSES. C_SPACE has no column:
  spaces are skipped before the table is consulted.
*/
static json_state_handler json_actions[NR_JSON_STATES][NR_C_CLASSES]=
/*
   EOS              {            }             [             ]
   :                ,            "             -0..9         f
   n                t              ETC          ERR           BAD
*/
{
  {/*VALUE*/
    unexpected_eos, mark_object, syntax_error, mark_array,   syntax_error,
    syntax_error,   syntax_error,v_string,     v_number,     v_false,
    v_null,         v_true,       syntax_error, not_json_chr, bad_chr},
  {/*KEY*/
    unexpected_eos, skip_key,    skip_key,     skip_key,     skip_key,
    skip_key,       skip_key,    skip_colon,   skip_key,     skip_key,
    skip_key,       skip_key,     skip_key,     not_json_chr, bad_chr},
  {/*OBJ_START*/
    unexpected_eos, syntax_error, end_object,  syntax_error, syntax_error,
    syntax_error,   syntax_error, read_keyname, syntax_error, syntax_error,
    syntax_error,   syntax_error,   syntax_error,    not_json_chr, bad_chr},
  {/*OBJ_END*/
    struct_end_eos, syntax_error, struct_end_cb, syntax_error, struct_end_qb,
    syntax_error,   struct_end_cm,syntax_error,  syntax_error, syntax_error,
    syntax_error,   syntax_error,  syntax_error,    not_json_chr, bad_chr},
  {/*ARRAY_START*/
    unexpected_eos, array_item,   syntax_error, array_item,   end_array,
    syntax_error,   syntax_error, array_item,  array_item,  array_item,
    array_item,    array_item,    syntax_error,    not_json_chr, bad_chr},
  {/*ARRAY_END*/
    struct_end_eos, syntax_error, struct_end_cb, syntax_error,  struct_end_qb,
    syntax_error,   struct_end_cm, syntax_error, syntax_error,  syntax_error,
    syntax_error,   syntax_error,  syntax_error,    not_json_chr, bad_chr},
  {/*DONE*/
    done,           syntax_error, syntax_error, syntax_error, syntax_error,
    syntax_error,   syntax_error, syntax_error, syntax_error, syntax_error,
    syntax_error,   syntax_error, syntax_error, not_json_chr, bad_chr},
  {/*OBJ_CONT*/
    unexpected_eos, syntax_error, end_object,    syntax_error,   syntax_error,
    syntax_error,   next_key,     syntax_error,  syntax_error,   syntax_error,
    syntax_error,    syntax_error,    syntax_error,    not_json_chr, bad_chr},
  {/*ARRAY_CONT*/
    unexpected_eos, syntax_error, syntax_error,  syntax_error, end_array,
    syntax_error,   next_item,    syntax_error,  syntax_error, syntax_error,
    syntax_error,    syntax_error,    syntax_error,    not_json_chr, bad_chr},
  {/*READ_VALUE*/
    unexpected_eos, read_obj,     syntax_error,  read_array,    syntax_error,
    syntax_error,   syntax_error, read_strn,     read_num,      read_false,
    read_null,      read_true,    syntax_error,    not_json_chr, bad_chr},
};
789 
790 
791 
/*
  Prepare the engine to scan the JSON text str .. end given in the
  character set i_cs.
  The state stack is reset to the single JST_DONE entry and the engine
  starts in JST_VALUE. Always returns 0.
*/
int json_scan_start(json_engine_t *je,
                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
{
  je->state= JST_VALUE;
  je->stack_p= 0;
  je->stack[0]= JST_DONE;

  json_string_setup(&je->s, i_cs, str, end);
  return 0;
}
801 
802 
803 /* Skip colon and the value. */
skip_colon(json_engine_t * j)804 static int skip_colon(json_engine_t *j)
805 {
806   int t_next, c_len;
807 
808   get_first_nonspace(&j->s, &t_next, &c_len);
809 
810   if (t_next == C_COLON)
811   {
812     get_first_nonspace(&j->s, &t_next, &c_len);
813     return json_actions[JST_VALUE][t_next](j);
814  }
815 
816   j->s.error= (t_next == C_EOS)  ? JE_EOS :
817               ((t_next == C_BAD) ? JE_BAD_CHR:
818                                    JE_SYN);
819 
820   return 1;
821 }
822 
823 
824 /* Skip colon and the value. */
skip_key(json_engine_t * j)825 static int skip_key(json_engine_t *j)
826 {
827   int t_next, c_len;
828 
829   if (json_instr_chr_map[j->s.c_next] == S_BKSL &&
830       json_handle_esc(&j->s))
831     return 1;
832 
833   while (json_read_keyname_chr(j) == 0) {}
834 
835   if (j->s.error)
836     return 1;
837 
838   get_first_nonspace(&j->s, &t_next, &c_len);
839   return json_actions[JST_VALUE][t_next](j);
840 }
841 
842 
843 /*
844   Handle EOS after the end of an object or array.
845   To do that we should pop the stack to see if
846   we are inside an object, or an array, and
847   run our 'state machine' accordingly.
848 */
struct_end_eos(json_engine_t * j)849 static int struct_end_eos(json_engine_t *j)
850 { return json_actions[j->stack[j->stack_p]][C_EOS](j); }
851 
852 
853 /*
854   Handle '}' after the end of an object or array.
855   To do that we should pop the stack to see if
856   we are inside an object, or an array, and
857   run our 'state machine' accordingly.
858 */
struct_end_cb(json_engine_t * j)859 static int struct_end_cb(json_engine_t *j)
860 { return json_actions[j->stack[j->stack_p]][C_RCURB](j); }
861 
862 
863 /*
864   Handle ']' after the end of an object or array.
865   To do that we should pop the stack to see if
866   we are inside an object, or an array, and
867   run our 'state machine' accordingly.
868 */
struct_end_qb(json_engine_t * j)869 static int struct_end_qb(json_engine_t *j)
870 { return json_actions[j->stack[j->stack_p]][C_RSQRB](j); }
871 
872 
873 /*
874   Handle ',' after the end of an object or array.
875   To do that we should pop the stack to see if
876   we are inside an object, or an array, and
877   run our 'state machine' accordingly.
878 */
struct_end_cm(json_engine_t * j)879 static int struct_end_cm(json_engine_t *j)
880 { return json_actions[j->stack[j->stack_p]][C_COMMA](j); }
881 
882 
/*
  Read the next character of a key name into j->s.c_next.

  Returns 0 when a character of the name was read (escape sequences
  are decoded). A non-zero result means the name is over:
  - the closing quote was met; spaces after it are skipped and the ':'
    is consumed, the engine switches to JST_VALUE and j->s.error is
    left as it was;
  - or an error occurred and j->s.error tells which one (JE_SYN when
    something other than ':' follows the closing quote, JE_STRING_CONST
    for a disallowed character, JE_EOS / JE_BAD_CHR when nothing can be
    read, or the error set by json_handle_esc()).
*/
int json_read_keyname_chr(json_engine_t *j)
{
  json_string_t *js= &j->s;
  int n_bytes= json_next_char(js);
  int cls;

  if (n_bytes <= 0)
  {
    js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
    return 1;
  }

  js->c_str+= n_bytes;

  /* Non-ASCII and ordinary characters are simply part of the name. */
  if (js->c_next >= 128)
    return 0;
  cls= json_instr_chr_map[js->c_next];
  if (cls <= S_ETC)
    return 0;

  if (cls == S_BKSL)
    return json_handle_esc(js);

  if (cls == S_ERR)
  {
    js->c_str-= n_bytes;
    js->error= JE_STRING_CONST;
    return 1;
  }

  if (cls == S_QUOTE)
  {
    /* Skip spaces until ':'. */
    while ((n_bytes= json_next_char(js)) > 0)
    {
      if (js->c_next == ':')
      {
        js->c_str+= n_bytes;
        j->state= JST_VALUE;
        return 1;
      }

      if (js->c_next >= 128 || json_chr_map[js->c_next] != C_SPACE)
      {
        js->error= JE_SYN;
        return 1;
      }

      js->c_str+= n_bytes;
    }

    js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
    return 1;
  }

  /* No other class exists above S_ETC; kept for parity with the EOS path. */
  js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
  return 1;
}
930 
931 
/*
  Read the value at the current position and describe it in the
  engine's value fields.

  If the engine is in the KEY state, the rest of the key name and the
  ':' are skipped first. Then the first non-space character selects the
  handler from the READ_VALUE row of json_actions[][].
  j->value_begin is set to where that character starts and j->value_end
  to the position reached when the handler returns.

  Returns the handler's result, or 1 if skipping the key failed.
*/
int json_read_value(json_engine_t *j)
{
  int cls, n_bytes, rc;

  j->value_type= JSON_VALUE_UNINITALIZED;

  if (j->state == JST_KEY)
  {
    for (;;)
    {
      if (json_read_keyname_chr(j) != 0)
        break;
    }

    if (j->s.error)
      return 1;
  }

  get_first_nonspace(&j->s, &cls, &n_bytes);

  /* The character is consumed already, so step back to its first byte. */
  j->value_begin= j->s.c_str-n_bytes;
  rc= json_actions[JST_READ_VALUE][cls](j);
  j->value_end= j->s.c_str;
  return rc;
}
952 
953 
/*
  Make one step of the scanner: read the first non-space character
  (its length is kept in j->sav_c_len) and call the handler that
  json_actions[][] assigns to its class in the current state.
  Returns what the handler returns.
*/
int json_scan_next(json_engine_t *j)
{
  int cls;
  json_state_handler handler;

  get_first_nonspace(&j->s, &cls, &j->sav_c_len);

  handler= json_actions[j->state][cls];
  return handler(j);
}
961 
962 
/*
  Character classes recognized by the JSON path parser. They are used
  as column indexes of the path transitions table, N_PATH_CLASSES being
  the number of columns.
*/
enum json_path_chr_classes {
  P_EOS,    /* end of string */
  P_USD,    /* $ */
  P_ASTER,  /* * */
  P_LSQRB,  /* [ */
  P_RSQRB,  /* ] */
  P_POINT,  /* . */
  P_ZERO,   /* 0 */
  P_DIGIT,  /* 123456789 */
  P_L,      /* l (for "lax") */
  P_S,      /* s (for "strict") */
  P_SPACE,  /* space */
  P_BKSL,   /* \ */
  P_QUOTE,  /* " */
  P_ETC,    /* everything else */
  P_ERR,    /* character disallowed in JSON*/
  P_BAD,    /* invalid character */
  N_PATH_CLASSES,
};


/*
  Maps the first 128 code points to the classes of the path parser.
  Each row holds eight consecutive code points.

  P_L is given to 'L' / 'l' and P_S to 'S' / 's' (code points 0x53 and
  0x73), matching the description of these classes in the enum above.
  The P_S entries used to sit one position to the left, on 'R' / 'r'.
*/
static enum json_path_chr_classes json_path_chr_map[128] = {
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   /* 0x00-0x07 */
  P_ERR,   P_SPACE, P_SPACE, P_ERR,   P_ERR,   P_SPACE, P_ERR,   P_ERR,   /* 0x08-0x0F */
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   /* 0x10-0x17 */
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   /* 0x18-0x1F */

  P_SPACE, P_ETC,   P_QUOTE, P_ETC,   P_USD,   P_ETC,   P_ETC,   P_ETC,   /* 0x20-0x27 */
  P_ETC,   P_ETC,   P_ASTER, P_ETC,   P_ETC,   P_ETC,   P_POINT, P_ETC,   /* 0x28-0x2F */
  P_ZERO,  P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, /* '0'-'7' */
  P_DIGIT, P_DIGIT, P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   /* '8'-'?' */

  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   /* '@'-'G' */
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,   /* 'H'-'O' */
  P_ETC,   P_ETC,   P_ETC,   P_S,     P_ETC,   P_ETC,   P_ETC,   P_ETC,   /* 'P'-'W' */
  P_ETC,   P_ETC,   P_ETC,   P_LSQRB, P_BKSL, P_RSQRB, P_ETC,   P_ETC,    /* 'X'-'_' */

  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   /* '`'-'g' */
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,   /* 'h'-'o' */
  P_ETC,   P_ETC,   P_ETC,   P_S,     P_ETC,   P_ETC,   P_ETC,   P_ETC,   /* 'p'-'w' */
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC    /* 'x'-0x7F */
};
1005 
1006 
/*
  States of the JSON path parser. The states before N_PATH_STATES are
  the rows of the transitions table; those after it can only appear as
  values in the table.
*/
enum json_path_states {
  PS_GO=   0,   /* Initial state. */
  PS_LAX=  1,   /* Parse the 'lax' keyword. */
  PS_PT=   2,   /* New path's step begins. */
  PS_AR=   3,   /* Parse array step. */
  PS_SAR=  4,   /* space after the '['. */
  PS_AWD=  5,   /* Array wildcard. */
  PS_Z=    6,   /* '0' (as an array item number). */
  PS_INT=  7,   /* Parse integer (as an array item number). */
  PS_AS=   8,   /* Space. */
  PS_KEY=  9,   /* Key. */
  PS_KNM=  10,  /* Parse key name. */
  PS_KWD=  11,  /* Key wildcard. */
  PS_AST=  12,  /* Asterisk. */
  PS_DWD=  13,  /* Double wildcard. */
  PS_KEYX= 14,  /* Key started with quote ("). */
  PS_KNMX= 15,  /* Parse quoted key name. */
  N_PATH_STATES= 16, /* Below are states that aren't in the transitions table. */
  PS_SCT=  17,  /* Parse the 'strict' keyword. */
  PS_EKY=  18,  /* '.' after the keyname so next step is the key. */
  PS_EKYX= 19,  /* Closing " for the quoted keyname. */
  PS_EAR=  20,  /* '[' after the keyname so next step is the array. */
  PS_ESC=  21,  /* Escaping in the keyname. */
  PS_ESCX= 22,  /* Escaping in the quoted keyname. */
  PS_OK=   23,  /* Path normally ended. */
  PS_KOK=  24   /* EOS after the keyname so end the path normally. */
};
1034 
1035 
1036 static int json_path_transitions[N_PATH_STATES][N_PATH_CLASSES]=
1037 {
1038 /*
1039             EOS       $,      *       [       ]       .       0
1040             1..9    L       S       SPACE   \       "       ETC
1041             ERR              BAD
1042 */
1043 /* GO  */ { JE_EOS, PS_PT,  JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1044             JE_SYN, PS_LAX, PS_SCT, PS_GO,  JE_SYN, JE_SYN, JE_SYN,
1045             JE_NOT_JSON_CHR, JE_BAD_CHR},
1046 /* LAX */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1047             JE_SYN, PS_LAX, JE_SYN, PS_GO,  JE_SYN, JE_SYN, JE_SYN,
1048             JE_NOT_JSON_CHR, JE_BAD_CHR},
1049 /* PT */  { PS_OK,  JE_SYN, PS_AST, PS_AR,  JE_SYN, PS_KEY, JE_SYN, JE_SYN,
1050             JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1051             JE_NOT_JSON_CHR, JE_BAD_CHR},
1052 /* AR */  { JE_EOS, JE_SYN, PS_AWD, JE_SYN, JE_SYN, JE_SYN, PS_Z,
1053             PS_INT, JE_SYN, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
1054             JE_NOT_JSON_CHR, JE_BAD_CHR},
1055 /* SAR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, PS_PT,  JE_SYN, PS_Z,
1056             PS_INT, JE_SYN, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
1057             JE_NOT_JSON_CHR, JE_BAD_CHR},
1058 /* AWD */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, JE_SYN,
1059             JE_SYN, JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_SYN,
1060             JE_NOT_JSON_CHR, JE_BAD_CHR},
1061 /* Z */   { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, JE_SYN,
1062             JE_SYN, JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_SYN,
1063             JE_NOT_JSON_CHR, JE_BAD_CHR},
1064 /* INT */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, PS_INT,
1065             PS_INT, JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_SYN,
1066             JE_NOT_JSON_CHR, JE_BAD_CHR},
1067 /* AS */  { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, JE_SYN, JE_SYN,
1068             JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_SYN,
1069             JE_NOT_JSON_CHR, JE_BAD_CHR},
1070 /* KEY */ { JE_EOS, PS_KNM, PS_KWD, JE_SYN, PS_KNM, JE_SYN, PS_KNM,
1071             PS_KNM, PS_KNM, PS_KNM, PS_KNM, JE_SYN, PS_KEYX, PS_KNM,
1072             JE_NOT_JSON_CHR, JE_BAD_CHR},
1073 /* KNM */ { PS_KOK, PS_KNM, PS_AST, PS_EAR, PS_KNM, PS_EKY, PS_KNM,
1074             PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_ESC, PS_KNM, PS_KNM,
1075             JE_NOT_JSON_CHR, JE_BAD_CHR},
1076 /* KWD */ { PS_OK,  JE_SYN, JE_SYN, PS_AR,  JE_SYN, PS_EKY, JE_SYN,
1077             JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1078             JE_NOT_JSON_CHR, JE_BAD_CHR},
1079 /* AST */ { JE_SYN, JE_SYN, PS_DWD, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1080             JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1081             JE_NOT_JSON_CHR, JE_BAD_CHR},
1082 /* DWD */ { JE_SYN, JE_SYN, PS_AST, PS_AR,  JE_SYN, PS_KEY, JE_SYN, JE_SYN,
1083             JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1084             JE_NOT_JSON_CHR, JE_BAD_CHR},
1085 /* KEYX*/ { JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
1086             PS_KNMX,PS_KNMX, PS_KNMX, PS_KNMX, PS_ESCX, PS_EKYX, PS_KNMX,
1087             JE_NOT_JSON_CHR, JE_BAD_CHR},
1088 /* KNMX */{ JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
1089             PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,PS_ESCX, PS_EKYX, PS_KNMX,
1090             JE_NOT_JSON_CHR, JE_BAD_CHR},
1091 };
1092 
1093 
json_path_setup(json_path_t * p,CHARSET_INFO * i_cs,const uchar * str,const uchar * end)1094 int json_path_setup(json_path_t *p,
1095                     CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
1096 {
1097   int c_len, t_next, state= PS_GO;
1098   enum json_path_step_types double_wildcard= JSON_PATH_KEY_NULL;
1099 
1100   json_string_setup(&p->s, i_cs, str, end);
1101 
1102   p->steps[0].type= JSON_PATH_ARRAY_WILD;
1103   p->last_step= p->steps;
1104   p->mode_strict= FALSE;
1105   p->types_used= JSON_PATH_KEY_NULL;
1106 
1107   do
1108   {
1109     if ((c_len= json_next_char(&p->s)) <= 0)
1110       t_next= json_eos(&p->s) ? P_EOS : P_BAD;
1111     else
1112       t_next= (p->s.c_next >= 128) ? P_ETC : json_path_chr_map[p->s.c_next];
1113 
1114     if ((state= json_path_transitions[state][t_next]) < 0)
1115       return p->s.error= state;
1116 
1117     p->s.c_str+= c_len;
1118 
1119     switch (state)
1120     {
1121     case PS_LAX:
1122       if ((p->s.error= skip_string_verbatim(&p->s, "ax")))
1123         return 1;
1124       p->mode_strict= FALSE;
1125       continue;
1126     case PS_SCT:
1127       if ((p->s.error= skip_string_verbatim(&p->s, "rict")))
1128         return 1;
1129       p->mode_strict= TRUE;
1130       state= PS_LAX;
1131       continue;
1132     case PS_KWD:
1133     case PS_AWD:
1134       p->last_step->type|= JSON_PATH_WILD;
1135       p->types_used|= JSON_PATH_WILD;
1136       continue;
1137     case PS_INT:
1138       p->last_step->n_item*= 10;
1139       p->last_step->n_item+= p->s.c_next - '0';
1140       continue;
1141     case PS_EKYX:
1142       p->last_step->key_end= p->s.c_str - c_len;
1143       state= PS_PT;
1144       continue;
1145     case PS_EKY:
1146       p->last_step->key_end= p->s.c_str - c_len;
1147       state= PS_KEY;
1148       /* fall through */
1149     case PS_KEY:
1150       p->last_step++;
1151       if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
1152         return p->s.error= JE_DEPTH;
1153       p->types_used|= p->last_step->type= JSON_PATH_KEY | double_wildcard;
1154       double_wildcard= JSON_PATH_KEY_NULL;
1155       /* fall through */
1156     case PS_KEYX:
1157       p->last_step->key= p->s.c_str;
1158       continue;
1159     case PS_EAR:
1160       p->last_step->key_end= p->s.c_str - c_len;
1161       state= PS_AR;
1162       /* fall through */
1163     case PS_AR:
1164       p->last_step++;
1165       if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
1166         return p->s.error= JE_DEPTH;
1167       p->types_used|= p->last_step->type= JSON_PATH_ARRAY | double_wildcard;
1168       double_wildcard= JSON_PATH_KEY_NULL;
1169       p->last_step->n_item= 0;
1170       continue;
1171     case PS_ESC:
1172       if (json_handle_esc(&p->s))
1173         return 1;
1174       state= PS_KNM;
1175       continue;
1176     case PS_ESCX:
1177       if (json_handle_esc(&p->s))
1178         return 1;
1179       state= PS_KNMX;
1180       continue;
1181     case PS_KOK:
1182       p->last_step->key_end= p->s.c_str - c_len;
1183       state= PS_OK;
1184       break; /* 'break' as the loop supposed to end after that. */
1185     case PS_DWD:
1186       double_wildcard= JSON_PATH_DOUBLE_WILD;
1187       continue;
1188     };
1189   } while (state != PS_OK);
1190 
1191   return double_wildcard ? (p->s.error= JE_SYN) : 0;
1192 }
1193 
1194 
/*
  Scan forward until the engine's stack pointer drops below 'level'.

  Returns 0 once j->stack_p < level (checked before every scan step),
  1 when json_scan_next() reports a nonzero result first.
*/
int json_skip_to_level(json_engine_t *j, int level)
{
  for (;;)
  {
    if (j->stack_p < level)
      return 0;
    if (json_scan_next(j) != 0)
      return 1;
  }
}
1204 
1205 
1206 /*
1207   works as json_skip_level() but also counts items on the current
1208   level skipped.
1209 */
json_skip_level_and_count(json_engine_t * j,int * n_items_skipped)1210 int json_skip_level_and_count(json_engine_t *j, int *n_items_skipped)
1211 {
1212   int level= j->stack_p;
1213 
1214   *n_items_skipped= 0;
1215   while (json_scan_next(j) == 0)
1216   {
1217     if (j->stack_p < level)
1218       return 0;
1219     if (j->stack_p == level && j->state == JST_VALUE)
1220       (*n_items_skipped)++;
1221   }
1222 
1223   return 1;
1224 }
1225 
1226 
/*
  Read the value at the current position and step over it.
  A scalar is done once it is read; for anything else the whole
  nested level is skipped.

  Returns 0 on success, nonzero on failure.
*/
int json_skip_key(json_engine_t *j)
{
  if (json_read_value(j) != 0)
    return 1;

  return json_value_scalar(j) ? 0 : json_skip_level(j);
}
1237 
1238 
/*
  Value stored in array_counters[] by handle_match() for a path step
  it stepped over; json_find_path() checks for it on JST_OBJ_END.
*/
#define SKIPPED_STEP_MARK ((uint) ~0)

/*
  Current step of the path matches the JSON construction.
  Now we should either stop the search or go to the next
  step of the path.

  Reads the value at the current position. Returns 0 when the search
  should continue and nonzero when it should stop; json_find_path()
  then returns je->s.error. On success *p_cur_step may be advanced to
  the step that has to be matched inside the value just read.
*/
static int handle_match(json_engine_t *je, json_path_t *p,
                        json_path_step_t **p_cur_step, uint *array_counters)
{
  json_path_step_t *next_step= *p_cur_step + 1;

  DBUG_ASSERT(*p_cur_step < p->last_step);

  if (json_read_value(je))
    return 1;

  if (json_value_scalar(je))
  {
    /*
      Step over the following steps that are exactly JSON_PATH_ARRAY
      with item number 0. If that exhausts the path, put the read
      position back to the beginning of the value and stop.
    */
    while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0)
    {
      if (++next_step > p->last_step)
      {
        je->s.c_str= je->value_begin;
        return 1;
      }
    }
    /* Steps remain that were not stepped over: keep searching. */
    return 0;
  }

  if (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0 &&
      je->value_type & JSON_VALUE_OBJECT)
  {
    /*
      The value is an object but the path continues with item 0 of an
      array: step over such steps, marking each one as skipped.
    */
    do
    {
      array_counters[next_step - p->steps]= SKIPPED_STEP_MARK;
      if (++next_step > p->last_step)
      {
        /*
          The path is exhausted: rewind to the beginning of the value.
          NOTE(review): stack_p-- presumably undoes a level pushed by
          json_read_value() for the object -- confirm.
        */
        je->s.c_str= je->value_begin;
        je->stack_p--;
        return 1;
      }
    } while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0);
  }


  /* Start counting array items for the step we are about to enter. */
  array_counters[next_step - p->steps]= 0;

  /*
    The container kind must agree with the kind of the next step,
    otherwise nothing inside it can match and the level is skipped.
    NOTE(review): the direct comparison assumes the object/array value
    types are numerically equal to the key/array step types -- confirm.
  */
  if ((int) je->value_type !=
      (int) (next_step->type & JSON_PATH_KEY_OR_ARRAY))
    return json_skip_level(je);

  *p_cur_step= next_step;
  return 0;
}
1294 
1295 
1296 /*
1297   Check if the name of the current JSON key matches
1298   the step of the path.
1299 */
json_key_matches(json_engine_t * je,json_string_t * k)1300 int json_key_matches(json_engine_t *je, json_string_t *k)
1301 {
1302   while (json_read_keyname_chr(je) == 0)
1303   {
1304     if (json_read_string_const_chr(k) ||
1305         je->s.c_next != k->c_next)
1306       return 0;
1307   }
1308 
1309   return json_read_string_const_chr(k);
1310 }
1311 
1312 
/*
  Scan the JSON document looking for the location described by the
  path 'p'.

  *p_cur_step is the step being matched at the current nesting level
  and is moved forth and back as the scan enters and leaves objects
  and arrays. array_counters[] holds, per step, the number of array
  items seen so far.

  Returns je->s.error when the scan is stopped (the last step matched,
  handle_match() asked to stop, or skipping a key failed), and 1 when
  json_scan_next() ended the scan.
*/
int json_find_path(json_engine_t *je,
                   json_path_t *p, json_path_step_t **p_cur_step,
                   uint *array_counters)
{
  json_string_t key_name;

  json_string_set_cs(&key_name, p->s.cs);

  do
  {
    json_path_step_t *step= *p_cur_step;

    if (je->state == JST_KEY)
    {
      int key_fits= 1;

      DBUG_ASSERT(step->type & JSON_PATH_KEY);
      if (!(step->type & JSON_PATH_WILD))
      {
        json_string_set_str(&key_name, step->key, step->key_end);
        key_fits= json_key_matches(je, &key_name) != 0;
      }

      if (!key_fits)
      {
        /* Not our key, step over its value. */
        if (json_skip_key(je))
          return je->s.error;
      }
      else if (step == p->last_step ||
               handle_match(je, p, p_cur_step, array_counters))
        return je->s.error;
    }
    else if (je->state == JST_VALUE)
    {
      int item_fits;

      DBUG_ASSERT(step->type & JSON_PATH_ARRAY);
      item_fits= (step->type & JSON_PATH_WILD) != 0;
      if (!item_fits)
      {
        /* The counter is only touched for non-wildcard steps. */
        uint *counter= array_counters + (step - p->steps);
        item_fits= (step->n_item == *counter);
        ++*counter;
      }

      if (!item_fits)
        json_skip_array_item(je);
      else if (step == p->last_step ||
               handle_match(je, p, p_cur_step, array_counters))
        return je->s.error;
    }
    else if (je->state == JST_OBJ_END)
    {
      /* Go one step back, and further back over the skipped steps. */
      do
      {
        (*p_cur_step)--;
      } while (*p_cur_step > p->steps &&
               array_counters[*p_cur_step - p->steps] == SKIPPED_STEP_MARK);
    }
    else if (je->state == JST_ARRAY_END)
    {
      (*p_cur_step)--;
    }
    else
    {
      DBUG_ASSERT(0);
    }
  } while (json_scan_next(je) == 0);

  /* No luck. */
  return 1;
}
1377 
1378 
/*
  Prepare 'state' for a search of 'n_paths' paths at once, starting at
  depth 0, and run the first search step.

  Returns whatever json_find_paths_next() returns.
*/
int json_find_paths_first(json_engine_t *je, json_find_paths_t *state,
                          uint n_paths, json_path_t *paths, uint *path_depths)
{
  state->cur_depth= 0;
  state->paths= paths;
  state->n_paths= n_paths;
  state->path_depths= path_depths;

  return json_find_paths_next(je, state);
}
1388 
1389 
/*
  Continue the multi-path search set up by json_find_paths_first().

  For every scanner state the still-alive paths (those whose
  path_depths[] entry is not below cur_depth) are checked against the
  current key or array item.

  Returns je->s.error when the scan is stopped (a path was found or
  skipping a level failed), and 1 when json_scan_next() ended the scan.
*/
int json_find_paths_next(json_engine_t *je, json_find_paths_t *state)
{
  uint p_c;
  int path_found, no_match_found;
  do
  {
    switch (je->state)
    {
    case JST_KEY:
      path_found= FALSE;
      no_match_found= TRUE;
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        json_path_step_t *cur_step;
        /* Only paths that are alive and have a key step at this depth. */
        if (state->path_depths[p_c] <
              state->cur_depth /* Path already failed. */ ||
            !((cur_step= state->paths[p_c].steps + state->cur_depth)->type &
              JSON_PATH_KEY))
          continue;

        if (!(cur_step->type & JSON_PATH_WILD))
        {
          json_string_t key_name;
          json_string_setup(&key_name, state->paths[p_c].s.cs,
                            cur_step->key, cur_step->key_end);
          /*
            NOTE(review): json_key_matches() reads the key name from
            'je'; with several non-wildcard paths it is called more
            than once for the same key, so later calls presumably do
            not see the whole name -- confirm.
          */
          if (!json_key_matches(je, &key_name))
            continue;
        }
        /*
          NOTE(review): cur_step is steps + cur_depth, so this equality
          holds only when last_step == steps; looks like
          'cur_step == last_step' was meant -- confirm.
        */
        if (cur_step == state->paths[p_c].last_step + state->cur_depth)
          path_found= TRUE;
        else
        {
          no_match_found= FALSE;
          state->path_depths[p_c]= state->cur_depth + 1;
        }
      }
      if (path_found)
        /* Return the result. */
        goto exit;
      if (no_match_found)
      {
        /* No possible paths left to check. Just skip the level. */
        if (json_skip_level(je))
          goto exit;
      }

      break;
    case JST_VALUE:
      path_found= FALSE;
      no_match_found= TRUE;
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        json_path_step_t *cur_step;
        /* Only paths that are alive and have an array step at this depth. */
        if (state->path_depths[p_c]< state->cur_depth /* Path already failed. */ ||
            !((cur_step= state->paths[p_c].steps + state->cur_depth)->type &
              JSON_PATH_ARRAY))
          continue;
        if (cur_step->type & JSON_PATH_WILD ||
            cur_step->n_item == state->array_counters[state->cur_depth])
        {
          /* Array item matches. */
          /* NOTE(review): same last_step + cur_depth comparison as above. */
          if (cur_step == state->paths[p_c].last_step + state->cur_depth)
            path_found= TRUE;
          else
          {
            no_match_found= FALSE;
            state->path_depths[p_c]= state->cur_depth + 1;
          }
        }
      }

      if (path_found)
        goto exit;

      /* The result of skipping the item is not checked here. */
      if (no_match_found)
        json_skip_array_item(je);

      /*
        NOTE(review): this function never resets array_counters[] when
        an array starts -- confirm the caller does it.
      */
      state->array_counters[state->cur_depth]++;
      break;
    case JST_OBJ_START:
    case JST_ARRAY_START:
      /* Going one level down: alive paths of the fitting kind follow. */
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        if (state->path_depths[p_c] < state->cur_depth)
          /* Path already failed. */
          continue;
        if (state->paths[p_c].steps[state->cur_depth].type &
            ((je->state == JST_OBJ_START) ? JSON_PATH_KEY : JSON_PATH_ARRAY))
          state->path_depths[p_c]++;
      }
      state->cur_depth++;
      break;
    case JST_OBJ_END:
    case JST_ARRAY_END:
      /* Going one level up, together with the paths that were alive here. */
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        if (state->path_depths[p_c] < state->cur_depth)
          continue;
        state->path_depths[p_c]--;
      }
      state->cur_depth--;
      break;
    default:
      DBUG_ASSERT(0);
      break;
    };
  } while (json_scan_next(je) == 0);

  /* No luck. */
  return 1;

exit:
  return je->s.error;
}
1504 
1505 
/*
  Convert the bytes [ascii, ascii_end) one by one into the json_cs
  charset and store them in [json, json_end).

  Returns the number of bytes written. If the charset handler fails to
  store a character, its non-positive result is returned instead.
*/
int json_append_ascii(CHARSET_INFO *json_cs,
                      uchar *json, uchar *json_end,
                      const uchar *ascii, const uchar *ascii_end)
{
  uchar *out= json;
  const uchar *in;

  for (in= ascii; in < ascii_end; in++)
  {
    int n= json_cs->cset->wc_mb(json_cs, (my_wc_t) *in, out, json_end);
    if (n <= 0)
      return n;                     /* Error return. */
    out+= n;
  }

  return (int)(out - json);
}
1528 
1529 
/*
  Read the characters of the JSON string [json_str, json_end) (charset
  json_cs) with json_read_string_const_chr() and store them in the
  res_cs charset in [res, res_end).

  A character the result charset reports as MY_CS_ILUNI is replaced
  with '?'.

  Returns the number of bytes written, or -1 when a character cannot
  be stored or the input did not end with JE_EOS.
*/
int json_unescape(CHARSET_INFO *json_cs,
                  const uchar *json_str, const uchar *json_end,
                  CHARSET_INFO *res_cs, uchar *res, uchar *res_end)
{
  json_string_t s;
  uchar *out= res;

  json_string_setup(&s, json_cs, json_str, json_end);

  for (;;)
  {
    int n;

    if (json_read_string_const_chr(&s) != 0)
      break;

    n= res_cs->cset->wc_mb(res_cs, s.c_next, out, res_end);
    if (n <= 0)
    {
      /* Anything but "no such character in the charset" is fatal. */
      if (n != MY_CS_ILUNI)
        return -1;

      /*
        Result charset doesn't support the json's character.
        Let's replace it with the '?' symbol.
      */
      n= res_cs->cset->wc_mb(res_cs, '?', out, res_end);
      if (n <= 0)
        return -1;
    }
    out+= n;
  }

  if (s.error != JE_EOS)
    return -1;

  return (int)(out - res);
}
1564 
1565 
/*
  How a character has to be escaped. Apart from ESC_, the value of a
  class is the very character json_escape() writes after the backslash
  (for ESC_BS the original character is written instead).
*/
enum json_esc_char_classes {
  ESC_= 0,    /* No need to escape. */
  ESC_U= 'u', /* Character not allowed in JSON. Always escape as \uXXXX. */
  ESC_B= 'b', /* Backspace. Escape as \b */
  ESC_F= 'f', /* Formfeed. Escape as \f */
  ESC_N= 'n', /* Newline. Escape as \n */
  ESC_R= 'r', /* Return. Escape as \r */
  ESC_T= 't', /* Tab. Escape as \t */
  ESC_BS= '\\'  /* Backslash or '"'. Escape by prefixing with a backslash. */
};
1577 
1578 
/*
  This specifies how we should escape the character.
  Covers the code points 0x00-0x5F, 8 per row; json_escape() leaves
  anything from 0x60 on unescaped without consulting the table.
*/
static enum json_esc_char_classes json_escape_chr_map[0x60] = {
  /* 0x00-0x1F: control characters. */
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
  ESC_B,   ESC_T,   ESC_N,   ESC_U,   ESC_F,   ESC_R,   ESC_U,   ESC_U,
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,

  /* 0x20-0x3F: only the double quote (0x22) needs escaping. */
  ESC_,    ESC_,    ESC_BS,  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,

  /* 0x40-0x5F: only the backslash (0x5C) needs escaping. */
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_BS,  ESC_,    ESC_,    ESC_,
};
1596 
1597 
1598 static const char hexconv[16] = "0123456789ABCDEF";
1599 
1600 
json_escape(CHARSET_INFO * str_cs,const uchar * str,const uchar * str_end,CHARSET_INFO * json_cs,uchar * json,uchar * json_end)1601 int json_escape(CHARSET_INFO *str_cs,
1602                 const uchar *str, const uchar *str_end,
1603                 CHARSET_INFO *json_cs, uchar *json, uchar *json_end)
1604 {
1605   const uchar *json_start= json;
1606 
1607   while (str < str_end)
1608   {
1609     my_wc_t c_chr;
1610     int c_len;
1611     if ((c_len= str_cs->cset->mb_wc(str_cs, &c_chr, str, str_end)) > 0)
1612     {
1613       enum json_esc_char_classes c_class;
1614 
1615       str+= c_len;
1616       if (c_chr >= 0x60 || (c_class= json_escape_chr_map[c_chr]) == ESC_)
1617       {
1618         if ((c_len= json_cs->cset->wc_mb(json_cs, c_chr, json, json_end)) > 0)
1619         {
1620           json+= c_len;
1621           continue;
1622         }
1623         if (c_len < 0)
1624         {
1625           /* JSON buffer is depleted. */
1626           return -1;
1627         }
1628 
1629         /* JSON charset cannot convert this character. */
1630         c_class= ESC_U;
1631       }
1632 
1633       if ((c_len= json_cs->cset->wc_mb(json_cs, '\\', json, json_end)) <= 0 ||
1634           (c_len= json_cs->cset->wc_mb(json_cs,
1635                                        (c_class == ESC_BS) ? c_chr : c_class,
1636                                        json+= c_len, json_end)) <= 0)
1637       {
1638         /* JSON buffer is depleted. */
1639         return -1;
1640       }
1641       json+= c_len;
1642 
1643       if (c_class != ESC_U)
1644         continue;
1645 
1646       {
1647         /* We have to use /uXXXX escaping. */
1648         uchar utf16buf[4];
1649         uchar code_str[8];
1650         int u_len= my_uni_utf16(0, c_chr, utf16buf, utf16buf + 4);
1651 
1652         code_str[0]= hexconv[utf16buf[0] >> 4];
1653         code_str[1]= hexconv[utf16buf[0] & 15];
1654         code_str[2]= hexconv[utf16buf[1] >> 4];
1655         code_str[3]= hexconv[utf16buf[1] & 15];
1656 
1657         if (u_len > 2)
1658         {
1659           code_str[4]= hexconv[utf16buf[2] >> 4];
1660           code_str[5]= hexconv[utf16buf[2] & 15];
1661           code_str[6]= hexconv[utf16buf[3] >> 4];
1662           code_str[7]= hexconv[utf16buf[3] & 15];
1663         }
1664 
1665         if ((c_len= json_append_ascii(json_cs, json, json_end,
1666                                       code_str, code_str+u_len*2)) > 0)
1667         {
1668           json+= c_len;
1669           continue;
1670         }
1671         /* JSON buffer is depleted. */
1672         return -1;
1673       }
1674     }
1675     else /* c_len == 0, an illegal symbol. */
1676       return -1;
1677   }
1678 
1679   return (int)(json - json_start);
1680 }
1681 
1682 
/*
  Start walking over all the values of the JSON text [str, end) given
  in the charset i_cs.

  p->last_step is set one element before p->steps; that is how
  json_get_path_next() recognizes its first call.

  Always returns 0.
*/
int json_get_path_start(json_engine_t *je, CHARSET_INFO *i_cs,
                        const uchar *str, const uchar *end,
                        json_path_t *p)
{
  /* "No steps yet" marker. */
  p->last_step= p->steps - 1;

  json_scan_start(je, i_cs, str, end);

  return 0;
}
1691 
1692 
/*
  Move on to the next value of the walk started with
  json_get_path_start(), keeping in 'p' the path of that value.

  Returns 0 when a value has been read with json_read_value(),
  1 on a read error or when json_scan_next() stops the scan.
*/
int json_get_path_next(json_engine_t *je, json_path_t *p)
{
  if (p->last_step < p->steps)
  {
    /* First call: read the topmost value, its path is the root step. */
    if (json_read_value(je))
      return 1;

    p->last_step= p->steps;
    p->steps[0].type= JSON_PATH_ARRAY_WILD;
    p->steps[0].n_item= 0;
    return 0;
  }
  else
  {
    if (json_value_scalar(je))
    {
      /* Done with a scalar: in an array the next item follows. */
      if (p->last_step->type & JSON_PATH_ARRAY)
        p->last_step->n_item++;
    }
    else
    {
      /*
        The previous value was not a scalar: go inside it, adding a
        step whose type is the value's type.
      */
      p->last_step++;
      p->last_step->type= (enum json_path_step_types) je->value_type;
      p->last_step->n_item= 0;
    }

    if (json_scan_next(je))
      return 1;
  }

  do
  {
    switch (je->state)
    {
    case JST_KEY:
      /*
        Remember where the key name starts; key_end is updated before
        every read, so it ends up at the position where the read that
        ended the loop began.
      */
      p->last_step->key= je->s.c_str;
      do
      {
        p->last_step->key_end= je->s.c_str;
      } while (json_read_keyname_chr(je) == 0);
      if (je->s.error)
        return 1;
      /* Now we have je.state == JST_VALUE, so let's handle it. */

      /* fall through */
    case JST_VALUE:
      if (json_read_value(je))
        return 1;
      return 0;
    case JST_OBJ_END:
    case JST_ARRAY_END:
      /*
        Leaving a level: drop its step; if the step we return to is an
        array step, its next item follows.
      */
      p->last_step--;
      if (p->last_step->type & JSON_PATH_ARRAY)
        p->last_step->n_item++;
      break;
    default:
      break;
    }
  } while (json_scan_next(je) == 0);

  return 1;
}
1755 
1756 
/*
  Compare the path steps [a, a_end] (may contain wildcards) with the
  steps [b, b_end] (asserted to contain none). Both ranges are
  inclusive.

  'vt' is only used once 'b' is exhausted: unless it is
  JSON_VALUE_ARRAY, remaining steps of 'a' that are non-wildcard array
  steps with item number 0 are stepped over.

  Returns
     0  the paths match;
     1  'a' is exhausted while 'b' still has steps;
    -1  a step does not fit and it has no double wildcard;
    -2  'b' is exhausted while 'a' still has steps that cannot be
        stepped over.
*/
int json_path_parts_compare(
    const json_path_step_t *a, const json_path_step_t *a_end,
    const json_path_step_t *b, const json_path_step_t *b_end,
    enum json_value_types vt)
{
  int res, res2;

  while (a <= a_end)
  {
    if (b > b_end)
    {
      /*
        'b' has ended. NOTE(review): the masked comparison presumably
        means "array step without the wildcard flag" -- confirm against
        the definition of JSON_PATH_ARRAY_WILD.
      */
      while (vt != JSON_VALUE_ARRAY &&
             (a->type & JSON_PATH_ARRAY_WILD) == JSON_PATH_ARRAY &&
             a->n_item == 0)
      {
        if (++a > a_end)
          return 0;
      }
      return -2;
    }

    DBUG_ASSERT((b->type & (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD)) == 0);


    if (a->type & JSON_PATH_ARRAY)
    {
      if (b->type & JSON_PATH_ARRAY)
      {
        if ((a->type & JSON_PATH_WILD) || a->n_item == b->n_item)
          goto step_fits;
        goto step_failed;
      }
      /*
        'a' wants item 0 of an array while 'b' has a key step here:
        the step of 'a' is accepted without consuming the step of 'b'.
      */
      if ((a->type & JSON_PATH_WILD) == 0 && a->n_item == 0)
        goto step_fits_autowrap;
      goto step_failed;
    }
    else /* JSON_PATH_KEY */
    {
      if (!(b->type & JSON_PATH_KEY))
        goto step_failed;

      /* Key names are compared as raw bytes of equal length. */
      if (!(a->type & JSON_PATH_WILD) &&
          (a->key_end - a->key != b->key_end - b->key ||
           memcmp(a->key, b->key, a->key_end - a->key) != 0))
        goto step_failed;

      goto step_fits;
    }
step_failed:
    /* With a double wildcard the same step of 'a' is tried deeper in 'b'. */
    if (!(a->type & JSON_PATH_DOUBLE_WILD))
      return -1;
    b++;
    continue;

step_fits:
    b++;
    if (!(a->type & JSON_PATH_DOUBLE_WILD))
    {
      a++;
      continue;
    }

    /* Double wild handling needs recursions. */
    /* First try: the double wildcard step ends here. */
    res= json_path_parts_compare(a+1, a_end, b, b_end, vt);
    if (res == 0)
      return 0;

    /* Second try: the same step of 'a' matches deeper in 'b'. */
    res2= json_path_parts_compare(a, a_end, b, b_end, vt);

    return (res2 >= 0) ? res2 : res;

step_fits_autowrap:
    if (!(a->type & JSON_PATH_DOUBLE_WILD))
    {
      a++;
      continue;
    }

    /* Double wild handling needs recursions. */
    /* Same two tries as above, but 'b' was not advanced yet. */
    res= json_path_parts_compare(a+1, a_end, b+1, b_end, vt);
    if (res == 0)
      return 0;

    res2= json_path_parts_compare(a, a_end, b+1, b_end, vt);

    return (res2 >= 0) ? res2 : res;

  }

  /* 'a' is exhausted: 0 if 'b' is exhausted too, 1 otherwise. */
  return b <= b_end;
}
1848 
1849 
json_path_compare(const json_path_t * a,const json_path_t * b,enum json_value_types vt)1850 int json_path_compare(const json_path_t *a, const json_path_t *b,
1851                       enum json_value_types vt)
1852 {
1853   return json_path_parts_compare(a->steps+1, a->last_step,
1854                                  b->steps+1, b->last_step, vt);
1855 }
1856 
1857 
/*
  Read the value at the current position and report where it is.

  *value is set to the start of the value. *value_len is the engine's
  value_len for a scalar; for an object or an array the whole level is
  skipped and the length is the distance from *value to the position
  reached.

  Returns the type of the value, or JSV_BAD_JSON if reading or
  skipping fails.
*/
static enum json_types smart_read_value(json_engine_t *je,
                                        const char **value, int *value_len)
{
  if (json_read_value(je))
    return JSV_BAD_JSON;

  *value= (char *) je->value;

  if (!json_value_scalar(je))
  {
    if (json_skip_level(je))
      return JSV_BAD_JSON;

    *value_len= (int) ((char *) je->s.c_str - *value);
  }
  else
    *value_len= je->value_len;

  /* The cast below relies on the two enums having the same values. */
  compile_time_assert((int) JSON_VALUE_OBJECT == (int) JSV_OBJECT);
  compile_time_assert((int) JSON_VALUE_ARRAY == (int) JSV_ARRAY);
  compile_time_assert((int) JSON_VALUE_STRING == (int) JSV_STRING);
  compile_time_assert((int) JSON_VALUE_NUMBER == (int) JSV_NUMBER);
  compile_time_assert((int) JSON_VALUE_TRUE == (int) JSV_TRUE);
  compile_time_assert((int) JSON_VALUE_FALSE == (int) JSV_FALSE);
  compile_time_assert((int) JSON_VALUE_NULL == (int) JSV_NULL);

  return (enum json_types) je->value_type;
}
1889 
1890 
json_type(const char * js,const char * js_end,const char ** value,int * value_len)1891 enum json_types json_type(const char *js, const char *js_end,
1892                           const char **value, int *value_len)
1893 {
1894   json_engine_t je;
1895 
1896   json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1897                   (const uchar *) js_end);
1898 
1899   return smart_read_value(&je, value, value_len);
1900 }
1901 
1902 
json_get_array_item(const char * js,const char * js_end,int n_item,const char ** value,int * value_len)1903 enum json_types json_get_array_item(const char *js, const char *js_end,
1904                                     int n_item,
1905                                     const char **value, int *value_len)
1906 {
1907   json_engine_t je;
1908   int c_item= 0;
1909 
1910   json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1911                   (const uchar *) js_end);
1912 
1913   if (json_read_value(&je) ||
1914       je.value_type != JSON_VALUE_ARRAY)
1915     goto err_return;
1916 
1917   while (!json_scan_next(&je))
1918   {
1919     switch (je.state)
1920     {
1921     case JST_VALUE:
1922       if (c_item == n_item)
1923         return smart_read_value(&je, value, value_len);
1924 
1925       if (json_skip_key(&je))
1926         goto err_return;
1927 
1928       c_item++;
1929       break;
1930 
1931     case JST_ARRAY_END:
1932       *value= (const char *) (je.s.c_str - je.sav_c_len);
1933       *value_len= c_item;
1934       return JSV_NOTHING;
1935     }
1936   }
1937 
1938 err_return:
1939   return JSV_BAD_JSON;
1940 }
1941 
1942 
/** Simple json lookup for a value by the key.

  Expects JSON object.
  Only scans the 'first level' of the object, not
  the nested structures.

  @param js          [in]       json object to search in
  @param js_end      [in]       end of json string
  @param key         [in]       key to search for (NUL-terminated string)
  @param value       [out]      pointer into js (value or closing })
  @param value_len   [out]      length of the value found or number of keys

  @retval the type of the key value
  @retval JSV_BAD_JSON - syntax error found reading JSON.
                         or not JSON object.
  @retval JSV_NOTHING - no such key found.
*/
json_get_object_key(const char * js,const char * js_end,const char * key,const char ** value,int * value_len)1961 enum json_types json_get_object_key(const char *js, const char *js_end,
1962                                     const char *key,
1963                                     const char **value, int *value_len)
1964 {
1965   const char *key_end= key + strlen(key);
1966   json_engine_t je;
1967   json_string_t key_name;
1968   int n_keys= 0;
1969 
1970   json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);
1971 
1972   json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1973                   (const uchar *) js_end);
1974 
1975   if (json_read_value(&je) ||
1976       je.value_type != JSON_VALUE_OBJECT)
1977     goto err_return;
1978 
1979   while (!json_scan_next(&je))
1980   {
1981     switch (je.state)
1982     {
1983     case JST_KEY:
1984       n_keys++;
1985       json_string_set_str(&key_name, (const uchar *) key,
1986                           (const uchar *) key_end);
1987       if (json_key_matches(&je, &key_name))
1988         return smart_read_value(&je, value, value_len);
1989 
1990       if (json_skip_key(&je))
1991         goto err_return;
1992 
1993       break;
1994 
1995     case JST_OBJ_END:
1996       *value= (const char *) (je.s.c_str - je.sav_c_len);
1997       *value_len= n_keys;
1998       return JSV_NOTHING;
1999     }
2000   }
2001 
2002 err_return:
2003   return JSV_BAD_JSON;
2004 }
2005 
2006 
json_get_object_nkey(const char * js,const char * js_end,int nkey,const char ** keyname,const char ** keyname_end,const char ** value,int * value_len)2007 enum json_types json_get_object_nkey(const char *js __attribute__((unused)),
2008                                      const char *js_end __attribute__((unused)),
2009                                      int nkey __attribute__((unused)),
2010                                      const char **keyname __attribute__((unused)),
2011                                      const char **keyname_end __attribute__((unused)),
2012                                      const char **value __attribute__((unused)),
2013                                      int *value_len __attribute__((unused)))
2014 {
2015   return JSV_NOTHING;
2016 }
2017 
2018 
/** Check if json is valid (well-formed)

  @retval 1 - json is well-formed (the scan finished without an error)
  @retval 0 - json is invalid
*/
int json_valid(const char *js, size_t js_len, CHARSET_INFO *cs)
{
  const uchar *text= (const uchar *) js;
  json_engine_t je;

  json_scan_start(&je, cs, text, text + js_len);

  /* Run the scanner until it stops; the tokens themselves are not used. */
  for (;;)
  {
    if (json_scan_next(&je) != 0)
      break;
  }

  /* Non-zero result when the scanner stopped without recording an error. */
  return je.s.error ? 0 : 1;
}
2031 
2032 
/*
  Expects the JSON object as the js argument, and the key name
  (a NUL-terminated string) as kname.
  Looks for this key in the object and returns
  the location of all the text related to it.
  The text includes the comma separating this key.

  comma_pos - the hint where the comma is. It is important
       if you plan to replace the key rather than just cut.
    1  - comma is on the left
    2  - comma is on the right.
    0  - no comma at all (the object has just this single key)

  If no such key is found, *key_start is set to NULL.

  Returns 0 on success (including the "no such key" case) and
  1 if js is not a JSON object or a syntax error stops the scan.
*/
json_locate_key(const char * js,const char * js_end,const char * kname,const char ** key_start,const char ** key_end,int * comma_pos)2047 int json_locate_key(const char *js, const char *js_end,
2048                     const char *kname,
2049                     const char **key_start, const char **key_end,
2050                     int *comma_pos)
2051 {
2052   const char *kname_end= kname + strlen(kname);
2053   json_engine_t je;
2054   json_string_t key_name;
2055   int t_next, c_len, match_result;
2056 
2057   json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);
2058 
2059   json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
2060                   (const uchar *) js_end);
2061 
2062   if (json_read_value(&je) ||
2063       je.value_type != JSON_VALUE_OBJECT)
2064     goto err_return;
2065 
2066   *key_start= (const char *) je.s.c_str;
2067   *comma_pos= 0;
2068 
2069   while (!json_scan_next(&je))
2070   {
2071     switch (je.state)
2072     {
2073     case JST_KEY:
2074       json_string_set_str(&key_name, (const uchar *) kname,
2075                           (const uchar *) kname_end);
2076       match_result= json_key_matches(&je, &key_name);
2077       if (json_skip_key(&je))
2078         goto err_return;
2079       get_first_nonspace(&je.s, &t_next, &c_len);
2080       je.s.c_str-= c_len;
2081 
2082       if (match_result)
2083       {
2084         *key_end= (const char *) je.s.c_str;
2085 
2086         if (*comma_pos == 1)
2087           return 0;
2088 
2089         DBUG_ASSERT(*comma_pos == 0);
2090 
2091         if (t_next == C_COMMA)
2092         {
2093           *key_end+= c_len;
2094           *comma_pos= 2;
2095         }
2096         else if (t_next == C_RCURB)
2097           *comma_pos= 0;
2098         else
2099           goto err_return;
2100         return 0;
2101       }
2102 
2103       *key_start= (const char *) je.s.c_str;
2104       *comma_pos= 1;
2105       break;
2106 
2107     case JST_OBJ_END:
2108       *key_start= NULL;
2109       return 0;
2110     }
2111   }
2112 
2113 err_return:
2114   return 1;
2115 
2116 }
2117