1 /* Copyright (c) 2016, 2020, MariaDB Corporation.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published by
5    the Free Software Foundation; version 2 of the License.
6 
7    This program is distributed in the hope that it will be useful,
8    but WITHOUT ANY WARRANTY; without even the implied warranty of
9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10    GNU General Public License for more details.
11 
12    You should have received a copy of the GNU General Public License
13    along with this program; if not, write to the Free Software
14    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
15 
16 #include <my_global.h>
17 #include <string.h>
18 #include <m_ctype.h>
19 #include "json_lib.h"
20 
/*
  JSON escaping lets the user specify characters by their UTF-16 codes
  (\uXXXX), so we need the UTF-16 charset capabilities. Let's import
  them from the utf16 charset.

  my_utf16_uni() is what json_handle_esc() uses below to turn the bytes
  collected from a \uXXXX escape into a character code; it is called
  there with a null charset pointer.
*/
int my_utf16_uni(CHARSET_INFO *cs,
                 my_wc_t *pwc, const uchar *s, const uchar *e);
int my_uni_utf16(CHARSET_INFO *cs, my_wc_t wc, uchar *s, uchar *e);
29 
30 
/*
  Point the JSON string reader at a new piece of text.
  'str' becomes the current read position and 'end' is remembered
  as the end of the text.
*/
void json_string_set_str(json_string_t *s,
                         const uchar *str, const uchar *end)
{
  s->str_end= end;
  s->c_str= str;
}
37 
38 
/*
  Attach a character set to the JSON string reader: remember the
  charset, cache its mb_wc handler in s->wc and reset the error code.
*/
void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs)
{
  s->wc= i_cs->cset->mb_wc;
  s->cs= i_cs;
  s->error= 0;
}
45 
46 
/*
  Fully initialize the JSON string reader: set the text to read
  and the character set it is written in.
*/
static void json_string_setup(json_string_t *s,
                              CHARSET_INFO *i_cs, const uchar *str,
                              const uchar *end)
{
  json_string_set_str(s, str, end);
  json_string_set_cs(s, i_cs);
}
54 
55 
/*
  Classes of characters as seen by the main JSON scanner.
  The classes before NR_C_CLASSES are the column indexes of the
  json_actions[][] table; C_SPACE is skipped by get_first_nonspace()
  and never reaches that table.
*/
enum json_char_classes {
  C_EOS,    /* end of string */
  C_LCURB,  /* {  */
  C_RCURB,  /* } */
  C_LSQRB,  /* [ */
  C_RSQRB,  /* ] */
  C_COLON,  /* : */
  C_COMMA,  /* , */
  C_QUOTE,  /* " */
  C_DIGIT,  /* -0123456789 */
  C_LOW_F,  /* 'f' (for "false") */
  C_LOW_N,  /* 'n' (for "null") */
  C_LOW_T,  /* 't' (for "true") */
  C_ETC,    /* everything else */
  C_ERR,    /* character disallowed in JSON */
  C_BAD,    /* invalid character, charset handler cannot read it */
  NR_C_CLASSES, /* Number of classes that are handled with functions. */
  C_SPACE   /* space. Doesn't need a specific handler, so after the counter.*/
};
75 
76 
/*
  This array maps the first 128 Unicode Code Points into classes.
  The remaining Unicode characters should be mapped to C_ETC
  (get_first_nonspace() does so for code points >= 128).
  Each row below covers eight consecutive code points.
*/

static enum json_char_classes json_chr_map[128] = {
  /* 0x00 - 0x1F: control characters; TAB, LF and CR count as space. */
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
  C_ERR,   C_SPACE, C_SPACE, C_ERR,   C_ERR,   C_SPACE, C_ERR,   C_ERR,
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,
  C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,   C_ERR,

  /* 0x20 - 0x3F: space, '"', ',', '-', the digits and ':'. */
  C_SPACE, C_ETC,   C_QUOTE, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_COMMA, C_DIGIT, C_ETC,   C_ETC,
  C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
  C_DIGIT, C_DIGIT, C_COLON, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,

  /* 0x40 - 0x5F: only '[' and ']' are special here. */
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_LSQRB, C_ETC,   C_RSQRB, C_ETC,   C_ETC,

  /* 0x60 - 0x7F: 'f', 'n', 't' (literal starters), '{' and '}'. */
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_F, C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_N, C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_T, C_ETC,   C_ETC,   C_ETC,
  C_ETC,   C_ETC,   C_ETC,   C_LCURB, C_ETC,   C_RCURB, C_ETC,   C_ETC
};
103 
104 
/*
  The JSON parser actually has more states than 'enum json_states'
  declares. The rest of the states aren't visible to the user, so let's
  specify them here to avoid confusion.
  They are numbered right after the user states, so both kinds can be
  used as row indexes of the same json_actions[][] table.
*/

enum json_all_states {
  JST_DONE= NR_JSON_USER_STATES,         /* ok to finish     */
  JST_OBJ_CONT= NR_JSON_USER_STATES+1,   /* object continues */
  JST_ARRAY_CONT= NR_JSON_USER_STATES+2, /* array continues  */
  JST_READ_VALUE= NR_JSON_USER_STATES+3, /* value is being read */
  NR_JSON_STATES= NR_JSON_USER_STATES+4  /* total number of states */
};
118 
119 
/*
  Type of the functions stored in json_actions[][]. A handler gets the
  engine and returns 0 to let the scan go on, or non-zero to stop it.
*/
typedef int (*json_state_handler)(json_engine_t *);
121 
122 
123 /* The string is broken. */
unexpected_eos(json_engine_t * j)124 static int unexpected_eos(json_engine_t *j)
125 {
126   j->s.error= JE_EOS;
127   return 1;
128 }
129 
130 
131 /* This symbol here breaks the JSON syntax. */
syntax_error(json_engine_t * j)132 static int syntax_error(json_engine_t *j)
133 {
134   j->s.error= JE_SYN;
135   return 1;
136 }
137 
138 
139 /* Value of object. */
mark_object(json_engine_t * j)140 static int mark_object(json_engine_t *j)
141 {
142   j->state= JST_OBJ_START;
143   if (++j->stack_p < JSON_DEPTH_LIMIT)
144   {
145     j->stack[j->stack_p]= JST_OBJ_CONT;
146     return 0;
147   }
148   j->s.error= JE_DEPTH;
149   return 1;
150 }
151 
152 
153 /* Read value of object. */
read_obj(json_engine_t * j)154 static int read_obj(json_engine_t *j)
155 {
156   j->state= JST_OBJ_START;
157   j->value_type= JSON_VALUE_OBJECT;
158   j->value= j->value_begin;
159   if (++j->stack_p < JSON_DEPTH_LIMIT)
160   {
161     j->stack[j->stack_p]= JST_OBJ_CONT;
162     return 0;
163   }
164   j->s.error= JE_DEPTH;
165   return 1;
166 }
167 
168 
169 /* Value of array. */
mark_array(json_engine_t * j)170 static int mark_array(json_engine_t *j)
171 {
172   j->state= JST_ARRAY_START;
173   if (++j->stack_p < JSON_DEPTH_LIMIT)
174   {
175     j->stack[j->stack_p]= JST_ARRAY_CONT;
176     j->value= j->value_begin;
177     return 0;
178   }
179   j->s.error= JE_DEPTH;
180   return 1;
181 }
182 
183 /* Read value of object. */
read_array(json_engine_t * j)184 static int read_array(json_engine_t *j)
185 {
186   j->state= JST_ARRAY_START;
187   j->value_type= JSON_VALUE_ARRAY;
188   j->value= j->value_begin;
189   if (++j->stack_p < JSON_DEPTH_LIMIT)
190   {
191     j->stack[j->stack_p]= JST_ARRAY_CONT;
192     return 0;
193   }
194   j->s.error= JE_DEPTH;
195   return 1;
196 }
197 
198 
199 
/*
  Character classes inside the JSON string constant.
  We mostly need this to parse escaping properly.
  Escapings available in JSON are:
  \" - quotation mark
  \\ - backslash
  \b - backspace UNICODE 8
  \f - formfeed UNICODE 12
  \n - newline UNICODE 10
  \r - carriage return UNICODE 13
  \t - horizontal tab UNICODE 9
  \u{four-hex-digits} - character given by its UTF-16 code

  The classes S_0..S_F are numerically equal to the value of the hex
  digit they stand for; read_4_hexdigits() relies on that.
  Everything that compares <= S_ETC is an ordinary character that
  needs no special handling inside a string.
*/
enum json_string_char_classes {
  S_0= 0,
  S_1= 1,
  S_2= 2,
  S_3= 3,
  S_4= 4,
  S_5= 5,
  S_6= 6,
  S_7= 7,
  S_8= 8,
  S_9= 9,
  S_A= 10,
  S_B= 11,
  S_C= 12,
  S_D= 13,
  S_E= 14,
  S_F= 15,
  S_ETC= 36,    /* rest of characters. */
  S_QUOTE= 37,  /* " */
  S_BKSL= 38, /* \ */
  S_ERR= 100,   /* disallowed */
};
235 
236 
/*
  This maps characters to their types inside a string constant.
  Only code points below 128 are covered, so callers have to check
  the range before indexing. Each row covers eight code points.
*/
static enum json_string_char_classes json_instr_chr_map[128] = {
  /* 0x00 - 0x1F: control characters, all disallowed in a string. */
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,
  S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,   S_ERR,

  /* 0x20 - 0x3F: '"' and the decimal digits. */
  S_ETC,   S_ETC,   S_QUOTE, S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_0,     S_1,     S_2,     S_3,     S_4,     S_5,     S_6,     S_7,
  S_8,     S_9,     S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,

  /* 0x40 - 0x5F: hex digits 'A'..'F' and the backslash. */
  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_BKSL,  S_ETC,   S_ETC,   S_ETC,

  /* 0x60 - 0x7F: hex digits 'a'..'f'. */
  S_ETC,   S_A,     S_B,     S_C,     S_D,     S_E,     S_F,     S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,
  S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC,   S_ETC
};
259 
260 
/*
  Read four hexadecimal digits from the string and pack them into
  two bytes, most significant digit first: dest[0] gets the first
  two digits, dest[1] the last two. The digits are added to whatever
  dest[] already holds, so the caller has to pass zeroed bytes.

  Returns 0 on success. Otherwise sets s->error (JE_EOS, JE_BAD_CHR
  or JE_SYN for a non-hex character) and returns that code.
*/
static int read_4_hexdigits(json_string_t *s, uchar *dest)
{
  int pos;

  for (pos= 0; pos < 4; pos++)
  {
    int digit;
    int len= json_next_char(s);

    if (len <= 0)
    {
      s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
      return s->error;
    }

    /* The map only covers ASCII, so check the range first. */
    if (s->c_next >= 128)
      return s->error= JE_SYN;
    digit= json_instr_chr_map[s->c_next];
    if (digit > S_F)
      return s->error= JE_SYN;

    s->c_str+= len;
    if (pos % 2)
      dest[pos/2]+= digit;       /* low half of the byte  */
    else
      dest[pos/2]+= digit*16;    /* high half of the byte */
  }
  return 0;
}
277 
278 
/*
  Decode the escape sequence that follows a backslash.
  The callers in this file invoke it right after they consumed the
  backslash itself. On success s->c_next holds the resulting
  character and s->c_str points after the whole sequence.

  Returns 0 on success. On failure s->error is set and a non-zero
  value is returned (either the error code or 1).
*/
static int json_handle_esc(json_string_t *s)
{
  int t, c_len;

  if ((c_len= json_next_char(s)) <= 0)
    return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;

  s->c_str+= c_len;
  /* One-letter escapes that stand for a control character. */
  switch (s->c_next)
  {
    case 'b':
      s->c_next= 8;
      return 0;
    case 'f':
      s->c_next= 12;
      return 0;
    case 'n':
      s->c_next= 10;
      return 0;
    case 'r':
      s->c_next= 13;
      return 0;
    case 't':
      s->c_next= 9;
      return 0;
  }

  /*
    A control character cannot be escaped. Step back so that s->c_str
    points at the offending character.
  */
  if (s->c_next < 128 && (t= json_instr_chr_map[s->c_next]) == S_ERR)
  {
    s->c_str-= c_len;
    return s->error= JE_ESCAPING;
  }


  /* Any other character except 'u' just stands for itself. */
  if (s->c_next != 'u')
    return 0;

  {
    /*
      Read the four-hex-digits code.
      If symbol is not in the Basic Multilingual Plane, we're reading
      the string for the next four digits to compose the UTF-16 surrogate pair.
    */
    uchar code[4]= {0,0,0,0};

    if (read_4_hexdigits(s, code))
      return 1;

    /* The first two bytes alone make a complete character. */
    if ((c_len= my_utf16_uni(0, &s->c_next, code, code+2)) == 2)
      return 0;

    /* Anything but "two more bytes are needed" is an error. */
    if (c_len != MY_CS_TOOSMALL4)
      return s->error= JE_BAD_CHR;

    /* The second half has to come as another \uXXXX right away. */
    if ((c_len= json_next_char(s)) <= 0)
      return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
    if (s->c_next != '\\')
      return s->error= JE_SYN;

    s->c_str+= c_len;
    if ((c_len= json_next_char(s)) <= 0)
      return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
    if (s->c_next != 'u')
      return s->error= JE_SYN;
    s->c_str+= c_len;

    if (read_4_hexdigits(s, code+2))
      return 1;

    /* Now all four bytes have to make one character. */
    if ((c_len= my_utf16_uni(0, &s->c_next, code, code+4)) == 4)
      return 0;
  }
  return s->error= JE_BAD_CHR;
}
353 
354 
/*
  Read the next character of a string constant into js->c_next,
  decoding the escape sequence if the character is a backslash.

  Returns 0 on success. If the character cannot be read, sets
  js->error to JE_EOS or JE_BAD_CHR and returns 1; a broken escape
  returns whatever json_handle_esc() returned.
*/
int json_read_string_const_chr(json_string_t *js)
{
  int len= json_next_char(js);

  if (len <= 0)
  {
    js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
    return 1;
  }

  js->c_str+= len;
  if (js->c_next != '\\')
    return 0;
  return json_handle_esc(js);
}
367 
368 
/*
  Skip the string constant up to and including its closing quote.
  j->value_escaped is set to 1 if a backslash is met on the way.
  On success the state is taken from the top of the stack and 0
  is returned; otherwise j->s.error is set and non-zero is returned.
*/
static int skip_str_constant(json_engine_t *j)
{
  json_string_t *js= &j->s;

  for (;;)
  {
    int len= json_next_char(js);

    if (len <= 0)
      return js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;

    js->c_str+= len;

    /* Ordinary character, nothing to do. */
    if (js->c_next >= 128 || json_instr_chr_map[js->c_next] <= S_ETC)
      continue;

    if (js->c_next == '"')
      break;

    /* Symbol not allowed in JSON. */
    if (js->c_next != '\\')
      return js->error= JE_NOT_JSON_CHR;

    j->value_escaped= 1;
    if (json_handle_esc(js))
      return 1;
  }

  j->state= j->stack[j->stack_p];
  return 0;
}
399 
400 
401 /* Scalar string. */
v_string(json_engine_t * j)402 static int v_string(json_engine_t *j)
403 {
404   return skip_str_constant(j) || json_scan_next(j);
405 }
406 
407 
408 /* Read scalar string. */
read_strn(json_engine_t * j)409 static int read_strn(json_engine_t *j)
410 {
411   j->value= j->s.c_str;
412   j->value_type= JSON_VALUE_STRING;
413   j->value_escaped= 0;
414 
415   if (skip_str_constant(j))
416     return 1;
417 
418   j->state= j->stack[j->stack_p];
419   j->value_len= (int)(j->s.c_str - j->value) - 1;
420   return 0;
421 }
422 
423 
/*
  We have a dedicated parser for numeric constants. It's similar
  to the main JSON parser: we similarly define character classes,
  map characters to classes and implement the state-per-class
  table. Though we don't create functions that handle
  particular classes, we just specify what new state the parser
  should get in each case.
*/
enum json_num_char_classes {
  N_MINUS,  /* - */
  N_PLUS,   /* + */
  N_ZERO,   /* 0 */
  N_DIGIT,  /* 123456789 */
  N_POINT,  /* . */
  N_E,      /* e or E */
  N_END,    /* space, TAB, LF, CR, ',', ']' or '}' */
  N_EEND,   /* any other printable ASCII character */
  N_ERR,    /* remaining control characters */
  N_NUM_CLASSES
};
444 
445 
/*
  Maps the first 128 code points to the classes of the numeric parser.
  Each row covers eight consecutive code points.
*/
static enum json_num_char_classes json_num_chr_map[128] = {
  /* 0x00 - 0x1F: control characters; TAB, LF and CR end a number. */
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
  N_ERR,   N_END,   N_END,   N_ERR,   N_ERR,   N_END,   N_ERR,   N_ERR,
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,
  N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,   N_ERR,

  /* 0x20 - 0x3F: space, '+', ',', '-', '.' and the digits. */
  N_END,   N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_PLUS,  N_END,   N_MINUS, N_POINT, N_EEND,
  N_ZERO,  N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT,
  N_DIGIT, N_DIGIT, N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,

  /* 0x40 - 0x5F: 'E' and ']'. */
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_END,   N_EEND,  N_EEND,

  /* 0x60 - 0x7F: 'e' and '}'. */
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_E,     N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,
  N_EEND,  N_EEND,  N_EEND,  N_EEND,  N_EEND,   N_END,   N_EEND,  N_EEND,
};
467 
468 
/*
  States of the numeric parser; they are the row indexes of the
  json_num_states[][] table. NS_OK is 0, while skip_num_constant()
  only keeps going on a state that is greater than 0.
*/
enum json_num_states {
  NS_OK,  /* Number ended. */
  NS_GO,  /* Initial state. */
  NS_GO1, /* If the number starts with '-'. */
  NS_Z,   /* If the number starts with '0'. */
  NS_Z1,  /* If the number starts with '-0'. */
  NS_INT, /* Integer part. */
  NS_FRAC,/* Fractional part. */
  NS_EX,  /* Exponential part begins. */
  NS_EX1, /* Exponential part continues. */
  NS_NUM_STATES
};
481 
482 
/*
  Transitions of the numeric parser: json_num_states[state][class] is
  either the next state (NS_*) or an error code (JE_*).

  NOTE(review): every row lists eight values while N_NUM_CLASSES is
  nine. The values therefore land in the columns N_MINUS .. N_EEND,
  and the N_ERR column is left to the implicit zero initialization,
  which is the same number as NS_OK. The header line below does not
  have a separate column for N_EEND - confirm which layout is meant.
*/
static int json_num_states[NS_NUM_STATES][N_NUM_CLASSES]=
{
/*         -        +       0        1..9    POINT    E       END_OK   ERROR */
/*OK*/   { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
/*GO*/   { NS_GO1,  JE_SYN, NS_Z,     NS_INT, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
/*GO1*/  { JE_SYN,  JE_SYN, NS_Z1,    NS_INT, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
/*ZERO*/ { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, NS_FRAC, JE_SYN, NS_OK,  JE_BAD_CHR },
/*ZE1*/  { JE_SYN,  JE_SYN, JE_SYN,   JE_SYN, NS_FRAC, JE_SYN, NS_OK,  JE_BAD_CHR },
/*INT*/  { JE_SYN,  JE_SYN, NS_INT,   NS_INT, NS_FRAC, NS_EX,  NS_OK,  JE_BAD_CHR },
/*FRAC*/ { JE_SYN,  JE_SYN, NS_FRAC,  NS_FRAC,JE_SYN,  NS_EX,  NS_OK,  JE_BAD_CHR },
/*EX*/   { NS_EX,   NS_EX,  NS_EX1,   NS_EX1, JE_SYN,  JE_SYN, JE_SYN, JE_BAD_CHR },
/*EX1*/  { JE_SYN,  JE_SYN, NS_EX1,   NS_EX1, JE_SYN,  JE_SYN, NS_OK,  JE_BAD_CHR }
};
496 
497 
/*
  Flags that skip_num_constant() adds to j->num_flags when the
  numeric parser visits the given state.
*/
static uint json_num_state_flags[NS_NUM_STATES]=
{
/*OK*/   0,
/*GO*/   0,
/*GO1*/  JSON_NUM_NEG,
/*ZERO*/ 0,
/*ZE1*/  0,
/*INT*/  0,
/*FRAC*/ JSON_NUM_FRAC_PART,
/*EX*/   JSON_NUM_EXP,
/*EX1*/  0,
};
510 
511 
/*
  Skip a numeric constant. The first character of the number is
  expected in j->s.c_next already; the rest is read from the string.
  j->num_flags is rebuilt from the states the parser goes through.

  Returns 0 with the state taken from the top of the stack when the
  scan stops, leaving the character that stopped it unread.
  Returns 1 with j->s.error set when a character cannot be read or is
  not ASCII, or when the text ends in a state that doesn't allow it.

  NOTE(review): when a readable ASCII character yields an error code
  from the table, the loop is just left and 0 is returned without
  setting j->s.error. Presumably the next scan step is expected to
  reject that character - confirm that e.g. a lone '-' followed by
  ',' doesn't pass as a number.
*/
static int skip_num_constant(json_engine_t *j)
{
  /* The caller has classified c_next as C_DIGIT, so it's below 128. */
  int state= json_num_states[NS_GO][json_num_chr_map[j->s.c_next]];
  int c_len;

  j->num_flags= 0;
  for (;;)
  {
    j->num_flags|= json_num_state_flags[state];
    if ((c_len= json_next_char(&j->s)) > 0 && j->s.c_next < 128)
    {
      /* Only a positive value is a state to continue with. */
      if ((state= json_num_states[state][json_num_chr_map[j->s.c_next]]) > 0)
      {
        j->s.c_str+= c_len;
        continue;
      }
      break;
    }

    /* At the end of the text the N_END column tells if it's fine. */
    if ((j->s.error=
          json_eos(&j->s) ? json_num_states[state][N_END] : JE_BAD_CHR) < 0)
      return 1;
    else
      break;
  }

  j->state= j->stack[j->stack_p];
  return 0;
}
541 
542 
543 /* Scalar numeric. */
v_number(json_engine_t * j)544 static int v_number(json_engine_t *j)
545 {
546   return skip_num_constant(j) || json_scan_next(j);
547 }
548 
549 
550 /* Read numeric constant. */
read_num(json_engine_t * j)551 static int read_num(json_engine_t *j)
552 {
553   j->value= j->value_begin;
554   if (skip_num_constant(j) == 0)
555   {
556     j->value_type= JSON_VALUE_NUMBER;
557     j->value_len= (int)(j->s.c_str - j->value_begin);
558     return 0;
559   }
560   return 1;
561 }
562 
563 
564 /* Check that the JSON string matches the argument and skip it. */
skip_string_verbatim(json_string_t * s,const char * str)565 static int skip_string_verbatim(json_string_t *s, const char *str)
566 {
567   int c_len;
568   while (*str)
569   {
570     if ((c_len= json_next_char(s)) > 0)
571     {
572       if (s->c_next == (my_wc_t) *(str++))
573       {
574         s->c_str+= c_len;
575         continue;
576       }
577       return s->error= JE_SYN;
578     }
579     return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR;
580   }
581 
582   return 0;
583 }
584 
585 
586 /* Scalar false. */
v_false(json_engine_t * j)587 static int v_false(json_engine_t *j)
588 {
589   if (skip_string_verbatim(&j->s, "alse"))
590    return 1;
591   j->state= j->stack[j->stack_p];
592   return json_scan_next(j);
593 }
594 
595 
596 /* Scalar null. */
v_null(json_engine_t * j)597 static int v_null(json_engine_t *j)
598 {
599   if (skip_string_verbatim(&j->s, "ull"))
600    return 1;
601   j->state= j->stack[j->stack_p];
602   return json_scan_next(j);
603 }
604 
605 
606 /* Scalar true. */
v_true(json_engine_t * j)607 static int v_true(json_engine_t *j)
608 {
609   if (skip_string_verbatim(&j->s, "rue"))
610    return 1;
611   j->state= j->stack[j->stack_p];
612   return json_scan_next(j);
613 }
614 
615 
616 /* Read false. */
read_false(json_engine_t * j)617 static int read_false(json_engine_t *j)
618 {
619   j->value_type= JSON_VALUE_FALSE;
620   j->value= j->value_begin;
621   j->state= j->stack[j->stack_p];
622   j->value_len= 5;
623   return skip_string_verbatim(&j->s, "alse");
624 }
625 
626 
627 /* Read null. */
read_null(json_engine_t * j)628 static int read_null(json_engine_t *j)
629 {
630   j->value_type= JSON_VALUE_NULL;
631   j->value= j->value_begin;
632   j->state= j->stack[j->stack_p];
633   j->value_len= 4;
634   return skip_string_verbatim(&j->s, "ull");
635 }
636 
637 
638 /* Read true. */
read_true(json_engine_t * j)639 static int read_true(json_engine_t *j)
640 {
641   j->value_type= JSON_VALUE_TRUE;
642   j->value= j->value_begin;
643   j->state= j->stack[j->stack_p];
644   j->value_len= 4;
645   return skip_string_verbatim(&j->s, "rue");
646 }
647 
648 
649 /* Disallowed character. */
not_json_chr(json_engine_t * j)650 static int not_json_chr(json_engine_t *j)
651 {
652   j->s.error= JE_NOT_JSON_CHR;
653   return 1;
654 }
655 
656 
657 /* Bad character. */
bad_chr(json_engine_t * j)658 static int bad_chr(json_engine_t *j)
659 {
660   j->s.error= JE_BAD_CHR;
661   return 1;
662 }
663 
664 
665 /* Correct finish. */
done(json_engine_t * j)666 static int done(json_engine_t *j  __attribute__((unused)))
667 {
668   return 1;
669 }
670 
671 
672 /* End of the object. */
end_object(json_engine_t * j)673 static int end_object(json_engine_t *j)
674 {
675   j->stack_p--;
676   j->state= JST_OBJ_END;
677   return 0;
678 }
679 
680 
681 /* End of the array. */
end_array(json_engine_t * j)682 static int end_array(json_engine_t *j)
683 {
684   j->stack_p--;
685   j->state= JST_ARRAY_END;
686   return 0;
687 }
688 
689 
690 /* Start reading key name. */
read_keyname(json_engine_t * j)691 static int read_keyname(json_engine_t *j)
692 {
693   j->state= JST_KEY;
694   return 0;
695 }
696 
697 
/*
  Read characters until something that is not a space is met.
  The class of that character is stored in *t_next and the result of
  the last read in *c_len. The character itself is consumed.
  If a character cannot be read, *t_next is C_EOS or C_BAD and
  nothing is consumed.
*/
static void get_first_nonspace(json_string_t *js, int *t_next, int *c_len)
{
  for (;;)
  {
    *c_len= json_next_char(js);
    if (*c_len <= 0)
    {
      *t_next= json_eos(js) ? C_EOS : C_BAD;
      return;
    }

    /* The map only covers ASCII, everything above is C_ETC. */
    *t_next= (js->c_next >= 128) ? C_ETC : json_chr_map[js->c_next];
    js->c_str+= *c_len;

    if (*t_next != C_SPACE)
      return;
  }
}
711 
712 
713 /* Next key name. */
next_key(json_engine_t * j)714 static int next_key(json_engine_t *j)
715 {
716   int t_next, c_len;
717   get_first_nonspace(&j->s, &t_next, &c_len);
718 
719   if (t_next == C_QUOTE)
720   {
721     j->state= JST_KEY;
722     return 0;
723   }
724 
725   j->s.error= (t_next == C_EOS)  ? JE_EOS :
726               ((t_next == C_BAD) ? JE_BAD_CHR :
727                                    JE_SYN);
728   return 1;
729 }
730 
731 
/*
  Forward declarations.
  These handlers are referenced by the json_actions[][] table but
  need that table themselves, so they are defined after it.
*/
static int skip_colon(json_engine_t *j);
static int skip_key(json_engine_t *j);
static int struct_end_cb(json_engine_t *j);
static int struct_end_qb(json_engine_t *j);
static int struct_end_cm(json_engine_t *j);
static int struct_end_eos(json_engine_t *j);
739 
740 
/* A comma inside an array: the next array item is to follow. */
static int next_item(json_engine_t *j)
{
  j->state= JST_VALUE;

  return 0;
}
746 
747 
/*
  The first item of an array begins.
  The character that starts the item was consumed by the scan step
  already, so step back by its saved length to let the value be
  read from its first character.
*/
static int array_item(json_engine_t *j)
{
  j->s.c_str-= j->sav_c_len;
  j->state= JST_VALUE;
  return 0;
}
754 
755 
/*
  The dispatch table of the scanner: json_actions[state][class] is the
  handler to call when a character of the given class is met in the
  given state. There's one row per state, labelled with the state it
  belongs to, and one column per character class that comes before
  NR_C_CLASSES in enum json_char_classes.
*/
static json_state_handler json_actions[NR_JSON_STATES][NR_C_CLASSES]=
/*
   The order of the columns:
   EOS              {            }             [             ]
   :                ,            "             -0..9         f
   n                t              ETC          ERR           BAD
*/
{
  {/*VALUE*/
    unexpected_eos, mark_object, syntax_error, mark_array,   syntax_error,
    syntax_error,   syntax_error,v_string,     v_number,     v_false,
    v_null,         v_true,       syntax_error, not_json_chr, bad_chr},
  {/*KEY*/
    unexpected_eos, skip_key,    skip_key,     skip_key,     skip_key,
    skip_key,       skip_key,    skip_colon,   skip_key,     skip_key,
    skip_key,       skip_key,     skip_key,     not_json_chr, bad_chr},
  {/*OBJ_START*/
    unexpected_eos, syntax_error, end_object,  syntax_error, syntax_error,
    syntax_error,   syntax_error, read_keyname, syntax_error, syntax_error,
    syntax_error,   syntax_error,   syntax_error,    not_json_chr, bad_chr},
  {/*OBJ_END*/
    struct_end_eos, syntax_error, struct_end_cb, syntax_error, struct_end_qb,
    syntax_error,   struct_end_cm,syntax_error,  syntax_error, syntax_error,
    syntax_error,   syntax_error,  syntax_error,    not_json_chr, bad_chr},
  {/*ARRAY_START*/
    unexpected_eos, array_item,   syntax_error, array_item,   end_array,
    syntax_error,   syntax_error, array_item,  array_item,  array_item,
    array_item,    array_item,    syntax_error,    not_json_chr, bad_chr},
  {/*ARRAY_END*/
    struct_end_eos, syntax_error, struct_end_cb, syntax_error,  struct_end_qb,
    syntax_error,   struct_end_cm, syntax_error, syntax_error,  syntax_error,
    syntax_error,   syntax_error,  syntax_error,    not_json_chr, bad_chr},
  {/*DONE*/
    done,           syntax_error, syntax_error, syntax_error, syntax_error,
    syntax_error,   syntax_error, syntax_error, syntax_error, syntax_error,
    syntax_error,   syntax_error, syntax_error, not_json_chr, bad_chr},
  {/*OBJ_CONT*/
    unexpected_eos, syntax_error, end_object,    syntax_error,   syntax_error,
    syntax_error,   next_key,     syntax_error,  syntax_error,   syntax_error,
    syntax_error,    syntax_error,    syntax_error,    not_json_chr, bad_chr},
  {/*ARRAY_CONT*/
    unexpected_eos, syntax_error, syntax_error,  syntax_error, end_array,
    syntax_error,   next_item,    syntax_error,  syntax_error, syntax_error,
    syntax_error,    syntax_error,    syntax_error,    not_json_chr, bad_chr},
  {/*READ_VALUE*/
    unexpected_eos, read_obj,     syntax_error,  read_array,    syntax_error,
    syntax_error,   syntax_error, read_strn,     read_num,      read_false,
    read_null,      read_true,    syntax_error,    not_json_chr, bad_chr},
};
804 
805 
806 
/*
  Prepare the engine for scanning the text between 'str' and 'end'
  written in the 'i_cs' character set. The stack gets JST_DONE at
  its bottom and the scan starts in the JST_VALUE state.
  Always returns 0.
*/
int json_scan_start(json_engine_t *je,
                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
{
  je->state= JST_VALUE;
  je->stack_p= 0;
  je->stack[0]= JST_DONE;
  json_string_setup(&je->s, i_cs, str, end);
  return 0;
}
816 
817 
818 /* Skip colon and the value. */
skip_colon(json_engine_t * j)819 static int skip_colon(json_engine_t *j)
820 {
821   int t_next, c_len;
822 
823   get_first_nonspace(&j->s, &t_next, &c_len);
824 
825   if (t_next == C_COLON)
826   {
827     get_first_nonspace(&j->s, &t_next, &c_len);
828     return json_actions[JST_VALUE][t_next](j);
829  }
830 
831   j->s.error= (t_next == C_EOS)  ? JE_EOS :
832               ((t_next == C_BAD) ? JE_BAD_CHR:
833                                    JE_SYN);
834 
835   return 1;
836 }
837 
838 
839 /* Skip colon and the value. */
skip_key(json_engine_t * j)840 static int skip_key(json_engine_t *j)
841 {
842   int t_next, c_len;
843 
844   if (json_instr_chr_map[j->s.c_next] == S_BKSL &&
845       json_handle_esc(&j->s))
846     return 1;
847 
848   while (json_read_keyname_chr(j) == 0) {}
849 
850   if (j->s.error)
851     return 1;
852 
853   get_first_nonspace(&j->s, &t_next, &c_len);
854   return json_actions[JST_VALUE][t_next](j);
855 }
856 
857 
858 /*
859   Handle EOS after the end of an object or array.
860   To do that we should pop the stack to see if
861   we are inside an object, or an array, and
862   run our 'state machine' accordingly.
863 */
struct_end_eos(json_engine_t * j)864 static int struct_end_eos(json_engine_t *j)
865 { return json_actions[j->stack[j->stack_p]][C_EOS](j); }
866 
867 
868 /*
869   Handle '}' after the end of an object or array.
870   To do that we should pop the stack to see if
871   we are inside an object, or an array, and
872   run our 'state machine' accordingly.
873 */
struct_end_cb(json_engine_t * j)874 static int struct_end_cb(json_engine_t *j)
875 { return json_actions[j->stack[j->stack_p]][C_RCURB](j); }
876 
877 
878 /*
879   Handle ']' after the end of an object or array.
880   To do that we should pop the stack to see if
881   we are inside an object, or an array, and
882   run our 'state machine' accordingly.
883 */
struct_end_qb(json_engine_t * j)884 static int struct_end_qb(json_engine_t *j)
885 { return json_actions[j->stack[j->stack_p]][C_RSQRB](j); }
886 
887 
888 /*
889   Handle ',' after the end of an object or array.
890   To do that we should pop the stack to see if
891   we are inside an object, or an array, and
892   run our 'state machine' accordingly.
893 */
struct_end_cm(json_engine_t * j)894 static int struct_end_cm(json_engine_t *j)
895 { return json_actions[j->stack[j->stack_p]][C_COMMA](j); }
896 
897 
/*
  Read the next character of the key name into j->s.c_next.

  Returns 0 if a character of the name was read.
  Returns non-zero when there's nothing more to read: either the name
  ended, in which case the colon after it is skipped, the state is
  set to JST_VALUE and j->s.error is left untouched, or an error
  happened and j->s.error is set.
*/
int json_read_keyname_chr(json_engine_t *j)
{
  json_string_t *js= &j->s;
  int len, chr_class;

  len= json_next_char(js);
  if (len > 0)
  {
    js->c_str+= len;

    /* Ordinary character of the name. */
    if (js->c_next >= 128)
      return 0;
    chr_class= json_instr_chr_map[js->c_next];
    if (chr_class <= S_ETC)
      return 0;

    if (chr_class == S_BKSL)
      return json_handle_esc(js);

    if (chr_class == S_ERR)
    {
      /* Step back to point at the offending character. */
      js->c_str-= len;
      js->error= JE_STRING_CONST;
      return 1;
    }

    if (chr_class == S_QUOTE)
    {
      /* Skip spaces until ':'. */
      while ((len= json_next_char(js)) > 0)
      {
        if (js->c_next == ':')
        {
          js->c_str+= len;
          j->state= JST_VALUE;
          return 1;
        }

        if (js->c_next >= 128 || json_chr_map[js->c_next] != C_SPACE)
        {
          js->error= JE_SYN;
          return 1;
        }
        js->c_str+= len;
      }
      js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
      return 1;
    }
  }

  js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR;
  return 1;
}
945 
946 
/*
  Read the value at the current position.
  If the engine is in the JST_KEY state, the key name is skipped
  first. Then j->value_begin is set to the first character of the
  value, the JST_READ_VALUE handler for that character is called,
  and j->value_end is set to where the reading stopped.

  Returns what the handler returned, or 1 if the key name is broken.
*/
int json_read_value(json_engine_t *j)
{
  int chr_class, len, rc;

  j->value_type= JSON_VALUE_UNINITALIZED;

  if (j->state == JST_KEY)
  {
    for (;;)
    {
      if (json_read_keyname_chr(j) != 0)
        break;
    }

    if (j->s.error)
      return 1;
  }

  get_first_nonspace(&j->s, &chr_class, &len);

  /* The first character is consumed already, so step back. */
  j->value_begin= j->s.c_str-len;
  rc= json_actions[JST_READ_VALUE][chr_class](j);
  j->value_end= j->s.c_str;
  return rc;
}
967 
968 
/*
  Make one step of the scan: get the next character that is not a
  space and call the handler that the current state has for it.
  The length of that character is kept in j->sav_c_len.
*/
int json_scan_next(json_engine_t *j)
{
  json_state_handler handler;
  int chr_class;

  get_first_nonspace(&j->s, &chr_class, &j->sav_c_len);
  handler= json_actions[j->state][chr_class];
  return handler(j);
}
976 
977 
/*
  Character classes of the JSON path parser. The classes before
  N_PATH_CLASSES are the column indexes of json_path_transitions[][].
*/
enum json_path_chr_classes {
  P_EOS,    /* end of string */
  P_USD,    /* $ */
  P_ASTER,  /* * */
  P_LSQRB,  /* [ */
  P_RSQRB,  /* ] */
  P_POINT,  /* . */
  P_ZERO,   /* 0 */
  P_DIGIT,  /* 123456789 */
  P_L,      /* l (for "lax") */
  P_S,      /* s (for "strict") */
  P_SPACE,  /* space */
  P_BKSL,   /* \ */
  P_QUOTE,  /* " */
  P_ETC,    /* everything else */
  P_ERR,    /* character disallowed in JSON*/
  P_BAD,    /* invalid character */
  N_PATH_CLASSES,
};


/*
  Maps the first 128 code points to the classes of the path parser.
  Each row covers eight consecutive code points.
  P_S has to sit at 'S' (0x53) and 's' (0x73), which is the fourth
  entry of its row, the same way P_L sits at 'L' and 'l'.
*/
static enum json_path_chr_classes json_path_chr_map[128] = {
  /* 0x00 - 0x1F: control characters; TAB, LF and CR count as space. */
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
  P_ERR,   P_SPACE, P_SPACE, P_ERR,   P_ERR,   P_SPACE, P_ERR,   P_ERR,
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,
  P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,   P_ERR,

  /* 0x20 - 0x3F: space, '"', '$', '*', '.' and the digits. */
  P_SPACE, P_ETC,   P_QUOTE, P_ETC,   P_USD,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ASTER, P_ETC,   P_ETC,   P_ETC,   P_POINT, P_ETC,
  P_ZERO,  P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT,
  P_DIGIT, P_DIGIT, P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,

  /* 0x40 - 0x5F: 'L', 'S', '[', '\' and ']'. */
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_S,     P_ETC,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_LSQRB, P_BKSL, P_RSQRB, P_ETC,   P_ETC,

  /* 0x60 - 0x7F: 'l' and 's'. */
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_L,     P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_S,     P_ETC,   P_ETC,   P_ETC,   P_ETC,
  P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC,   P_ETC
};
1020 
1021 
1022 enum json_path_states {
1023   PS_GO,  /* Initial state. */
1024   PS_LAX, /* Parse the 'lax' keyword. */
1025   PS_PT,  /* New path's step begins. */
1026   PS_AR,  /* Parse array step. */
1027   PS_SAR, /* space after the '['. */
1028   PS_AWD, /* Array wildcard. */
1029   PS_Z,   /* '0' (as an array item number). */
1030   PS_INT, /* Parse integer (as an array item number). */
1031   PS_AS,  /* Space. */
1032   PS_KEY, /* Key. */
1033   PS_KNM, /* Parse key name. */
1034   PS_KWD, /* Key wildcard. */
1035   PS_AST, /* Asterisk. */
1036   PS_DWD, /* Double wildcard. */
1037   PS_KEYX, /* Key started with quote ("). */
1038   PS_KNMX, /* Parse quoted key name. */
1039   N_PATH_STATES, /* Below are states that aren't in the transitions table. */
1040   PS_SCT,  /* Parse the 'strict' keyword. */
1041   PS_EKY,  /* '.' after the keyname so next step is the key. */
1042   PS_EKYX, /* Closing " for the quoted keyname. */
1043   PS_EAR,  /* '[' after the keyname so next step is the array. */
1044   PS_ESC,  /* Escaping in the keyname. */
1045   PS_ESCX, /* Escaping in the quoted keyname. */
1046   PS_OK,   /* Path normally ended. */
1047   PS_KOK   /* EOS after the keyname so end the path normally. */
1048 };
1049 
1050 
1051 static int json_path_transitions[N_PATH_STATES][N_PATH_CLASSES]=
1052 {
1053 /*
1054             EOS       $,      *       [       ]       .       0
1055             1..9    L       S       SPACE   \       "       ETC
1056             ERR              BAD
1057 */
1058 /* GO  */ { JE_EOS, PS_PT,  JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1059             JE_SYN, PS_LAX, PS_SCT, PS_GO,  JE_SYN, JE_SYN, JE_SYN,
1060             JE_NOT_JSON_CHR, JE_BAD_CHR},
1061 /* LAX */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1062             JE_SYN, PS_LAX, JE_SYN, PS_GO,  JE_SYN, JE_SYN, JE_SYN,
1063             JE_NOT_JSON_CHR, JE_BAD_CHR},
1064 /* PT */  { PS_OK,  JE_SYN, PS_AST, PS_AR,  JE_SYN, PS_KEY, JE_SYN, JE_SYN,
1065             JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1066             JE_NOT_JSON_CHR, JE_BAD_CHR},
1067 /* AR */  { JE_EOS, JE_SYN, PS_AWD, JE_SYN, JE_SYN, JE_SYN, PS_Z,
1068             PS_INT, JE_SYN, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
1069             JE_NOT_JSON_CHR, JE_BAD_CHR},
1070 /* SAR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, PS_PT,  JE_SYN, PS_Z,
1071             PS_INT, JE_SYN, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
1072             JE_NOT_JSON_CHR, JE_BAD_CHR},
1073 /* AWD */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, JE_SYN,
1074             JE_SYN, JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_SYN,
1075             JE_NOT_JSON_CHR, JE_BAD_CHR},
1076 /* Z */   { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, JE_SYN,
1077             JE_SYN, JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_SYN,
1078             JE_NOT_JSON_CHR, JE_BAD_CHR},
1079 /* INT */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, PS_INT,
1080             PS_INT, JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_SYN,
1081             JE_NOT_JSON_CHR, JE_BAD_CHR},
1082 /* AS */  { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT,  JE_SYN, JE_SYN, JE_SYN,
1083             JE_SYN, JE_SYN, PS_AS,  JE_SYN, JE_SYN, JE_SYN,
1084             JE_NOT_JSON_CHR, JE_BAD_CHR},
1085 /* KEY */ { JE_EOS, PS_KNM, PS_KWD, JE_SYN, PS_KNM, JE_SYN, PS_KNM,
1086             PS_KNM, PS_KNM, PS_KNM, PS_KNM, JE_SYN, PS_KEYX, PS_KNM,
1087             JE_NOT_JSON_CHR, JE_BAD_CHR},
1088 /* KNM */ { PS_KOK, PS_KNM, PS_AST, PS_EAR, PS_KNM, PS_EKY, PS_KNM,
1089             PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_ESC, PS_KNM, PS_KNM,
1090             JE_NOT_JSON_CHR, JE_BAD_CHR},
1091 /* KWD */ { PS_OK,  JE_SYN, JE_SYN, PS_AR,  JE_SYN, PS_EKY, JE_SYN,
1092             JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1093             JE_NOT_JSON_CHR, JE_BAD_CHR},
1094 /* AST */ { JE_SYN, JE_SYN, PS_DWD, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1095             JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1096             JE_NOT_JSON_CHR, JE_BAD_CHR},
1097 /* DWD */ { JE_SYN, JE_SYN, PS_AST, PS_AR,  JE_SYN, PS_KEY, JE_SYN, JE_SYN,
1098             JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
1099             JE_NOT_JSON_CHR, JE_BAD_CHR},
1100 /* KEYX*/ { JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
1101             PS_KNMX,PS_KNMX, PS_KNMX, PS_KNMX, PS_ESCX, PS_EKYX, PS_KNMX,
1102             JE_NOT_JSON_CHR, JE_BAD_CHR},
1103 /* KNMX */{ JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
1104             PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,PS_ESCX, PS_EKYX, PS_KNMX,
1105             JE_NOT_JSON_CHR, JE_BAD_CHR},
1106 };
1107 
1108 
json_path_setup(json_path_t * p,CHARSET_INFO * i_cs,const uchar * str,const uchar * end)1109 int json_path_setup(json_path_t *p,
1110                     CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
1111 {
1112   int c_len, t_next, state= PS_GO;
1113   enum json_path_step_types double_wildcard= JSON_PATH_KEY_NULL;
1114 
1115   json_string_setup(&p->s, i_cs, str, end);
1116 
1117   p->steps[0].type= JSON_PATH_ARRAY_WILD;
1118   p->last_step= p->steps;
1119   p->mode_strict= FALSE;
1120   p->types_used= JSON_PATH_KEY_NULL;
1121 
1122   do
1123   {
1124     if ((c_len= json_next_char(&p->s)) <= 0)
1125       t_next= json_eos(&p->s) ? P_EOS : P_BAD;
1126     else
1127       t_next= (p->s.c_next >= 128) ? P_ETC : json_path_chr_map[p->s.c_next];
1128 
1129     if ((state= json_path_transitions[state][t_next]) < 0)
1130       return p->s.error= state;
1131 
1132     p->s.c_str+= c_len;
1133 
1134     switch (state)
1135     {
1136     case PS_LAX:
1137       if ((p->s.error= skip_string_verbatim(&p->s, "ax")))
1138         return 1;
1139       p->mode_strict= FALSE;
1140       continue;
1141     case PS_SCT:
1142       if ((p->s.error= skip_string_verbatim(&p->s, "rict")))
1143         return 1;
1144       p->mode_strict= TRUE;
1145       state= PS_LAX;
1146       continue;
1147     case PS_KWD:
1148     case PS_AWD:
1149       p->last_step->type|= JSON_PATH_WILD;
1150       p->types_used|= JSON_PATH_WILD;
1151       continue;
1152     case PS_INT:
1153       p->last_step->n_item*= 10;
1154       p->last_step->n_item+= p->s.c_next - '0';
1155       continue;
1156     case PS_EKYX:
1157       p->last_step->key_end= p->s.c_str - c_len;
1158       state= PS_PT;
1159       continue;
1160     case PS_EKY:
1161       p->last_step->key_end= p->s.c_str - c_len;
1162       state= PS_KEY;
1163       /* fall through */
1164     case PS_KEY:
1165       p->last_step++;
1166       if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
1167         return p->s.error= JE_DEPTH;
1168       p->types_used|= p->last_step->type= JSON_PATH_KEY | double_wildcard;
1169       double_wildcard= JSON_PATH_KEY_NULL;
1170       /* fall through */
1171     case PS_KEYX:
1172       p->last_step->key= p->s.c_str;
1173       continue;
1174     case PS_EAR:
1175       p->last_step->key_end= p->s.c_str - c_len;
1176       state= PS_AR;
1177       /* fall through */
1178     case PS_AR:
1179       p->last_step++;
1180       if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
1181         return p->s.error= JE_DEPTH;
1182       p->types_used|= p->last_step->type= JSON_PATH_ARRAY | double_wildcard;
1183       double_wildcard= JSON_PATH_KEY_NULL;
1184       p->last_step->n_item= 0;
1185       continue;
1186     case PS_ESC:
1187       if (json_handle_esc(&p->s))
1188         return 1;
1189       state= PS_KNM;
1190       continue;
1191     case PS_ESCX:
1192       if (json_handle_esc(&p->s))
1193         return 1;
1194       state= PS_KNMX;
1195       continue;
1196     case PS_KOK:
1197       p->last_step->key_end= p->s.c_str - c_len;
1198       state= PS_OK;
1199       break; /* 'break' as the loop supposed to end after that. */
1200     case PS_DWD:
1201       double_wildcard= JSON_PATH_DOUBLE_WILD;
1202       continue;
1203     };
1204   } while (state != PS_OK);
1205 
1206   return double_wildcard ? (p->s.error= JE_SYN) : 0;
1207 }
1208 
1209 
/*
  Keep scanning until the engine's stack depth drops below 'level'.

  Returns 0 as soon as j->stack_p < level (checked before every scan
  step, so possibly without scanning at all), or 1 when
  json_scan_next() reports a nonzero result first.
*/
int json_skip_to_level(json_engine_t *j, int level)
{
  for (;;)
  {
    if (j->stack_p < level)
      return 0;
    if (json_scan_next(j) != 0)
      return 1;
  }
}
1219 
1220 
1221 /*
1222   works as json_skip_level() but also counts items on the current
1223   level skipped.
1224 */
json_skip_level_and_count(json_engine_t * j,int * n_items_skipped)1225 int json_skip_level_and_count(json_engine_t *j, int *n_items_skipped)
1226 {
1227   int level= j->stack_p;
1228 
1229   *n_items_skipped= 0;
1230   while (json_scan_next(j) == 0)
1231   {
1232     if (j->stack_p < level)
1233       return 0;
1234     if (j->stack_p == level && j->state == JST_VALUE)
1235       (*n_items_skipped)++;
1236   }
1237 
1238   return 1;
1239 }
1240 
1241 
/*
  Read the value at the current position and step over it.

  A scalar is done once it has been read; for anything else the whole
  nested level is skipped. Returns 1 if reading the value failed,
  0 for a scalar, otherwise the result of json_skip_level().
*/
int json_skip_key(json_engine_t *j)
{
  if (json_read_value(j) != 0)
    return 1;

  return json_value_scalar(j) ? 0 : json_skip_level(j);
}
1252 
1253 
1254 #define SKIPPED_STEP_MARK ((uint) ~0)
1255 
1256 /*
1257   Current step of the patch matches the JSON construction.
1258   Now we should either stop the search or go to the next
1259   step of the path.
1260 */
handle_match(json_engine_t * je,json_path_t * p,json_path_step_t ** p_cur_step,uint * array_counters)1261 static int handle_match(json_engine_t *je, json_path_t *p,
1262                         json_path_step_t **p_cur_step, uint *array_counters)
1263 {
1264   json_path_step_t *next_step= *p_cur_step + 1;
1265 
1266   DBUG_ASSERT(*p_cur_step < p->last_step);
1267 
1268   if (json_read_value(je))
1269     return 1;
1270 
1271   if (json_value_scalar(je))
1272   {
1273     while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0)
1274     {
1275       if (++next_step > p->last_step)
1276       {
1277         je->s.c_str= je->value_begin;
1278         return 1;
1279       }
1280     }
1281     return 0;
1282   }
1283 
1284   if (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0 &&
1285       je->value_type & JSON_VALUE_OBJECT)
1286   {
1287     do
1288     {
1289       array_counters[next_step - p->steps]= SKIPPED_STEP_MARK;
1290       if (++next_step > p->last_step)
1291       {
1292         je->s.c_str= je->value_begin;
1293         je->stack_p--;
1294         return 1;
1295       }
1296     } while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0);
1297   }
1298 
1299 
1300   array_counters[next_step - p->steps]= 0;
1301 
1302   if ((int) je->value_type !=
1303       (int) (next_step->type & JSON_PATH_KEY_OR_ARRAY))
1304     return json_skip_level(je);
1305 
1306   *p_cur_step= next_step;
1307   return 0;
1308 }
1309 
1310 
1311 /*
1312   Check if the name of the current JSON key matches
1313   the step of the path.
1314 */
json_key_matches(json_engine_t * je,json_string_t * k)1315 int json_key_matches(json_engine_t *je, json_string_t *k)
1316 {
1317   while (json_read_keyname_chr(je) == 0)
1318   {
1319     if (json_read_string_const_chr(k) ||
1320         je->s.c_next != k->c_next)
1321       return 0;
1322   }
1323 
1324   return json_read_string_const_chr(k);
1325 }
1326 
1327 
/*
  Drive the engine je forward until the path p matches or scanning
  stops.

  *p_cur_step is the path step being matched and is moved forward and
  backward as the scan enters and leaves nested levels.
  array_counters[] holds, per step, the index of the next array item.

  Returns je->s.error when the search stops on a match or on a failure
  inside json_skip_key()/handle_match(), and 1 when json_scan_next()
  returns nonzero before that.
*/
int json_find_path(json_engine_t *je,
                   json_path_t *p, json_path_step_t **p_cur_step,
                   uint *array_counters)
{
  json_string_t step_key;

  json_string_set_cs(&step_key, p->s.cs);

  do
  {
    json_path_step_t *step= *p_cur_step;

    switch (je->state)
    {
    case JST_KEY:
      DBUG_ASSERT(step->type & JSON_PATH_KEY);
      if (!(step->type & JSON_PATH_WILD))
      {
        json_string_set_str(&step_key, step->key, step->key_end);
        if (!json_key_matches(je, &step_key))
        {
          /* Different key: step over its value and keep scanning. */
          if (json_skip_key(je))
            return je->s.error;
          break;
        }
      }
      if (step == p->last_step ||
          handle_match(je, p, p_cur_step, array_counters))
        return je->s.error;
      break;

    case JST_VALUE:
      DBUG_ASSERT(step->type & JSON_PATH_ARRAY);
      /* The item counter is only consulted for non-wildcard steps. */
      if (!(step->type & JSON_PATH_WILD) &&
          step->n_item != array_counters[step - p->steps]++)
      {
        json_skip_array_item(je);
        break;
      }
      /* Array item matches. */
      if (step == p->last_step ||
          handle_match(je, p, p_cur_step, array_counters))
        return je->s.error;
      break;

    case JST_OBJ_END:
      /* Go one step back, then past the steps marked as skipped. */
      do
      {
        --*p_cur_step;
      } while (*p_cur_step > p->steps &&
               array_counters[*p_cur_step - p->steps] == SKIPPED_STEP_MARK);
      break;

    case JST_ARRAY_END:
      --*p_cur_step;
      break;

    default:
      DBUG_ASSERT(0);
      break;
    }
  } while (json_scan_next(je) == 0);

  /* No luck. */
  return 1;
}
1392 
1393 
/*
  Initialise the multi-path search state (paths, their number, the
  caller's path_depths array, depth 0) and look for the first match
  with json_find_paths_next(), whose result is returned.
*/
int json_find_paths_first(json_engine_t *je, json_find_paths_t *state,
                          uint n_paths, json_path_t *paths, uint *path_depths)
{
  state->paths= paths;
  state->n_paths= n_paths;
  state->path_depths= path_depths;
  state->cur_depth= 0;

  return json_find_paths_next(je, state);
}
1403 
1404 
/*
  Continue a multi-path search started by json_find_paths_first().

  For every engine state all paths in 'state' are examined at depth
  state->cur_depth. path_depths[i] < cur_depth marks path i as failed
  at the current nesting, and such paths are ignored.

  Returns je->s.error when the loop is left through 'exit' (a path
  was flagged as found, or json_skip_level() returned nonzero), and 1
  when json_scan_next() returns nonzero first.

  NOTE(review): the "last step" tests below compare cur_step, which is
  steps + cur_depth, against last_step + cur_depth. That is only equal
  when last_step == steps. It looks like a plain comparison with
  last_step was intended -- confirm before relying on this function.
  NOTE(review): state->array_counters[] is incremented here but never
  reset in this function or in json_find_paths_first() -- verify that
  the caller initialises it.
*/
int json_find_paths_next(json_engine_t *je, json_find_paths_t *state)
{
  uint p_c;
  int path_found, no_match_found;
  do
  {
    switch (je->state)
    {
    case JST_KEY:
      path_found= FALSE;
      no_match_found= TRUE;
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        json_path_step_t *cur_step;
        /* Skip paths that failed or whose step here is not a key step. */
        if (state->path_depths[p_c] <
              state->cur_depth /* Path already failed. */ ||
            !((cur_step= state->paths[p_c].steps + state->cur_depth)->type &
              JSON_PATH_KEY))
          continue;

        if (!(cur_step->type & JSON_PATH_WILD))
        {
          json_string_t key_name;
          json_string_setup(&key_name, state->paths[p_c].s.cs,
                            cur_step->key, cur_step->key_end);
          if (!json_key_matches(je, &key_name))
            continue;
        }
        if (cur_step == state->paths[p_c].last_step + state->cur_depth)
          path_found= TRUE;
        else
        {
          /* The key fits, the path continues one level deeper. */
          no_match_found= FALSE;
          state->path_depths[p_c]= state->cur_depth + 1;
        }
      }
      if (path_found)
        /* Return the result. */
        goto exit;
      if (no_match_found)
      {
        /* No possible paths left to check. Just skip the level. */
        if (json_skip_level(je))
          goto exit;
      }

      break;
    case JST_VALUE:
      path_found= FALSE;
      no_match_found= TRUE;
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        json_path_step_t *cur_step;
        /* Skip paths that failed or whose step here is not an array step. */
        if (state->path_depths[p_c]< state->cur_depth /* Path already failed. */ ||
            !((cur_step= state->paths[p_c].steps + state->cur_depth)->type &
              JSON_PATH_ARRAY))
          continue;
        if (cur_step->type & JSON_PATH_WILD ||
            cur_step->n_item == state->array_counters[state->cur_depth])
        {
          /* Array item matches. */
          if (cur_step == state->paths[p_c].last_step + state->cur_depth)
            path_found= TRUE;
          else
          {
            no_match_found= FALSE;
            state->path_depths[p_c]= state->cur_depth + 1;
          }
        }
      }

      if (path_found)
        goto exit;

      /* Nothing can match inside this item, so step over it. */
      if (no_match_found)
        json_skip_array_item(je);

      state->array_counters[state->cur_depth]++;
      break;
    case JST_OBJ_START:
    case JST_ARRAY_START:
      /* Entering a level: paths whose step type fits go deeper too. */
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        if (state->path_depths[p_c] < state->cur_depth)
          /* Path already failed. */
          continue;
        if (state->paths[p_c].steps[state->cur_depth].type &
            ((je->state == JST_OBJ_START) ? JSON_PATH_KEY : JSON_PATH_ARRAY))
          state->path_depths[p_c]++;
      }
      state->cur_depth++;
      break;
    case JST_OBJ_END:
    case JST_ARRAY_END:
      /* Leaving a level: paths that were still alive step back. */
      for (p_c=0; p_c < state->n_paths; p_c++)
      {
        if (state->path_depths[p_c] < state->cur_depth)
          continue;
        state->path_depths[p_c]--;
      }
      state->cur_depth--;
      break;
    default:
      DBUG_ASSERT(0);
      break;
    };
  } while (json_scan_next(je) == 0);

  /* No luck. */
  return 1;

exit:
  return je->s.error;
}
1519 
1520 
/*
  Convert the bytes [ascii, ascii_end) one by one into the json_cs
  charset, writing them to [json, json_end).

  Returns the number of bytes written, or the non-positive result of
  my_ci_wc_mb() for the first byte that could not be stored.
*/
int json_append_ascii(CHARSET_INFO *json_cs,
                      uchar *json, uchar *json_end,
                      const uchar *ascii, const uchar *ascii_end)
{
  uchar *out= json;

  for (; ascii < ascii_end; ascii++)
  {
    int n= my_ci_wc_mb(json_cs, (my_wc_t) *ascii, out, json_end);

    if (n <= 0)
      return n; /* Error return. */
    out+= n;
  }

  return (int)(out - json);
}
1542 
1543 
/*
  Read the JSON string body [json_str, json_end) character by
  character with json_read_string_const_chr() and store each
  character in the res_cs charset in [res, res_end).

  A character that res_cs cannot represent is replaced with '?'.
  Returns the number of bytes written, or -1 if the result buffer is
  too small or reading stopped with anything other than JE_EOS.
*/
int json_unescape(CHARSET_INFO *json_cs,
                  const uchar *json_str, const uchar *json_end,
                  CHARSET_INFO *res_cs, uchar *res, uchar *res_end)
{
  json_string_t s;
  uchar *out= res;

  json_string_setup(&s, json_cs, json_str, json_end);

  while (json_read_string_const_chr(&s) == 0)
  {
    int n= my_ci_wc_mb(res_cs, s.c_next, out, res_end);

    if (n <= 0)
    {
      /* Anything but "no such character" means the buffer is full. */
      if (n != MY_CS_ILUNI)
        return -1;

      /*
        Result charset doesn't support the json's character.
        Let's replace it with the '?' symbol.
      */
      n= my_ci_wc_mb(res_cs, '?', out, res_end);
      if (n <= 0)
        return -1;
    }
    out+= n;
  }

  return (s.error == JE_EOS) ? (int)(out - res) : -1;
}
1578 
1579 
/*
  When we need to replace a character with the escaping.
  Every non-zero class except ESC_BS is the very character that
  json_escape() writes after the backslash.
*/
enum json_esc_char_classes {
  ESC_= 0,    /* No need to escape. */
  ESC_U= 'u', /* Character not allowed in JSON. Always escape as \uXXXX. */
  ESC_B= 'b', /* Backspace. Escape as \b */
  ESC_F= 'f', /* Formfeed. Escape as \f */
  ESC_N= 'n', /* Newline. Escape as \n */
  ESC_R= 'r', /* Return. Escape as \r */
  ESC_T= 't', /* Tab. Escape as \t */
  ESC_BS= '\\'  /* Backslash or '"'. Escape by the \\ prefix. */
};
1591 
1592 
/*
  This specifies how we should escape the character.
  Only codes 0x00 - 0x5F are listed; json_escape() does not consult
  the table for codes of 0x60 and above.
*/
static enum json_esc_char_classes json_escape_chr_map[0x60] = {
  /* 0x00 - 0x1F: control characters, always escaped. */
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
  ESC_B,   ESC_T,   ESC_N,   ESC_U,   ESC_F,   ESC_R,   ESC_U,   ESC_U,
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,
  ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,   ESC_U,

  /* 0x20 - 0x3F: only the double quote (0x22) is escaped. */
  ESC_,    ESC_,    ESC_BS,  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,

  /* 0x40 - 0x5F: only the backslash (0x5C) is escaped. */
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,    ESC_,
  ESC_,    ESC_,    ESC_,    ESC_,    ESC_BS,  ESC_,    ESC_,    ESC_,
};
1610 
1611 
/*
  Upper-case hex digit for each 4-bit value.
  The array holds exactly the 16 digits and no terminating NUL, so it
  must only be indexed, never used as a C string.
*/
static const char hexconv[16] = "0123456789ABCDEF";
1613 
1614 
/*
  Convert the string [str, str_end) from the str_cs charset into a
  JSON string body in the json_cs charset, written to [json, json_end).

  Characters with a non-zero class in json_escape_chr_map[] are
  written as a backslash followed by the class character (or by the
  character itself for '"' and '\'). Characters that json_cs cannot
  represent, and those of class ESC_U, are written as "\u" followed by
  the hex digits of their UTF-16 encoding.

  NOTE(review): a character that needs two UTF-16 units is written as
  a single "\u" followed by eight hex digits, whereas RFC 8259 spells
  a surrogate pair as two \uXXXX escapes. Kept as is -- confirm what
  the reading side expects before changing it.

  Returns the number of bytes written, or -1 when the input holds an
  illegal character, a character has no UTF-16 form, or the JSON
  buffer is too small.
*/
int json_escape(CHARSET_INFO *str_cs,
                const uchar *str, const uchar *str_end,
                CHARSET_INFO *json_cs, uchar *json, uchar *json_end)
{
  const uchar *json_start= json;

  while (str < str_end)
  {
    my_wc_t c_chr;
    int c_len;
    if ((c_len= my_ci_mb_wc(str_cs, &c_chr, str, str_end)) > 0)
    {
      enum json_esc_char_classes c_class;

      str+= c_len;
      if (c_chr >= 0x60 || (c_class= json_escape_chr_map[c_chr]) == ESC_)
      {
        /* No escaping needed: store the character as it is. */
        if ((c_len= my_ci_wc_mb(json_cs, c_chr, json, json_end)) > 0)
        {
          json+= c_len;
          continue;
        }
        if (c_len < 0)
        {
          /* JSON buffer is depleted. */
          return -1;
        }

        /* JSON charset cannot convert this character. */
        c_class= ESC_U;
      }

      /* Write the backslash and the character that follows it. */
      if ((c_len= my_ci_wc_mb(json_cs, '\\', json, json_end)) <= 0 ||
          (c_len= my_ci_wc_mb(json_cs, (c_class == ESC_BS) ? c_chr : c_class,
                                       json+= c_len, json_end)) <= 0)
      {
        /* JSON buffer is depleted. */
        return -1;
      }
      json+= c_len;

      if (c_class != ESC_U)
        continue;

      {
        /* We have to use \uXXXX escaping. */
        uchar utf16buf[4];
        uchar code_str[8];
        int u_len= my_uni_utf16(0, c_chr, utf16buf, utf16buf + 4);
        int i;

        /*
          The character has no UTF-16 form, so utf16buf was not
          filled in. Stop before reading it.
        */
        if (u_len <= 0 || u_len > (int) sizeof(utf16buf))
          return -1;

        /* Two hex digits per UTF-16 byte. */
        for (i= 0; i < u_len; i++)
        {
          code_str[2 * i]= hexconv[utf16buf[i] >> 4];
          code_str[2 * i + 1]= hexconv[utf16buf[i] & 15];
        }

        if ((c_len= json_append_ascii(json_cs, json, json_end,
                                      code_str, code_str+u_len*2)) > 0)
        {
          json+= c_len;
          continue;
        }
        /* JSON buffer is depleted. */
        return -1;
      }
    }
    else /* c_len <= 0, an illegal symbol. */
      return -1;
  }

  return (int)(json - json_start);
}
1694 
1695 
/*
  Prepare for walking all paths of the JSON text [str, end).

  Starts the scan on je and sets p->last_step one position before
  p->steps, which json_get_path_next() takes as "nothing read yet".
  Always returns 0.
*/
int json_get_path_start(json_engine_t *je, CHARSET_INFO *i_cs,
                        const uchar *str, const uchar *end,
                        json_path_t *p)
{
  p->last_step= p->steps - 1;
  json_scan_start(je, i_cs, str, end);

  return 0;
}
1704 
1705 
/*
  Advance to the next value of the document, keeping p equal to the
  path of that value.

  On the first call (p->last_step still before p->steps) the top-level
  value is read and step 0 is set up as an array-wildcard step.
  On later calls the path is first adjusted for the value returned
  previously: a scalar bumps the item number of an array step, a
  non-scalar opens a new step of the value's type. Then the engine is
  scanned up to the next value.

  Returns 0 when a value has been read, 1 on a read or scan failure
  or when scanning ends.
*/
int json_get_path_next(json_engine_t *je, json_path_t *p)
{
  if (p->last_step < p->steps)
  {
    /* First call: the top-level value. */
    if (json_read_value(je) != 0)
      return 1;

    p->last_step= p->steps;
    p->steps[0].type= JSON_PATH_ARRAY_WILD;
    p->steps[0].n_item= 0;
    return 0;
  }

  if (!json_value_scalar(je))
  {
    /* The previous value opened a new level. */
    p->last_step++;
    p->last_step->type= (enum json_path_step_types) je->value_type;
    p->last_step->n_item= 0;
  }
  else if (p->last_step->type & JSON_PATH_ARRAY)
    p->last_step->n_item++;

  if (json_scan_next(je) != 0)
    return 1;

  do
  {
    switch (je->state)
    {
    case JST_KEY:
      /* Record where the key name starts and where it ends. */
      p->last_step->key= je->s.c_str;
      do
      {
        p->last_step->key_end= je->s.c_str;
      } while (json_read_keyname_chr(je) == 0);
      if (je->s.error)
        return 1;
      /* Now we have je.state == JST_VALUE, so let's handle it. */

      /* fall through */
    case JST_VALUE:
      return json_read_value(je) != 0;

    case JST_OBJ_END:
    case JST_ARRAY_END:
      /* Leave the level; the enclosing array moves to its next item. */
      p->last_step--;
      if (p->last_step->type & JSON_PATH_ARRAY)
        p->last_step->n_item++;
      break;

    default:
      break;
    }
  } while (json_scan_next(je) == 0);

  return 1;
}
1768 
1769 
/*
  Compare the path steps [a, a_end] with the steps [b, b_end]; both
  ranges are inclusive. 'a' may contain wildcards and double
  wildcards, 'b' must not (asserted below).

  An array step of 'a' with n_item == 0 and no wildcard also fits a
  key step of 'b' without consuming it. When 'b' is exhausted, the
  remaining steps of 'a' still match if all of them are such plain
  zero-index array steps and vt is not JSON_VALUE_ARRAY.

  Returns
     0 - the paths match,
     1 - all of 'a' matched but 'b' has steps left,
    -1 - a step of 'a' failed and it had no double wildcard,
    -2 - 'b' ended before 'a'.
  For a double wildcard step two recursive attempts are made; a
  non-negative result of the second one takes precedence over the
  first.
*/
int json_path_parts_compare(
    const json_path_step_t *a, const json_path_step_t *a_end,
    const json_path_step_t *b, const json_path_step_t *b_end,
    enum json_value_types vt)
{
  int res, res2;

  while (a <= a_end)
  {
    if (b > b_end)
    {
      /* 'b' is exhausted: only trailing plain [0] steps may remain. */
      while (vt != JSON_VALUE_ARRAY &&
             (a->type & JSON_PATH_ARRAY_WILD) == JSON_PATH_ARRAY &&
             a->n_item == 0)
      {
        if (++a > a_end)
          return 0;
      }
      return -2;
    }

    DBUG_ASSERT((b->type & (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD)) == 0);


    if (a->type & JSON_PATH_ARRAY)
    {
      if (b->type & JSON_PATH_ARRAY)
      {
        if ((a->type & JSON_PATH_WILD) || a->n_item == b->n_item)
          goto step_fits;
        goto step_failed;
      }
      /* Array step against a key step: only a plain [0] fits. */
      if ((a->type & JSON_PATH_WILD) == 0 && a->n_item == 0)
        goto step_fits_autowrap;
      goto step_failed;
    }
    else /* JSON_PATH_KEY */
    {
      if (!(b->type & JSON_PATH_KEY))
        goto step_failed;

      /* Key names are compared byte by byte, lengths first. */
      if (!(a->type & JSON_PATH_WILD) &&
          (a->key_end - a->key != b->key_end - b->key ||
           memcmp(a->key, b->key, a->key_end - a->key) != 0))
        goto step_failed;

      goto step_fits;
    }
step_failed:
    /* A double wildcard step may be retried on the next step of 'b'. */
    if (!(a->type & JSON_PATH_DOUBLE_WILD))
      return -1;
    b++;
    continue;

step_fits:
    b++;
    if (!(a->type & JSON_PATH_DOUBLE_WILD))
    {
      a++;
      continue;
    }

    /* Double wild handling needs recursions. */
    res= json_path_parts_compare(a+1, a_end, b, b_end, vt);
    if (res == 0)
      return 0;

    /* Otherwise retry the same step of 'a' on the rest of 'b'. */
    res2= json_path_parts_compare(a, a_end, b, b_end, vt);

    return (res2 >= 0) ? res2 : res;

step_fits_autowrap:
    /* Unlike step_fits, the step of 'b' is not consumed here. */
    if (!(a->type & JSON_PATH_DOUBLE_WILD))
    {
      a++;
      continue;
    }

    /* Double wild handling needs recursions. */
    res= json_path_parts_compare(a+1, a_end, b+1, b_end, vt);
    if (res == 0)
      return 0;

    res2= json_path_parts_compare(a, a_end, b+1, b_end, vt);

    return (res2 >= 0) ? res2 : res;

  }

  return b <= b_end;
}
1861 
1862 
json_path_compare(const json_path_t * a,const json_path_t * b,enum json_value_types vt)1863 int json_path_compare(const json_path_t *a, const json_path_t *b,
1864                       enum json_value_types vt)
1865 {
1866   return json_path_parts_compare(a->steps+1, a->last_step,
1867                                  b->steps+1, b->last_step, vt);
1868 }
1869 
1870 
/*
  Read the value at the engine's current position and report where it
  is and how long it is.

  *value is set to je->value. For a scalar *value_len is
  je->value_len; for an object or array the whole level is skipped
  and *value_len is the distance from *value to the position reached.

  Returns the value's type, or JSV_BAD_JSON if reading or skipping
  failed (*value_len is then not set).
*/
static enum json_types smart_read_value(json_engine_t *je,
                                        const char **value, int *value_len)
{
  if (json_read_value(je))
    return JSV_BAD_JSON;

  *value= (char *) je->value;

  if (!json_value_scalar(je))
  {
    if (json_skip_level(je))
      return JSV_BAD_JSON;

    *value_len= (int) ((char *) je->s.c_str - *value);
  }
  else
    *value_len= je->value_len;

  /* The cast below relies on both enums using the same numbers. */
  compile_time_assert((int) JSON_VALUE_OBJECT == (int) JSV_OBJECT);
  compile_time_assert((int) JSON_VALUE_ARRAY == (int) JSV_ARRAY);
  compile_time_assert((int) JSON_VALUE_STRING == (int) JSV_STRING);
  compile_time_assert((int) JSON_VALUE_NUMBER == (int) JSV_NUMBER);
  compile_time_assert((int) JSON_VALUE_TRUE == (int) JSV_TRUE);
  compile_time_assert((int) JSON_VALUE_FALSE == (int) JSV_FALSE);
  compile_time_assert((int) JSON_VALUE_NULL == (int) JSV_NULL);

  return (enum json_types) je->value_type;
}
1902 
1903 
json_type(const char * js,const char * js_end,const char ** value,int * value_len)1904 enum json_types json_type(const char *js, const char *js_end,
1905                           const char **value, int *value_len)
1906 {
1907   json_engine_t je;
1908 
1909   json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1910                   (const uchar *) js_end);
1911 
1912   return smart_read_value(&je, value, value_len);
1913 }
1914 
1915 
json_get_array_item(const char * js,const char * js_end,int n_item,const char ** value,int * value_len)1916 enum json_types json_get_array_item(const char *js, const char *js_end,
1917                                     int n_item,
1918                                     const char **value, int *value_len)
1919 {
1920   json_engine_t je;
1921   int c_item= 0;
1922 
1923   json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1924                   (const uchar *) js_end);
1925 
1926   if (json_read_value(&je) ||
1927       je.value_type != JSON_VALUE_ARRAY)
1928     goto err_return;
1929 
1930   while (!json_scan_next(&je))
1931   {
1932     switch (je.state)
1933     {
1934     case JST_VALUE:
1935       if (c_item == n_item)
1936         return smart_read_value(&je, value, value_len);
1937 
1938       if (json_skip_key(&je))
1939         goto err_return;
1940 
1941       c_item++;
1942       break;
1943 
1944     case JST_ARRAY_END:
1945       *value= (const char *) (je.s.c_str - je.sav_c_len);
1946       *value_len= c_item;
1947       return JSV_NOTHING;
1948     }
1949   }
1950 
1951 err_return:
1952   return JSV_BAD_JSON;
1953 }
1954 
1955 
1956 /** Simple json lookup for a value by the key.
1957 
1958   Expects JSON object.
1959   Only scans the 'first level' of the object, not
1960   the nested structures.
1961 
1962   @param js          [in]       json object to search in
1963   @param js_end      [in]       end of json string
1964   @param key         [in]       key to search for
1965   @param key_end     [in]         - " -
1966   @param value_start [out]      pointer into js (value or closing })
1967   @param value_len   [out]      length of the value found or number of keys
1968 
1969   @retval the type of the key value
1970   @retval JSV_BAD_JSON - syntax error found reading JSON.
1971                          or not JSON object.
1972   @retval JSV_NOTHING - no such key found.
1973 */
json_get_object_key(const char * js,const char * js_end,const char * key,const char ** value,int * value_len)1974 enum json_types json_get_object_key(const char *js, const char *js_end,
1975                                     const char *key,
1976                                     const char **value, int *value_len)
1977 {
1978   const char *key_end= key + strlen(key);
1979   json_engine_t je;
1980   json_string_t key_name;
1981   int n_keys= 0;
1982 
1983   json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);
1984 
1985   json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
1986                   (const uchar *) js_end);
1987 
1988   if (json_read_value(&je) ||
1989       je.value_type != JSON_VALUE_OBJECT)
1990     goto err_return;
1991 
1992   while (!json_scan_next(&je))
1993   {
1994     switch (je.state)
1995     {
1996     case JST_KEY:
1997       n_keys++;
1998       json_string_set_str(&key_name, (const uchar *) key,
1999                           (const uchar *) key_end);
2000       if (json_key_matches(&je, &key_name))
2001         return smart_read_value(&je, value, value_len);
2002 
2003       if (json_skip_key(&je))
2004         goto err_return;
2005 
2006       break;
2007 
2008     case JST_OBJ_END:
2009       *value= (const char *) (je.s.c_str - je.sav_c_len);
2010       *value_len= n_keys;
2011       return JSV_NOTHING;
2012     }
2013   }
2014 
2015 err_return:
2016   return JSV_BAD_JSON;
2017 }
2018 
2019 
json_get_object_nkey(const char * js,const char * js_end,int nkey,const char ** keyname,const char ** keyname_end,const char ** value,int * value_len)2020 enum json_types json_get_object_nkey(const char *js __attribute__((unused)),
2021                                      const char *js_end __attribute__((unused)),
2022                                      int nkey __attribute__((unused)),
2023                                      const char **keyname __attribute__((unused)),
2024                                      const char **keyname_end __attribute__((unused)),
2025                                      const char **value __attribute__((unused)),
2026                                      int *value_len __attribute__((unused)))
2027 {
2028   return JSV_NOTHING;
2029 }
2030 
2031 
2032 /** Check if json is valid (well-formed)
2033 
2034   @retval 0 - success, json is well-formed
2035   @retval 1 - error, json is invalid
2036 */
json_valid(const char * js,size_t js_len,CHARSET_INFO * cs)2037 int json_valid(const char *js, size_t js_len, CHARSET_INFO *cs)
2038 {
2039   json_engine_t je;
2040   json_scan_start(&je, cs, (const uchar *) js, (const uchar *) js + js_len);
2041   while (json_scan_next(&je) == 0) /* no-op */ ;
2042   return je.s.error == 0;
2043 }
2044 
2045 
2046 /*
2047   Expects the JSON object as an js argument, and the key name.
2048   Looks for this key in the object and returns
2049   the location of all the text related to it.
2050   The text includes the comma, separating this key.
2051 
2052   comma_pos - the hint where the comma is. It is important
2053        if you plan to replace the key rather than just cut.
2054     1  - comma is on the left
2055     2  - comma is on the right.
2056     0  - no comma at all (the object has just this single key)
2057 
2058   if no such key found *key_start is set to NULL.
2059 */
json_locate_key(const char * js,const char * js_end,const char * kname,const char ** key_start,const char ** key_end,int * comma_pos)2060 int json_locate_key(const char *js, const char *js_end,
2061                     const char *kname,
2062                     const char **key_start, const char **key_end,
2063                     int *comma_pos)
2064 {
2065   const char *kname_end= kname + strlen(kname);
2066   json_engine_t je;
2067   json_string_t key_name;
2068   int t_next, c_len, match_result;
2069 
2070   json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);
2071 
2072   json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
2073                   (const uchar *) js_end);
2074 
2075   if (json_read_value(&je) ||
2076       je.value_type != JSON_VALUE_OBJECT)
2077     goto err_return;
2078 
2079   *key_start= (const char *) je.s.c_str;
2080   *comma_pos= 0;
2081 
2082   while (!json_scan_next(&je))
2083   {
2084     switch (je.state)
2085     {
2086     case JST_KEY:
2087       json_string_set_str(&key_name, (const uchar *) kname,
2088                           (const uchar *) kname_end);
2089       match_result= json_key_matches(&je, &key_name);
2090       if (json_skip_key(&je))
2091         goto err_return;
2092       get_first_nonspace(&je.s, &t_next, &c_len);
2093       je.s.c_str-= c_len;
2094 
2095       if (match_result)
2096       {
2097         *key_end= (const char *) je.s.c_str;
2098 
2099         if (*comma_pos == 1)
2100           return 0;
2101 
2102         DBUG_ASSERT(*comma_pos == 0);
2103 
2104         if (t_next == C_COMMA)
2105         {
2106           *key_end+= c_len;
2107           *comma_pos= 2;
2108         }
2109         else if (t_next == C_RCURB)
2110           *comma_pos= 0;
2111         else
2112           goto err_return;
2113         return 0;
2114       }
2115 
2116       *key_start= (const char *) je.s.c_str;
2117       *comma_pos= 1;
2118       break;
2119 
2120     case JST_OBJ_END:
2121       *key_start= NULL;
2122       return 0;
2123     }
2124   }
2125 
2126 err_return:
2127   return 1;
2128 
2129 }
2130