1 #ifndef JSON_LIB_INCLUDED
2 #define JSON_LIB_INCLUDED
3 
4 #ifdef __cplusplus
5 extern "C" {
6 #endif
7 
/*
  Maximum nesting depth handled by the library. It sizes the per-level
  arrays declared in this header: the path steps, the engine stack and
  the array counters.
*/
#define JSON_DEPTH_LIMIT 32
9 
/*
  Error codes of the JSON parser.
  When an error happens, the c_next of the JSON engine contains the
  character that caused the error, and the c_str is the position
  in the string where the error occurred.
*/
enum json_errors
{
  /* Invalid character, the charset handler cannot read it. */
  JE_BAD_CHR= -1,

  /* Character met is not used in JSON (ASCII 00-08 for instance). */
  JE_NOT_JSON_CHR= -2,

  /* Unexpected end of string. */
  JE_EOS= -3,

  /* The next character breaks the JSON syntax. */
  JE_SYN= -4,

  /* Character disallowed in a string constant. */
  JE_STRING_CONST= -5,

  /* Error in the escaping. */
  JE_ESCAPING= -6,

  /* The limit on the JSON depth was overrun. */
  JE_DEPTH= -7
};
31 
32 
33 typedef struct st_json_string_t
34 {
35   const uchar *c_str;    /* Current position in JSON string */
36   const uchar *str_end;  /* The end on the string. */
37   my_wc_t c_next;        /* UNICODE of the last read character */
38   int error;             /* error code. */
39 
40   CHARSET_INFO *cs;      /* Character set of the JSON string. */
41 
42   my_charset_conv_mb_wc wc; /* UNICODE conversion function. */
43                             /* It's taken out of the cs just to speed calls. */
44 } json_string_t;
45 
46 
47 void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs);
48 void json_string_set_str(json_string_t *s,
49                          const uchar *str, const uchar *end);
/*
  Call the conversion function stored in 'js' for the character at the
  current position; the decoded character goes to js->c_next.
  Evaluates to whatever the conversion function returns.
  Note that 'js' is evaluated several times.
*/
#define json_next_char(js) \
  ((js)->wc((js)->cs, &((js)->c_next), (js)->c_str, (js)->str_end))
/* Non-zero when the current position of 'js' has reached the end of the string. */
#define json_eos(js) (!((js)->c_str < (js)->str_end))
53 /*
54   read_string_const_chr() reads the next character of the string constant
55   and saves it to the js->c_next.
56   It takes into account possible escapings, so if for instance
57   the string is '\b', the read_string_const_chr() sets 8.
58 */
59 int json_read_string_const_chr(json_string_t *js);
60 
61 
62 /*
63   Various JSON-related operations expect JSON path as a parameter.
64   The path is a string like this "$.keyA[2].*"
65   The path itself is a number of steps specifying either a key or a position
66   in an array. Some of them can be wildcards.
  So the JSON path is represented by the json_path_t structure,
  which contains an array of json_path_step_t elements.
69 */
70 
71 
/*
  Path step types. These are bitmasks, so they can be combined
  and tested with the '&' and '|' operations.
*/
enum json_path_step_types
{
  JSON_PATH_KEY_NULL= 0,

  /* Must be equal to JSON_VALUE_OBJECT. */
  JSON_PATH_KEY= 1,

  /* Must be equal to JSON_VALUE_ARRAY. */
  JSON_PATH_ARRAY= 2,

  JSON_PATH_KEY_OR_ARRAY= JSON_PATH_KEY | JSON_PATH_ARRAY,

  /* Step like .* or [*] */
  JSON_PATH_WILD= 4,

  /* Step like **.k or **[1] */
  JSON_PATH_DOUBLE_WILD= 8,

  JSON_PATH_KEY_WILD= JSON_PATH_KEY | JSON_PATH_WILD,
  JSON_PATH_KEY_DOUBLEWILD= JSON_PATH_KEY | JSON_PATH_DOUBLE_WILD,
  JSON_PATH_ARRAY_WILD= JSON_PATH_ARRAY | JSON_PATH_WILD,
  JSON_PATH_ARRAY_DOUBLEWILD= JSON_PATH_ARRAY | JSON_PATH_DOUBLE_WILD
};
86 
87 
88 typedef struct st_json_path_step_t
89 {
90   enum json_path_step_types type;  /* The type of the step -   */
91                                    /* see json_path_step_types */
92   const uchar *key; /* Pointer to the beginning of the key. */
93   const uchar *key_end;  /* Pointer to the end of the key. */
94   uint n_item;      /* Item number in an array. No meaning for the key step. */
95 } json_path_step_t;
96 
97 
98 typedef struct st_json_path_t
99 {
100   json_string_t s;  /* The string to be parsed. */
101   json_path_step_t steps[JSON_DEPTH_LIMIT]; /* Steps of the path. */
102   json_path_step_t *last_step; /* Points to the last step. */
103 
104   int mode_strict; /* TRUE if the path specified as 'strict' */
105   enum json_path_step_types types_used; /* The '|' of all step's 'type'-s */
106 } json_path_t;
107 
108 
109 int json_path_setup(json_path_t *p,
110                     CHARSET_INFO *i_cs, const uchar *str, const uchar *end);
111 
112 
113 /*
114   The set of functions and structures below provides interface
115   to the JSON text parser.
116   Running the parser normally goes like this:
117 
    json_engine_t j_eng;   // structure keeps parser's data
    json_scan_start(&j_eng, cs, str, end); // begin the parsing
120 
121     do
122     {
123       // The parser has read next piece of JSON
124       // and set fields of j_eng structure accordingly.
125       // So let's see what we have:
126       switch (j_eng.state)
127       {
128         case JST_KEY:
129            // Handle key name. See the json_read_keyname_chr()
130            // Probably compare it with the keyname we're looking for
131         case JST_VALUE:
132            // Handle value. It is either value of the key or an array item.
133            // see the json_read_value()
134         case JST_OBJ_START:
135           // parser found an object (the '{' in JSON)
136         case JST_OBJ_END:
137           // parser found the end of the object (the '}' in JSON)
138         case JST_ARRAY_START:
139           // parser found an array (the '[' in JSON)
140         case JST_ARRAY_END:
141           // parser found the end of the array (the ']' in JSON)
142 
143       };
    } while (json_scan_next(&j_eng) == 0);  // parse next structure
145 
146 
    if (j_eng.s.error)  // we need to check why the loop ended:
                        // did we reach the end of the JSON, or hit an error?
149     {
150        signal_error_in_JSON()
151     }
152 
153 
154   Parts of JSON can be quickly skipped. If we are not interested
155   in a particular key, we can just skip it with json_skip_key() call.
156   Similarly json_skip_level() goes right to the end of an object
157   or an array.
158 */
159 
160 
/* The JSON parser states that the user can expect and has to handle. */
enum json_states
{
  JST_VALUE= 0,        /* value found      */
  JST_KEY= 1,          /* key found        */
  JST_OBJ_START= 2,    /* object           */
  JST_OBJ_END= 3,      /* object ended     */
  JST_ARRAY_START= 4,  /* array            */
  JST_ARRAY_END= 5,    /* array ended      */
  NR_JSON_USER_STATES= 6 /* the number of the states listed above */
};
171 
172 
/*
  Types of a JSON value.
  The OBJECT and ARRAY values are the only non-scalar ones and are kept
  below all the scalar types - json_value_scalar() relies on that.
*/
enum json_value_types
{
  JSON_VALUE_UNINITALIZED= 0,

  /* Structured values. */
  JSON_VALUE_OBJECT= 1,
  JSON_VALUE_ARRAY= 2,

  /* Scalar values. */
  JSON_VALUE_STRING= 3,
  JSON_VALUE_NUMBER= 4,
  JSON_VALUE_TRUE= 5,
  JSON_VALUE_FALSE= 6,
  JSON_VALUE_NULL= 7
};
184 
185 
/* Bit flags describing a number value. */
enum json_num_flags
{
  /* Number is negative. */
  JSON_NUM_NEG= 1 << 0,

  /* The fractional part is not empty. */
  JSON_NUM_FRAC_PART= 1 << 1,

  /* The number has the 'e' part. */
  JSON_NUM_EXP= 1 << 2
};
192 
193 
194 typedef struct st_json_engine_t
195 {
196   json_string_t s;  /* String to parse. */
197   int sav_c_len;    /* Length of the current character.
198                        Can be more than 1 for multibyte charsets */
199 
200   int state; /* The state of the parser. One of 'enum json_states'.
201                 It tells us what construction of JSON we've just read. */
202 
203   /* These values are only set after the json_read_value() call. */
204   enum json_value_types value_type; /* type of the value.*/
205   const uchar *value;      /* Points to the value. */
206   const uchar *value_begin;/* Points to where the value starts in the JSON. */
207   int value_escaped;       /* Flag telling if the string value has escaping.*/
208   uint num_flags;  /* the details of the JSON_VALUE_NUMBER, is it negative,
209                       or if it has the fractional part.
210                       See the enum json_num_flags. */
211 
212   /*
213     In most cases the 'value' and 'value_begin' are equal.
214     They only differ if the value is a string constants. Then 'value_begin'
215     points to the starting quotation mark, while the 'value' - to
216     the first character of the string.
217   */
218 
219   const uchar *value_end; /* Points to the next character after the value. */
220   int value_len; /* The length of the value. Does not count quotations for */
221                  /* string constants. */
222 
223   int stack[JSON_DEPTH_LIMIT]; /* Keeps the stack of nested JSON structures. */
224   int stack_p;                 /* The 'stack' pointer. */
225 } json_engine_t;
226 
227 
228 int json_scan_start(json_engine_t *je,
229                         CHARSET_INFO *i_cs, const uchar *str, const uchar *end);
230 int json_scan_next(json_engine_t *j);
231 
232 
233 /*
234   json_read_keyname_chr() function assists parsing the name of an JSON key.
235   It only can be called when the json_engine is in JST_KEY.
236   The json_read_keyname_chr() reads one character of the name of the key,
237   and puts it in j_eng.s.next_c.
238   Typical usage is like this:
239 
240   if (j_eng.state == JST_KEY)
241   {
242     while (json_read_keyname_chr(&j) == 0)
243     {
244       //handle next character i.e. match it against the pattern
245     }
246   }
247 */
248 
249 int json_read_keyname_chr(json_engine_t *j);
250 
251 
252 /*
253   Check if the name of the current JSON key matches
254   the step of the path.
255 */
256 int json_key_matches(json_engine_t *je, json_string_t *k);
257 
258 
259 /*
260   json_read_value() function parses the JSON value syntax,
261   so that we can handle the value of a key or an array item.
262   It only returns meaningful result when the engine is in
263   the JST_VALUE state.
264 
265   Typical usage is like this:
266 
267   if (j_eng.state ==  JST_VALUE)
268   {
269     json_read_value(&j_eng);
270     switch(j_eng.value_type)
271     {
272       case JSON_VALUE_STRING:
273         // get the string
274         str= j_eng.value;
275         str_length= j_eng.value_len;
276       case JSON_VALUE_NUMBER:
277         // get the number
278       ... etc
279     }
280 */
281 int json_read_value(json_engine_t *j);
282 
283 
284 /*
285   json_skip_key() makes parser skip the content of the current
286   JSON key quickly.
287   It can be called only when the json_engine state is JST_KEY.
288   Typical usage is:
289 
290   if (j_eng.state == JST_KEY)
291   {
292     if (key_does_not_match(j_eng))
293       json_skip_key(j_eng);
294   }
295 */
296 
297 int json_skip_key(json_engine_t *j);
298 
299 
/*
  Pointer to a read-only int.
  NOTE(review): json_get_level() and json_skip_to_level() in this header
  work with a plain 'int' level, so this type looks unused by them -
  confirm before relying on it.
*/
typedef int const *json_level_t;
301 
/*
  json_skip_to_level() makes the parser quickly get out of nested
  objects and arrays. It is used when we're not interested in what is
  there in the rest of these structures.
  The 'level' should be remembered in advance:
        int level= json_get_level(j);
        .... // getting into the nested JSON structures
        json_skip_to_level(j, level);

  json_get_level() yields the 'stack_p' of the engine pointed to by 'j'.
  The argument is parenthesized so that any pointer expression,
  like '&j_eng' or 'engines + 1', can be passed safely.
*/
#define json_get_level(j) ((j)->stack_p)
312 
313 int json_skip_to_level(json_engine_t *j, int level);
314 
/*
  json_skip_level() is json_skip_to_level() applied to the current
  structure only: it gets to the end of the current JSON array or object.
  Note that 'je' is evaluated twice.
*/
#define json_skip_level(je) \
  json_skip_to_level((je), (je)->stack_p)
321 
322 
323 /*
324   works as json_skip_level() but also counts items on the current
325   level skipped.
326 */
327 int json_skip_level_and_count(json_engine_t *j, int *n_items_skipped);
328 
/* Skipping an array item is done by the same function as skipping a key. */
#define json_skip_array_item json_skip_key
330 
/*
  Non-zero if the current value of the engine 'je' is of a scalar type,
  that is neither an OBJECT nor an ARRAY.
  It relies on all the scalar types being numbered above JSON_VALUE_ARRAY.
*/
#define json_value_scalar(je)  (JSON_VALUE_ARRAY < (je)->value_type)
336 
337 
338 /*
339   Look for the JSON PATH in the json string.
340   Function can be called several times with same JSON/PATH to
341   find multiple matches.
342   On the first call, the json_engine_t parameter should be
343   initialized with the JSON string, and the json_path_t with the JSON path
344   appropriately. The 'p_cur_step' should point at the first
345   step of the path.
346   The 'array_counters' is the array of JSON_DEPTH_LIMIT size.
347   It stores the array counters of the parsed JSON.
348   If function returns 0, it means it found the match. The position of
349   the match is je->s.c_str. Then we can call the json_find_path()
350   with same engine/path/p_cur_step to get the next match.
351   Non-zero return means no matches found.
352   Check je->s.error to see if there was an error in JSON.
353 */
354 int json_find_path(json_engine_t *je,
355                    json_path_t *p, json_path_step_t **p_cur_step,
356                    uint *array_counters);
357 
358 
359 typedef struct st_json_find_paths_t
360 {
361   uint n_paths;
362   json_path_t *paths;
363   uint cur_depth;
364   uint *path_depths;
365   uint array_counters[JSON_DEPTH_LIMIT];
366 } json_find_paths_t;
367 
368 
369 int json_find_paths_first(json_engine_t *je, json_find_paths_t *state,
370                           uint n_paths, json_path_t *paths, uint *path_depths);
371 int json_find_paths_next(json_engine_t *je, json_find_paths_t *state);
372 
373 
374 /*
375   Converst JSON string constant into ordinary string constant
376   which can involve unpacking json escapes and changing character set.
377   Returns negative integer in the case of an error,
378   the length of the result otherwise.
379 */
380 int json_unescape(CHARSET_INFO *json_cs,
381                   const uchar *json_str, const uchar *json_end,
382                   CHARSET_INFO *res_cs,
383                   uchar *res, uchar *res_end);
384 
385 /*
386   Converst ordinary string constant into JSON string constant.
387   which can involve appropriate escaping and changing character set.
388   Returns negative integer in the case of an error,
389   the length of the result otherwise.
390 */
391 int json_escape(CHARSET_INFO *str_cs, const uchar *str, const uchar *str_end,
392                 CHARSET_INFO *json_cs, uchar *json, uchar *json_end);
393 
394 
395 /*
396   Appends the ASCII string to the json with the charset conversion.
397 */
398 int json_append_ascii(CHARSET_INFO *json_cs,
399                       uchar *json, uchar *json_end,
400                       const uchar *ascii, const uchar *ascii_end);
401 
402 
403 /*
404   Scan the JSON and return paths met one-by-one.
405      json_get_path_start(&p)
406      while (json_get_path_next(&p))
407      {
408        handle_the_next_path();
409      }
410 */
411 
412 int json_get_path_start(json_engine_t *je, CHARSET_INFO *i_cs,
413                         const uchar *str, const uchar *end,
414                         json_path_t *p);
415 
416 
417 int json_get_path_next(json_engine_t *je, json_path_t *p);
418 
419 
420 int json_path_parts_compare(
421         const json_path_step_t *a, const json_path_step_t *a_end,
422         const json_path_step_t *b, const json_path_step_t *b_end,
423         enum json_value_types vt);
424 int json_path_compare(const json_path_t *a, const json_path_t *b,
425                       enum json_value_types vt);
426 
427 int json_valid(const char *js, size_t js_len, CHARSET_INFO *cs);
428 
/*
  NOTE(review): not documented in this header. Presumably it looks for
  the key named 'kname' in the JSON text between 'js' and 'js_end' and
  reports where it is through 'key_start', 'key_end' and 'comma_pos' -
  confirm the semantics and the return values.
*/
int json_locate_key(const char *js,
                    const char *js_end,
                    const char *kname,
                    const char **key_start,
                    const char **key_end,
                    int *comma_pos);
433 
434 #ifdef  __cplusplus
435 }
436 #endif
437 
438 #endif /* JSON_LIB_INCLUDED */
439