1 /* https://github.com/mnunberg/jsonsl */
2 
3 /**
4  * JSON Simple/Stacked/Stateful Lexer.
5  * - Does not buffer data
6  * - Maintains state
7  * - Callback oriented
8  * - Lightweight and fast. One source file and one header file
9  *
10  * Copyright (C) 2012-2015 Mark Nunberg
11  * See included LICENSE file for license details.
12  */
13 
14 #ifndef JSONSL_H_
15 #define JSONSL_H_
16 
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <stddef.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <wchar.h>
23 
24 #ifdef __cplusplus
25 extern "C" {
26 #endif /* __cplusplus */
27 
/*
 * Character types consumed by the lexer.
 *
 * jsonsl_char_t is the element type of the input buffer and jsonsl_uchar_t
 * is its same-sized companion. By default the lexer works on plain bytes;
 * defining JSONSL_USE_WCHAR switches both types to wide characters.
 *
 * C has no `unsigned wchar_t`, and the signedness of wchar_t is
 * implementation-defined, so in wide mode both names alias wchar_t.
 */
#ifdef JSONSL_USE_WCHAR
typedef wchar_t jsonsl_char_t;
typedef wchar_t jsonsl_uchar_t;
#else
typedef char jsonsl_char_t;
typedef unsigned char jsonsl_uchar_t;
#endif /* JSONSL_USE_WCHAR */
35 
/*
 * Proxies for the NaN/Infinity special flags. They expand to the real
 * JSONSL_SPECIALf_NAN / JSONSL_SPECIALf_INF flags only when JSONSL_PARSE_NAN
 * is defined; otherwise they expand to 0, so OR-ing them into a flag mask
 * has no effect.
 */
#ifdef JSONSL_PARSE_NAN
#define JSONSL__NAN_PROXY JSONSL_SPECIALf_NAN
#define JSONSL__INF_PROXY JSONSL_SPECIALf_INF
#else
#define JSONSL__NAN_PROXY 0
#define JSONSL__INF_PROXY 0
#endif /* JSONSL_PARSE_NAN */
43 
/*
 * Fixed-width integer types.
 * Stolen from http-parser.h, and possibly others.
 *
 * On Windows builds that are not MinGW and where _MSC_VER is either undefined
 * or below 1600, the types are declared by hand from the compiler's __intN
 * built-ins (plus size_t/ssize_t when _MSC_VER is undefined or below 1400).
 * Every other configuration simply includes <stdint.h>.
 */
#if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600)
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#if !defined(_MSC_VER) || _MSC_VER<1400
typedef unsigned int size_t;
typedef int ssize_t;
#endif /* !_MSC_VER || _MSC_VER < 1400 */
#else
#include <stdint.h>
#endif /* hand-written fixed-width types vs. <stdint.h> */
61 
62 
/*
 * Selection of the per-state user storage in struct jsonsl_state_st.
 *
 * If the application defines neither JSONSL_STATE_GENERIC nor
 * JSONSL_STATE_USER_FIELDS, generic mode is selected by default.
 */
#if (!defined(JSONSL_STATE_GENERIC)) && (!defined(JSONSL_STATE_USER_FIELDS))
#define JSONSL_STATE_GENERIC
#endif /* !defined JSONSL_STATE_GENERIC */

/*
 * In generic mode the custom-field macro is defined as empty; the state
 * structure then carries a single `void *data` member instead.
 */
#ifdef JSONSL_STATE_GENERIC
#define JSONSL_STATE_USER_FIELDS
#endif /* JSONSL_STATE_GENERIC */

/*
 * Additional fields for component object (struct jsonsl_jpr_component_st).
 * Empty unless the application defines the macro before including this header.
 */
#ifndef JSONSL_JPR_COMPONENT_USER_FIELDS
#define JSONSL_JPR_COMPONENT_USER_FIELDS
#endif /* JSONSL_JPR_COMPONENT_USER_FIELDS */
75 
/*
 * JSONSL_API decorates every public function declaration in this header.
 * An application may predefine it; otherwise it expands to
 * __declspec(dllexport) on Windows when JSONSL_DLL is defined, and to
 * nothing in every other case.
 */
#ifndef JSONSL_API
/**
 * We require a /DJSONSL_DLL so that users already using this as a static
 * or embedded library don't get confused
 */
#if defined(_WIN32) && defined(JSONSL_DLL)
#define JSONSL_API __declspec(dllexport)
#else
#define JSONSL_API
#endif /* _WIN32 */

#endif /* !JSONSL_API */
88 
/*
 * JSONSL_INLINE is the inline keyword used for the static helper functions
 * defined in this header. Unless predefined by the application it maps to
 * the compiler-specific spelling (__inline for MSVC, __inline__ for GCC
 * compatible compilers) and falls back to the standard `inline` keyword.
 */
#ifndef JSONSL_INLINE
#if defined(_MSC_VER)
  #define JSONSL_INLINE __inline
  #elif defined(__GNUC__)
  #define JSONSL_INLINE __inline__
  #else
  #define JSONSL_INLINE inline
  #endif /* _MSC_VER or __GNUC__ */
#endif /* JSONSL_INLINE */
98 
/*
 * Nesting-depth constant.
 * NOTE(review): not referenced anywhere else in this header; presumably a
 * default or upper bound used by the implementation -- confirm in jsonsl.c.
 */
#define JSONSL_MAX_LEVELS 512

/* Lexer handle: a pointer to struct jsonsl_st, which is defined below. */
struct jsonsl_st;
typedef struct jsonsl_st *jsonsl_t;

/* JSON Pointer (JPR) handle: a pointer to struct jsonsl_jpr_st. */
typedef struct jsonsl_jpr_st* jsonsl_jpr_t;
105 
/**
 * This flag is true when AND'd against a type whose value
 * must be in "quotes", i.e. T_HKEY and T_STRING
 */
#define JSONSL_Tf_STRINGY 0xffff00

/**
 * Constants representing the special JSON types.
 * The values are special and aid in speed (the OBJECT and LIST
 * values are the char literals of their openings).
 *
 * Their actual value is a character which attempts to resemble
 * some mnemonic reference to the actual type. The two string-like
 * types additionally have the JSONSL_Tf_STRINGY bits OR'd in.
 *
 * If new types are added, they must fit into the ASCII printable
 * range (so they should be AND'd with 0x7f and yield something
 * meaningful)
 */
#define JSONSL_XTYPE \
    X(STRING,   '"'|JSONSL_Tf_STRINGY) \
    X(HKEY,     '#'|JSONSL_Tf_STRINGY) \
    X(OBJECT,   '{') \
    X(LIST,     '[') \
    X(SPECIAL,  '^') \
    X(UESCAPE,  'u')
/* Each X(o, c) entry above becomes the enumerator JSONSL_T_<o> = c. */
typedef enum {
#define X(o, c) \
    JSONSL_T_##o = c,
    JSONSL_XTYPE
    JSONSL_T_UNKNOWN = '?',
    /* Abstract 'root' object */
    JSONSL_T_ROOT = 0
#undef X
} jsonsl_type_t;
140 
/**
 * Subtypes for T_SPECIAL. We define them as flags
 * because more than one type can be applied to a
 * given object.
 *
 * Each X(o, b) entry becomes the enumerator JSONSL_SPECIALf_<o> = b.
 */

#define JSONSL_XSPECIAL \
    X(NONE, 0) \
    X(SIGNED,       1<<0) \
    X(UNSIGNED,     1<<1) \
    X(TRUE,         1<<2) \
    X(FALSE,        1<<3) \
    X(NULL,         1<<4) \
    X(FLOAT,        1<<5) \
    X(EXPONENT,     1<<6) \
    X(NONASCII,     1<<7) \
    X(NAN,          1<<8) \
    X(INF,          1<<9)
typedef enum {
#define X(o,b) \
    JSONSL_SPECIALf_##o = b,
    JSONSL_XSPECIAL
#undef X
    /* Handy flags for checking */

    JSONSL_SPECIALf_UNKNOWN = 1 << 10,

    /** @private Private. Bit 11 combined with the UNSIGNED flag */
    JSONSL_SPECIALf_ZERO    = 1 << 11 | JSONSL_SPECIALf_UNSIGNED,
    /** @private */
    JSONSL_SPECIALf_DASH    = 1 << 12,
    /** @private INF alone, or INF combined with the SIGNED flag */
    JSONSL_SPECIALf_POS_INF = (JSONSL_SPECIALf_INF),
    JSONSL_SPECIALf_NEG_INF = (JSONSL_SPECIALf_INF|JSONSL_SPECIALf_SIGNED),

    /** Type is numeric (mask of the SIGNED and UNSIGNED flags) */
    JSONSL_SPECIALf_NUMERIC = (JSONSL_SPECIALf_SIGNED| JSONSL_SPECIALf_UNSIGNED),

    /** Type is a boolean (mask of the TRUE and FALSE flags) */
    JSONSL_SPECIALf_BOOLEAN = (JSONSL_SPECIALf_TRUE|JSONSL_SPECIALf_FALSE),

    /** Type is an "extended", not integral type (but numeric) */
   JSONSL_SPECIALf_NUMNOINT =
       (JSONSL_SPECIALf_FLOAT|JSONSL_SPECIALf_EXPONENT|JSONSL_SPECIALf_NAN
        |JSONSL_SPECIALf_INF)
} jsonsl_special_t;
187 
188 
/**
 * These are the various types of stack (or other) events
 * which will trigger a callback.
 * Like the type constants, these are also mnemonic: each X(a, c) entry
 * becomes the enumerator JSONSL_ACTION_<a> = c.
 */
#define JSONSL_XACTION \
    X(PUSH, '+') \
    X(POP, '-') \
    X(UESCAPE, 'U') \
    X(ERROR, '!')
typedef enum {
#define X(a,c) \
    JSONSL_ACTION_##a = c,
    JSONSL_XACTION
    JSONSL_ACTION_UNKNOWN = '?'
#undef X
} jsonsl_action_t;
206 
207 
/**
 * Various errors which may be thrown while parsing JSON.
 *
 * Each X(e) entry becomes the enumerator JSONSL_ERROR_<e>. The enumerators
 * are numbered consecutively in the order listed here, starting right after
 * JSONSL_ERROR_SUCCESS (0); JSONSL_ERROR_GENERIC comes last.
 */
#define JSONSL_XERR \
/* Trailing garbage characters */ \
    X(GARBAGE_TRAILING) \
/* We were expecting a 'special' (numeric, true, false, null) */ \
    X(SPECIAL_EXPECTED) \
/* The 'special' value was incomplete */ \
    X(SPECIAL_INCOMPLETE) \
/* Found a stray token */ \
    X(STRAY_TOKEN) \
/* We were expecting a token before this one */ \
    X(MISSING_TOKEN) \
/* Cannot insert because the container is not ready */ \
    X(CANT_INSERT) \
/* Found a '\' outside a string */ \
    X(ESCAPE_OUTSIDE_STRING) \
/* Found a ':' outside of a hash */ \
    X(KEY_OUTSIDE_OBJECT) \
/* Found a string outside of a container */ \
    X(STRING_OUTSIDE_CONTAINER) \
/* Found a null byte in middle of string */ \
    X(FOUND_NULL_BYTE) \
/* Current level exceeds limit specified in constructor */ \
    X(LEVELS_EXCEEDED) \
/* Got a } as a result of an opening [ or vice versa */ \
    X(BRACKET_MISMATCH) \
/* We expected a key, but got something else instead */ \
    X(HKEY_EXPECTED) \
/* We got an illegal control character (bad whitespace or something) */ \
    X(WEIRD_WHITESPACE) \
/* Found a \u-escape, but there were fewer than 4 following hex digits */ \
    X(UESCAPE_TOOSHORT) \
/* Invalid two-character escape */ \
    X(ESCAPE_INVALID) \
/* Trailing comma */ \
    X(TRAILING_COMMA) \
/* An invalid number was passed in a numeric field */ \
    X(INVALID_NUMBER) \
/* Value is missing for object */ \
    X(VALUE_EXPECTED) \
/* The following are for JPR Stuff */ \
    \
/* Found a literal '%' but it was only followed by a single valid hex digit */ \
    X(PERCENT_BADHEX) \
/* jsonpointer URI is malformed '/' */ \
    X(JPR_BADPATH) \
/* Duplicate slash */ \
    X(JPR_DUPSLASH) \
/* No leading root */ \
    X(JPR_NOROOT) \
/* Allocation failure */ \
    X(ENOMEM) \
/* Invalid unicode codepoint detected (in case of escapes) */ \
    X(INVALID_CODEPOINT)

typedef enum {
    JSONSL_ERROR_SUCCESS = 0,
#define X(e) \
    JSONSL_ERROR_##e,
    JSONSL_XERR
#undef X
    JSONSL_ERROR_GENERIC
} jsonsl_error_t;
273 
274 
/**
 * A state is a single level of the stack.
 * Non-private data (i.e. the 'data' field, see the STATE_GENERIC section)
 * will remain intact until the item is popped.
 *
 * As a result, a parent state object may be accessed from a child
 * object (the parent's fields will all be valid). This allows a user to create
 * an ad-hoc hierarchy on top of the JSON one.
 *
 */
struct jsonsl_state_st {
    /**
     * The JSON object type (one of the JSONSL_T_* constants)
     */
    unsigned type;

    /**
     * If this element is special, then its extended type is here
     * (a combination of JSONSL_SPECIALf_* flags)
     */
    unsigned special_flags;

    /**
     * The position (in terms of number of bytes since the first call to
     * jsonsl_feed()) at which the state was first pushed. This includes
     * opening tokens, if applicable.
     *
     * @note For strings (i.e. type & JSONSL_Tf_STRINGY is nonzero) this will
     * be the position of the first quote.
     *
     * @see jsonsl_st::pos which contains the _current_ position and can be
     * used during a POP callback to get the length of the element.
     */
    size_t pos_begin;

    /**FIXME: This is redundant as the same information can be derived from
     * jsonsl_st::pos at pop-time */
    size_t pos_cur;

    /**
     * Level of recursion into nesting. This is mainly a convenience
     * variable, as this can technically be deduced from the lexer's
     * level parameter (though the logic is not that simple)
     */
    unsigned int level;


    /**
     * How many elements are in the object/list.
     * For objects (hashes), an element is either
     * a key or a value. Thus for one complete pair,
     * nelem will be 2.
     *
     * For special types, this will hold the sum of the digits.
     * This only holds true for values which are simple signed/unsigned
     * numbers. Otherwise a special flag is set, and extra handling is not
     * performed.
     *
     * @see JSONSL_LIST_SIZE, JSONSL_OBJECT_SIZE and JSONSL_NUMERIC_VALUE,
     * which read this field.
     */
    uint64_t nelem;



    /*TODO: merge this and special_flags into a union */


    /**
     * Useful for an opening nest, this will prevent a callback from being
     * invoked on this item or any of its children
     */
    int ignore_callback;

    /**
     * Counter which is incremented each time an escape ('\') is encountered.
     * This is used internally for non-string types and should only be
     * inspected by the user if the state actually represents a string
     * type.
     */
    unsigned int nescapes;

    /**
     * Put anything you want here. If JSONSL_STATE_GENERIC is not defined,
     * then the JSONSL_STATE_USER_FIELDS macro expansion happens here.
     *
     * You can use these fields to store hierarchical or 'tagging' information
     * for specific objects.
     *
     * See the documentation above for the lifetime of the state object (i.e.
     * if the private data points to allocated memory, it should be freed
     * when the object is popped, as the state object will be re-used)
     */
#ifndef JSONSL_STATE_GENERIC
    JSONSL_STATE_USER_FIELDS
#else

    /**
     * Otherwise, this is a simple void * pointer for anything you want
     */
    void *data;
#endif /* !JSONSL_STATE_GENERIC */
};
372 
/**Gets the number of elements in the list.
 * @param st Pointer to the state. Must be of type JSONSL_T_LIST
 * @return number of elements in the list (the state's nelem field)
 */
#define JSONSL_LIST_SIZE(st) ((st)->nelem)

/**Gets the number of key-value pairs in an object.
 * nelem counts keys and values separately, hence the division by two.
 * @param st Pointer to the state. Must be of type JSONSL_T_OBJECT
 * @return the number of key-value pairs in the object
 */
#define JSONSL_OBJECT_SIZE(st) ((st)->nelem / 2)

/**Gets the numeric value.
 * @param st Pointer to the state. Must be of type JSONSL_T_SPECIAL and
 *           special_flags must have the JSONSL_SPECIALf_NUMERIC flag
 *           set.
 * @return the numeric value of the state (the state's nelem field).
 */
#define JSONSL_NUMERIC_VALUE(st) ((st)->nelem)
392 
/*
 * So now we need some special structure for keeping the
 * JPR info in sync. Preferably all in a single block
 * of memory (there's no need for separate allocations).
 * So we will define a 'table' with the following layout
 *
 * Level    nPosbl  JPR1_last   JPR2_last   JPR3_last
 *
 * 0        1       NOMATCH     POSSIBLE    POSSIBLE
 * 1        0       NOMATCH     NOMATCH     COMPLETE
 * [ table ends here because no further path is possible]
 *
 * Where the JPR..n corresponds to the number of JPRs
 * requested, and nPosbl is a quick flag to determine
 * the number of possibilities. In the future this might
 * be made into a proper 'jump' table.
 *
 * Since we always mark JPRs from the higher levels descending
 * into the lower ones, a prospective child match would first
 * look at the parent table to check the possibilities, and then
 * see which ones were possible.
 *
 * Thus, the size of this blob would be (and these are all ints here)
 * nLevels * nJPR * 2.
 *
 * The 'width' of the table would be nJPR*2, and the 'height' would be
 * nLevels.
 */
422 
/**
 * This is called when a stack change occurs.
 *
 * @param jsn The lexer
 * @param action The type of action, this can be PUSH or POP
 * @param state A pointer to the stack entry currently affected by the action
 * @param at A pointer to the position of the input buffer which triggered
 * this action.
 */
typedef void (*jsonsl_stack_callback)(
        jsonsl_t jsn,
        jsonsl_action_t action,
        struct jsonsl_state_st* state,
        const jsonsl_char_t *at);
437 
438 
/**
 * This is called when an error is encountered.
 * Sometimes it's possible to 'erase' characters (by replacing them
 * with whitespace). If you think you have corrected the error, you
 * can return a true value, in which case the parser will backtrack
 * and try again.
 *
 * @param jsn The lexer
 * @param error The error which was thrown
 * @param state The current state
 * @param at A pointer to the position of the input buffer which triggered
 * the error. Note that this is not const; this is because you have the
 * possibility of modifying the character in an attempt to correct the
 * error
 *
 * @return zero to bail, nonzero to try again (this only makes sense if
 * the input buffer has been modified by this callback)
 */
typedef int (*jsonsl_error_callback)(
        jsonsl_t jsn,
        jsonsl_error_t error,
        struct jsonsl_state_st* state,
        jsonsl_char_t *at);
462 
/**
 * The lexer object itself (jsonsl_t is a pointer to this structure).
 * It holds the position counters, the user-settable callbacks and flags,
 * private bookkeeping, and finally the stack of per-level states.
 */
struct jsonsl_st {
    /** Public, read-only */

    /** This is the current level of the stack */
    unsigned int level;

    /** Flag set to indicate we should stop processing (see jsonsl_stop()) */
    unsigned int stopfl;

    /**
     * This is the current position, relative to the beginning
     * of the stream.
     */
    size_t pos;

    /** This is the 'bytes' variable passed to feed() */
    const jsonsl_char_t *base;

    /** Callback invoked for PUSH actions */
    jsonsl_stack_callback action_callback_PUSH;

    /** Callback invoked for POP actions */
    jsonsl_stack_callback action_callback_POP;

    /** Default callback for any action, if neither PUSH or POP callbacks are defined */
    jsonsl_stack_callback action_callback;

    /**
     * Do not invoke callbacks for objects deeper than this level.
     * NOTE: This field establishes the lower bound for ignored callbacks,
     * and is thus misnamed. `min_ignore_level` would actually make more
     * sense, but we don't want to break API.
     */
    unsigned int max_callback_level;

    /** The error callback. Invoked when an error happens. Should not be NULL */
    jsonsl_error_callback error_callback;

    /* These are boolean flags you can modify. You will be notified
     * about events for each of these types if the corresponding
     * variable is true.
     */

    /**
     * @name Callback Booleans.
     * These determine whether a callback is to be invoked for certain types of objects
     * @{*/

    /** Boolean flag to enable or disable the invocation for events on this type */
    int call_SPECIAL;
    int call_OBJECT;
    int call_LIST;
    int call_STRING;
    int call_HKEY;
    /*@}*/

    /**
     * @name u-Escape handling
     * Special handling for the \\u-f00d type sequences. These are meant
     * to be translated back into the corresponding octet(s).
     * A special callback (if set) is invoked with *at=='u'. An application
     * may wish to temporarily suspend parsing and handle the 'u-' sequence
     * internally (or not).
     */

     /*@{*/

    /** Callback to be invoked for a u-escape */
    jsonsl_stack_callback action_callback_UESCAPE;

    /** Boolean flag, whether to invoke the callback */
    int call_UESCAPE;

    /** Boolean flag, whether we should return after encountering a u-escape:
     * the callback is invoked and then we return if this is true
     */
    int return_UESCAPE;
    /*@}*/

    /* NOTE(review): allow_trailing_comma presumably suppresses
     * JSONSL_ERROR_TRAILING_COMMA when nonzero -- confirm in jsonsl.c */
    struct {
        int allow_trailing_comma;
    } options;

    /** Put anything here */
    void *data;

    /*@{*/
    /** Private */
    int in_escape;
    char expecting;
    char tok_last;
    int can_insert;
    unsigned int levels_max;

#ifndef JSONSL_NO_JPR
    size_t jpr_count;
    jsonsl_jpr_t *jprs;

    /* Root pointer for JPR matching information */
    size_t *jpr_root;
#endif /* JSONSL_NO_JPR */
    /*@}*/

    /**
     * This is the stack. Its upper bound is levels_max, or the
     * nlevels argument passed to jsonsl_new. If you modify this structure,
     * make sure that this member is last.
     */
    struct jsonsl_state_st stack[1];
};
573 
574 
/**
 * Creates a new lexer object, with capacity for recursion up to nlevels
 *
 * @param nlevels maximum recursion depth
 * @return the new lexer handle, to be released with jsonsl_destroy().
 * NOTE(review): presumably NULL on allocation failure -- confirm in jsonsl.c
 */
JSONSL_API
jsonsl_t jsonsl_new(int nlevels);
582 
/**
 * Feeds data into the lexer.
 *
 * @param jsn the lexer object
 * @param bytes new data to be fed
 * @param nbytes size of new data, counted in jsonsl_char_t elements
 */
JSONSL_API
void jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes);
592 
/**
 * Resets the internal parser state. This does not free the parser
 * but does clean it internally, so that the next time feed() is called,
 * the input will be treated as a new stream
 *
 * @param jsn the lexer
 */
JSONSL_API
void jsonsl_reset(jsonsl_t jsn);
602 
/**
 * Frees the lexer, releasing any memory allocated for it
 *
 * @param jsn the lexer
 */
JSONSL_API
void jsonsl_destroy(jsonsl_t jsn);
610 
611 /**
612  * Gets the 'parent' element, given the current one
613  *
614  * @param jsn the lexer
615  * @param cur the current nest, which should be a struct jsonsl_nest_st
616  */
617 static JSONSL_INLINE
jsonsl_last_state(const jsonsl_t jsn,const struct jsonsl_state_st * state)618 struct jsonsl_state_st *jsonsl_last_state(const jsonsl_t jsn,
619                                           const struct jsonsl_state_st *state)
620 {
621     /* Don't complain about overriding array bounds */
622     if (state->level > 1) {
623         return jsn->stack + state->level - 1;
624     } else {
625         return NULL;
626     }
627 }
628 
629 /**
630  * Gets the state of the last fully consumed child of this parent. This is
631  * only valid in the parent's POP callback.
632  *
633  * @param the lexer
634  * @return A pointer to the child.
635  */
636 static JSONSL_INLINE
jsonsl_last_child(const jsonsl_t jsn,const struct jsonsl_state_st * parent)637 struct jsonsl_state_st *jsonsl_last_child(const jsonsl_t jsn,
638                                           const struct jsonsl_state_st *parent)
639 {
640     return jsn->stack + (parent->level + 1);
641 }
642 
643 /**Call to instruct the parser to stop parsing and return. This is valid
644  * only from within a callback */
645 static JSONSL_INLINE
jsonsl_stop(jsonsl_t jsn)646 void jsonsl_stop(jsonsl_t jsn)
647 {
648     jsn->stopfl = 1;
649 }
650 
651 /**
652  * This enables receiving callbacks on all events. Doesn't do
653  * anything special but helps avoid some boilerplate.
654  * This does not touch the UESCAPE callbacks or flags.
655  */
656 static JSONSL_INLINE
jsonsl_enable_all_callbacks(jsonsl_t jsn)657 void jsonsl_enable_all_callbacks(jsonsl_t jsn)
658 {
659     jsn->call_HKEY = 1;
660     jsn->call_STRING = 1;
661     jsn->call_OBJECT = 1;
662     jsn->call_SPECIAL = 1;
663     jsn->call_LIST = 1;
664 }
665 
/**
 * A macro which evaluates to true if the given state object can
 * have children. This means a list type or an object type.
 *
 * @param state pointer to a struct jsonsl_state_st. The argument is
 * parenthesized in the expansion, so any pointer expression (e.g. `p + 1`
 * or `&s`) is accepted; it is evaluated up to twice, so it must not have
 * side effects.
 */
#define JSONSL_STATE_IS_CONTAINER(state) \
        ((state)->type == JSONSL_T_OBJECT || (state)->type == JSONSL_T_LIST)
672 
/**
 * These two functions return a string representation
 * of the error or type, respectively. They will never
 * return NULL
 *
 * @param err the error code to describe
 * @param jt the type constant to describe
 */
JSONSL_API
const char* jsonsl_strerror(jsonsl_error_t err);
JSONSL_API
const char* jsonsl_strtype(jsonsl_type_t jt);
682 
/**
 * Dumps global metrics to the screen. This is a no-op unless
 * jsonsl was compiled with JSONSL_USE_METRICS
 */
JSONSL_API
void jsonsl_dump_global_metrics(void);
689 
690 /* This macro just here for editors to do code folding */
691 #ifndef JSONSL_NO_JPR
692 
693 /**
694  * @name JSON Pointer API
695  *
696  * JSONPointer API. This isn't really related to the lexer (at least not yet)
697  * JSONPointer provides an extremely simple specification for providing
698  * locations within JSON objects. We will extend it a bit and allow for
699  * providing 'wildcard' characters by which to be able to 'query' the stream.
700  *
701  * See http://tools.ietf.org/html/draft-pbryan-zyp-json-pointer-00
702  *
703  * Currently I'm implementing the 'single query' API which can only use a single
704  * query component. In the future I will integrate my yet-to-be-published
705  * Boyer-Moore-esque prefix searching implementation, in order to allow
706  * multiple paths to be merged into one for quick and efficient searching.
707  *
708  *
 * JPR (as we'll refer to it within the source) can be used by splitting
 * the components into multiple sections, and incrementally 'tracking' each
 * component. When JSONSL delivers a 'pop' callback for a string, or a 'push'
 * callback for an object, we will check to see whether the index matching
 * the component corresponding to the current level contains a match
 * for our path.
 *
 * In order to do this properly, a structure must be maintained within the
 * parent indicating whether its children are possible matches. This flag
 * will be 'inherited' by all children which may conform to the match
 * specification, and discarded by all which do not (thereby eliminating
 * their children from inheriting it).
721  *
722  * A successful match is a complete one. One can provide multiple paths with
723  * multiple levels of matches e.g.
724  *  /foo/bar/baz/^/blah
725  *
726  *  @{
727  */
728 
/** The wildcard character. May be overridden by defining it before inclusion */
#ifndef JSONSL_PATH_WILDCARD_CHAR
#define JSONSL_PATH_WILDCARD_CHAR '^'
#endif /* WILDCARD_CHAR */
733 
/**
 * Result constants of the JPR matching functions. Each X(T, v) entry becomes
 * the enumerator JSONSL_MATCH_<T> = v:
 *  - COMPLETE:      the path matched in full
 *  - POSSIBLE:      no full match yet, but one is still possible
 *  - NOMATCH:       a match is excluded
 *  - TYPE_MISMATCH: type checking between array indices and dictionary keys
 *                   failed (see jsonsl_jpr_component_st::is_arridx)
 */
#define JSONSL_XMATCH \
    X(COMPLETE,1) \
    X(POSSIBLE,0) \
    X(NOMATCH,-1) \
    X(TYPE_MISMATCH, -2)

typedef enum {

#define X(T,v) \
    JSONSL_MATCH_##T = v,
    JSONSL_XMATCH

#undef X
    /* Given an explicit value: an implicit one would be TYPE_MISMATCH + 1,
     * i.e. -1, which silently aliases JSONSL_MATCH_NOMATCH */
    JSONSL_MATCH_UNKNOWN = -3
} jsonsl_jpr_match_t;
749 
/**
 * Kind of a single path component (see jsonsl_jpr_component_st::ptype).
 * The regular kinds are numbered from 1; NONE is 0 and INVALID is -1.
 */
typedef enum {
    JSONSL_PATH_STRING = 1,
    JSONSL_PATH_WILDCARD,
    JSONSL_PATH_NUMERIC,
    JSONSL_PATH_ROOT,

    /* Special */
    JSONSL_PATH_INVALID = -1,
    JSONSL_PATH_NONE = 0
} jsonsl_jpr_type_t;
760 
/** A single component of a parsed JPR path */
struct jsonsl_jpr_component_st {
    /** The string the component points to */
    char *pstr;
    /** If this is a numeric type, the number is 'cached' here */
    unsigned long idx;
    /** The length of the string */
    size_t len;
    /** The type of component (NUMERIC or STRING) */
    jsonsl_jpr_type_t ptype;

    /** Set this to true to enforce type checking between dict keys and array
     * indices. jsonsl_jpr_match() will return TYPE_MISMATCH if it detects
     * that an array index is actually a child of a dictionary. */
    short is_arridx;

    /* Extra fields (for more advanced searches). Default is empty */
    JSONSL_JPR_COMPONENT_USER_FIELDS
};
779 
/** A parsed JPR path (jsonsl_jpr_t is a pointer to this structure) */
struct jsonsl_jpr_st {
    /** Path components */
    struct jsonsl_jpr_component_st *components;
    /** Number of entries in `components` */
    size_t ncomponents;

    /**Type of the match to be expected. If nonzero, will be compared against
     * the actual type */
    unsigned match_type;

    /** Base of allocated string for components */
    char *basestr;

    /** The original match string. Useful for returning to the user */
    char *orig;
    /** Length of `orig` */
    size_t norig;
};
796 
/**
 * Create a new JPR object.
 *
 * @param path the JSONPointer path specification.
 * @param errp a pointer to a jsonsl_error_t. If this function returns NULL,
 * then more details will be in this variable.
 *
 * @return a new jsonsl_jpr_t object, or NULL on error. Release it with
 * jsonsl_jpr_destroy().
 */
JSONSL_API
jsonsl_jpr_t jsonsl_jpr_new(const char *path, jsonsl_error_t *errp);
808 
/**
 * Destroy a JPR object
 *
 * @param jpr the JPR object, as returned by jsonsl_jpr_new()
 */
JSONSL_API
void jsonsl_jpr_destroy(jsonsl_jpr_t jpr);
814 
/**
 * Match a JSON object against a type and specific level
 *
 * @param jpr the JPR object
 * @param parent_type the type of the parent (should be T_LIST or T_OBJECT)
 * @param parent_level the level of the parent
 * @param key the 'key' of the child. If the parent is an array, this should be
 * empty.
 * @param nkey the length of the key. If the parent is an array (T_LIST), then
 * this should be the current index.
 *
 * NOTE: The key of the child means any kind of associative data related to the
 * element. Thus, in the input { "foo" : [
 * the opening array's key is "foo".
 *
 * @return a status constant. This indicates whether a match was excluded, possible,
 * or successful.
 */
JSONSL_API
jsonsl_jpr_match_t jsonsl_jpr_match(jsonsl_jpr_t jpr,
                                    unsigned int parent_type,
                                    unsigned int parent_level,
                                    const char *key, size_t nkey);
838 
/**
 * Alternate matching algorithm. This matching algorithm does not use
 * JSONPointer but relies on a more structured searching mechanism. It
 * assumes that there is a clear distinction between array indices and
 * object keys. In this case, the jsonsl_jpr_component_st::ptype should
 * be set to @ref JSONSL_PATH_NUMERIC for an array index (the
 * jsonsl_jpr_component_st::is_arridx field will be removed in a future
 * version).
 *
 * @param jpr The path
 * @param parent The parent structure. Can be NULL if this is the root object
 * @param child The child structure. Should not be NULL
 * @param key Object key, if an object
 * @param nkey Length of object key
 * @return Status constant if successful
 *
 * @note
 * For successful matching, both the key and the path itself should be normalized
 * to contain 'proper' utf8 sequences rather than utf16 '\uXXXX' escapes. This
 * should currently be done in the application. Another version of this function
 * may use a temporary buffer in such circumstances (allocated by the application).
 *
 * Since this function also checks the state of the child, it should only
 * be called on PUSH callbacks, and not POP callbacks
 */
JSONSL_API
jsonsl_jpr_match_t
jsonsl_path_match(jsonsl_jpr_t jpr,
                  const struct jsonsl_state_st *parent,
                  const struct jsonsl_state_st *child,
                  const char *key, size_t nkey);
870 
871 
/**
 * Associate a set of JPR objects with a lexer instance.
 * This should be called before the lexer has been fed any data (and
 * behavior is undefined if you don't adhere to this).
 *
 * After using this function, you may subsequently call
 * jsonsl_jpr_match_state() on given states (presumably from within the
 * callbacks).
 *
 * Note that currently the first JPR is the quickest and comes
 * pre-allocated with the state structure. Further JPR objects
 * are chained.
 *
 * @param jsn The lexer
 * @param jprs An array of jsonsl_jpr_t objects
 * @param njprs How many elements are in the jprs array.
 */
JSONSL_API
void jsonsl_jpr_match_state_init(jsonsl_t jsn,
                                 jsonsl_jpr_t *jprs,
                                 size_t njprs);
892 
/**
 * This follows the same semantics as the normal match,
 * except we infer parent and type information from the relevant state objects.
 * The match status (for all possible JPR objects) is set in the *out parameter.
 *
 * If a match has succeeded, then its JPR object will be returned. In all other
 * instances, NULL is returned.
 *
 * @param jsn The lexer, previously set up with jsonsl_jpr_match_state_init()
 * @param state The jsonsl_state_st which is a candidate
 * @param key The hash key (if applicable, can be NULL if parent is list)
 * @param nkey Length of hash key (if applicable, can be zero if parent is list)
 * @param out A pointer to a jsonsl_jpr_match_t. This will be populated with
 * the match result
 *
 * @return If a match was completed in full, then the JPR object containing
 * the matching path will be returned. Otherwise, the return is NULL (note, this
 * does not mean matching has failed, it can still be part of the match: check
 * the out parameter).
 */
JSONSL_API
jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn,
                                    struct jsonsl_state_st *state,
                                    const char *key,
                                    size_t nkey,
                                    jsonsl_jpr_match_t *out);
919 
920 
/**
 * Clean up any memory allocated and any states set by
 * jsonsl_jpr_match_state_init() and jsonsl_jpr_match_state()
 * @param jsn The lexer
 */
JSONSL_API
void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn);
928 
/**
 * Return a string representation of the match result returned by
 * jsonsl_jpr_match()
 *
 * @param match the match result to describe
 */
JSONSL_API
const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match);
934 
935 /* @}*/
936 
/**
 * Utility function to convert escape sequences into their original form.
 *
 * The decoders I've sampled do not seem to specify a standard behavior of what
 * to escape/unescape.
 *
 * RFC 4627 mandates only that the quote, backslash, and ASCII control
 * characters (0x00-0x1f) be escaped. It is often common for applications
 * to escape a '/' - however this may also be desired behavior. The JSON
 * spec is not clear on this, and therefore jsonsl leaves it up to you.
 *
 * Additionally, sometimes you may wish to _normalize_ JSON. This is specifically
 * true when dealing with 'u-escapes' which can be expressed perfectly fine
 * as utf8. One use case for normalization is JPR string comparison, in which
 * case two effectively equivalent strings may not match because one is using
 * u-escapes and the other proper utf8. To normalize u-escapes only, pass in
 * an empty `toEscape` table, enabling only the `u` index.
 *
 * @param in The input string.
 * @param out An allocated output (should be the same size as in)
 * @param len the size of the buffer
 * @param toEscape A sparse array of characters to unescape. Characters
 * which are not present in this array, e.g. toEscape['c'] == 0, will be
 * ignored and passed to the output in their original form.
 * @param oflags If not null, and a \uXXXX escape expands to a non-ascii byte,
 * then this variable will have the SPECIALf_NONASCII flag on.
 *
 * @param err A pointer to an error variable. If an error occurs, it will be
 * set in this variable
 * @param errat If not null and an error occurs, this will be set to point
 * to the position within the string at which the offending character was
 * encountered.
 *
 * @return The effective size of the output buffer.
 *
 * @note
 * This function now encodes the UTF8 equivalents of utf16 escapes (i.e.
 * 'u-escapes'). Previously this would encode the escapes as utf16 literals,
 * which while still correct in some sense was confusing for many (especially
 * considering that the inputs were variations of char).
 *
 * @note
 * The output buffer will never be larger than the input buffer, since
 * standard escape sequences (i.e. '\t') occupy two bytes in the source
 * but only one byte (when unescaped) in the output. Likewise a u-escape
 * (i.e. \uXXXX) occupies six bytes in the source, while the UTF-8 encoding
 * of a single 16-bit code unit needs at most three bytes.
 */
JSONSL_API
size_t jsonsl_util_unescape_ex(const char *in,
                               char *out,
                               size_t len,
                               const int toEscape[128],
                               unsigned *oflags,
                               jsonsl_error_t *err,
                               const char **errat);
993 
/**
 * Convenience macro to avoid passing too many parameters: forwards to
 * jsonsl_util_unescape_ex() with NULL for both `oflags` and `errat`.
 */
#define jsonsl_util_unescape(in, out, len, toEscape, err) \
    jsonsl_util_unescape_ex(in, out, len, toEscape, NULL, err, NULL)
999 
1000 #endif /* JSONSL_NO_JPR */
1001 
1002 #ifdef __cplusplus
1003 }
1004 #endif /* __cplusplus */
1005 
1006 #endif /* JSONSL_H_ */
1007