1 /**
2  * JSON Simple/Stacked/Stateful Lexer.
3  * - Does not buffer data
4  * - Maintains state
5  * - Callback oriented
6  * - Lightweight and fast. One source file and one header file
7  *
8  * Copyright (C) 2012-2015 Mark Nunberg
9  * See included LICENSE file for license details.
10  */
11 
12 #ifndef JSONSL_H_
13 #define JSONSL_H_
14 
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <stddef.h>
18 #include <string.h>
19 #include <sys/types.h>
20 #include <wchar.h>
21 
22 #ifdef __cplusplus
23 extern "C" {
24 #endif /* __cplusplus */
25 
/*
 * Character types consumed by the lexer.
 *
 * jsonsl_char_t is the element type of the input buffer and jsonsl_uchar_t
 * is its counterpart used where a character is treated as a plain value.
 * Define JSONSL_USE_WCHAR to lex wide-character input instead of bytes.
 */
#ifdef JSONSL_USE_WCHAR
typedef wchar_t jsonsl_char_t;
/* wchar_t is itself a typedef name and therefore cannot be combined with
 * the `unsigned` specifier; the same underlying type is used for both. */
typedef wchar_t jsonsl_uchar_t;
#else
typedef char jsonsl_char_t;
typedef unsigned char jsonsl_uchar_t;
#endif /* JSONSL_USE_WCHAR */
33 
/*
 * Fixed-width integer types.
 *
 * Old Microsoft toolchains (MSVC before 1600, i.e. without <stdint.h>) get
 * hand-written typedefs; every other compiler simply includes <stdint.h>.
 * The approach is borrowed from http-parser.h, and possibly others.
 */
#if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600)
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
/* Very old compilers additionally lack size_t/ssize_t. */
#if !defined(_MSC_VER) || _MSC_VER<1400
typedef unsigned int size_t;
typedef int ssize_t;
#endif
#else
#include <stdint.h>
#endif
51 
52 
/*
 * Per-state user data configuration.
 *
 * If the user defines neither JSONSL_STATE_GENERIC nor
 * JSONSL_STATE_USER_FIELDS, generic mode is selected. In generic mode
 * JSONSL_STATE_USER_FIELDS is defined as empty and the state structure
 * carries a single `void *data` member instead of custom fields.
 */
#if (!defined(JSONSL_STATE_GENERIC)) && (!defined(JSONSL_STATE_USER_FIELDS))
#define JSONSL_STATE_GENERIC
#endif /* !JSONSL_STATE_GENERIC && !JSONSL_STATE_USER_FIELDS */

#ifdef JSONSL_STATE_GENERIC
#define JSONSL_STATE_USER_FIELDS
#endif /* JSONSL_STATE_GENERIC */

/* Additional fields for the JPR component object (empty by default) */
#ifndef JSONSL_JPR_COMPONENT_USER_FIELDS
#define JSONSL_JPR_COMPONENT_USER_FIELDS
#endif
65 
/*
 * Linkage decoration for the public API. May be predefined by the user.
 *
 * On Windows the symbols are exported only when JSONSL_DLL is defined
 * (e.g. via /DJSONSL_DLL), so that users already using this as a static
 * or embedded library don't get confused. Everywhere else it is empty.
 */
#ifndef JSONSL_API
#if defined(_WIN32) && defined(JSONSL_DLL)
#define JSONSL_API __declspec(dllexport)
#else
#define JSONSL_API
#endif /* _WIN32 && JSONSL_DLL */
#endif /* !JSONSL_API */
78 
/*
 * Compiler-specific spelling of the `inline` keyword. May be predefined
 * by the user to override the detection below.
 */
#ifndef JSONSL_INLINE
#if defined(_MSC_VER)
#define JSONSL_INLINE __inline
#elif defined(__GNUC__)
#define JSONSL_INLINE __inline__
#else
#define JSONSL_INLINE inline
#endif /* _MSC_VER or __GNUC__ */
#endif /* JSONSL_INLINE */
88 
/** Nesting depth constant exported for users of the library */
#define JSONSL_MAX_LEVELS 512

/** Opaque-style handle to a lexer instance (struct defined further below) */
struct jsonsl_st;
typedef struct jsonsl_st *jsonsl_t;

/** Handle to a compiled JSON Pointer (JPR) object */
typedef struct jsonsl_jpr_st *jsonsl_jpr_t;
95 
/**
 * This flag is true when AND'd against a type whose value
 * must be in "quotes", i.e. T_HKEY and T_STRING
 */
#define JSONSL_Tf_STRINGY 0xffff00

/**
 * Constants representing the JSON types.
 *
 * Each value is a character chosen as a mnemonic for the type; for OBJECT
 * and LIST it is the literal opening token, which aids lexing speed.
 * The quoted types additionally carry the JSONSL_Tf_STRINGY bits.
 *
 * If new types are added, they must fit into the ASCII printable
 * range (so that AND'ing with 0x7f yields something meaningful).
 *
 * JSONSL_XTYPE is an X-macro list: define X(name, value), expand
 * JSONSL_XTYPE, then #undef X.
 */
#define JSONSL_XTYPE \
    X(STRING,   '"'|JSONSL_Tf_STRINGY) \
    X(HKEY,     '#'|JSONSL_Tf_STRINGY) \
    X(OBJECT,   '{') \
    X(LIST,     '[') \
    X(SPECIAL,  '^') \
    X(UESCAPE,  'u')

typedef enum {
#define X(o, c) JSONSL_T_##o = c,
    JSONSL_XTYPE
#undef X
    JSONSL_T_UNKNOWN = '?',
    /* Abstract 'root' object */
    JSONSL_T_ROOT = 0
} jsonsl_type_t;
130 
/**
 * Subtypes for T_SPECIAL. They are defined as bit flags because more than
 * one of them can apply to a given object.
 *
 * JSONSL_XSPECIAL is an X-macro list: define X(name, bits), expand
 * JSONSL_XSPECIAL, then #undef X.
 */
#define JSONSL_XSPECIAL \
    X(NONE, 0) \
    X(SIGNED,       1<<0) \
    X(UNSIGNED,     1<<1) \
    X(TRUE,         1<<2) \
    X(FALSE,        1<<3) \
    X(NULL,         1<<4) \
    X(FLOAT,        1<<5) \
    X(EXPONENT,     1<<6) \
    X(NONASCII,     1<<7)

typedef enum {
#define X(o,b) JSONSL_SPECIALf_##o = b,
    JSONSL_XSPECIAL
#undef X

    /* Handy flags for checking */
    JSONSL_SPECIALf_UNKNOWN = 1 << 8,

    /** @private */
    JSONSL_SPECIALf_ZERO    = (1 << 9) | JSONSL_SPECIALf_UNSIGNED,
    /** @private */
    JSONSL_SPECIALf_DASH    = 1 << 10,

    /** Type is numeric */
    JSONSL_SPECIALf_NUMERIC = (JSONSL_SPECIALf_SIGNED|JSONSL_SPECIALf_UNSIGNED),

    /** Type is a boolean */
    JSONSL_SPECIALf_BOOLEAN = (JSONSL_SPECIALf_TRUE|JSONSL_SPECIALf_FALSE),

    /** Type is an "extended", not integral type (but numeric) */
    JSONSL_SPECIALf_NUMNOINT = (JSONSL_SPECIALf_FLOAT|JSONSL_SPECIALf_EXPONENT)
} jsonsl_special_t;
170 
171 
/**
 * The various types of stack (or other) events which will trigger a
 * callback. Like the type constants, these are also mnemonic characters.
 *
 * JSONSL_XACTION is an X-macro list: define X(name, value), expand
 * JSONSL_XACTION, then #undef X.
 */
#define JSONSL_XACTION \
    X(PUSH, '+') \
    X(POP, '-') \
    X(UESCAPE, 'U') \
    X(ERROR, '!')

typedef enum {
#define X(a,c) JSONSL_ACTION_##a = c,
    JSONSL_XACTION
#undef X
    JSONSL_ACTION_UNKNOWN = '?'
} jsonsl_action_t;
189 
190 
/**
 * Various errors which may be reported while parsing JSON.
 *
 * JSONSL_XERR is an X-macro list: define X(name), expand JSONSL_XERR,
 * then #undef X. The enumerators are numbered consecutively starting at 1
 * in the order listed here, so new entries should be appended at the end.
 */
#define JSONSL_XERR \
/* Trailing garbage characters */ \
    X(GARBAGE_TRAILING) \
/* We were expecting a 'special' (numeric, true, false, null) */ \
    X(SPECIAL_EXPECTED) \
/* The 'special' value was incomplete */ \
    X(SPECIAL_INCOMPLETE) \
/* Found a stray token */ \
    X(STRAY_TOKEN) \
/* We were expecting a token before this one */ \
    X(MISSING_TOKEN) \
/* Cannot insert because the container is not ready */ \
    X(CANT_INSERT) \
/* Found a backslash outside a string */ \
    X(ESCAPE_OUTSIDE_STRING) \
/* Found a ':' outside of a hash */ \
    X(KEY_OUTSIDE_OBJECT) \
/* Found a string outside of a container */ \
    X(STRING_OUTSIDE_CONTAINER) \
/* Found a null byte in middle of string */ \
    X(FOUND_NULL_BYTE) \
/* Current level exceeds limit specified in constructor */ \
    X(LEVELS_EXCEEDED) \
/* Got a } as a result of an opening [ or vice versa */ \
    X(BRACKET_MISMATCH) \
/* We expected a key, but got something else instead */ \
    X(HKEY_EXPECTED) \
/* We got an illegal control character (bad whitespace or something) */ \
    X(WEIRD_WHITESPACE) \
/* Found a \u-escape, but there were less than 4 following hex digits */ \
    X(UESCAPE_TOOSHORT) \
/* Invalid two-character escape */ \
    X(ESCAPE_INVALID) \
/* Trailing comma */ \
    X(TRAILING_COMMA) \
/* An invalid number was passed in a numeric field */ \
    X(INVALID_NUMBER) \
/* Value is missing for object */ \
    X(VALUE_EXPECTED) \
/* The following are for JPR Stuff */ \
    \
/* Found a literal '%' but it was only followed by a single valid hex digit */ \
    X(PERCENT_BADHEX) \
/* jsonpointer URI is malformed '/' */ \
    X(JPR_BADPATH) \
/* Duplicate slash */ \
    X(JPR_DUPSLASH) \
/* No leading root */ \
    X(JPR_NOROOT) \
/* Allocation failure */ \
    X(ENOMEM)

typedef enum {
    JSONSL_ERROR_SUCCESS = 0,
#define X(e) JSONSL_ERROR_##e,
    JSONSL_XERR
#undef X
    JSONSL_ERROR_GENERIC
} jsonsl_error_t;
254 
255 
256 /**
257  * A state is a single level of the stack.
258  * Non-private data (i.e. the 'data' field, see the STATE_GENERIC section)
259  * will remain in tact until the item is popped.
260  *
261  * As a result, it means a parent state object may be accessed from a child
262  * object, (the parents fields will all be valid). This allows a user to create
263  * an ad-hoc hierarchy on top of the JSON one.
264  *
265  */
266 struct jsonsl_state_st {
267     /**
268      * The JSON object type
269      */
270     unsigned type;
271 
272     /** If this element is special, then its extended type is here */
273     unsigned special_flags;
274 
275     /**
276      * The position (in terms of number of bytes since the first call to
277      * jsonsl_feed()) at which the state was first pushed. This includes
278      * opening tokens, if applicable.
279      *
280      * @note For strings (i.e. type & JSONSL_Tf_STRINGY is nonzero) this will
281      * be the position of the first quote.
282      *
283      * @see jsonsl_st::pos which contains the _current_ position and can be
284      * used during a POP callback to get the length of the element.
285      */
286     size_t pos_begin;
287 
288     /**FIXME: This is redundant as the same information can be derived from
289      * jsonsl_st::pos at pop-time */
290     size_t pos_cur;
291 
292     /**
293      * Level of recursion into nesting. This is mainly a convenience
294      * variable, as this can technically be deduced from the lexer's
295      * level parameter (though the logic is not that simple)
296      */
297     unsigned int level;
298 
299 
300     /**
301      * how many elements in the object/list.
302      * For objects (hashes), an element is either
303      * a key or a value. Thus for one complete pair,
304      * nelem will be 2.
305      *
306      * For special types, this will hold the sum of the digits.
307      * This only holds true for values which are simple signed/unsigned
308      * numbers. Otherwise a special flag is set, and extra handling is not
309      * performed.
310      */
311     uint64_t nelem;
312 
313 
314 
315     /*TODO: merge this and special_flags into a union */
316 
317 
318     /**
319      * Useful for an opening nest, this will prevent a callback from being
320      * invoked on this item or any of its children
321      */
322     int ignore_callback;
323 
324     /**
325      * Counter which is incremented each time an escape ('\') is encountered.
326      * This is used internally for non-string types and should only be
327      * inspected by the user if the state actually represents a string
328      * type.
329      */
330     unsigned int nescapes;
331 
332     /**
333      * Put anything you want here. if JSONSL_STATE_USER_FIELDS is here, then
334      * the macro expansion happens here.
335      *
336      * You can use these fields to store hierarchical or 'tagging' information
337      * for specific objects.
338      *
339      * See the documentation above for the lifetime of the state object (i.e.
340      * if the private data points to allocated memory, it should be freed
341      * when the object is popped, as the state object will be re-used)
342      */
343 #ifndef JSONSL_STATE_GENERIC
344     JSONSL_STATE_USER_FIELDS
345 #else
346 
347     /**
348      * Otherwise, this is a simple void * pointer for anything you want
349      */
350     void *data;
351 #endif /* JSONSL_STATE_USER_FIELDS */
352 };
353 
/**
 * Gets the number of elements in the list.
 * @param st The state. Must be of type JSONSL_T_LIST
 * @return number of elements in the list
 */
#define JSONSL_LIST_SIZE(st) ((st)->nelem)

/**
 * Gets the number of key-value pairs in an object.
 * Keys and values are counted separately in nelem, hence the halving.
 * @param st The state. Must be of type JSONSL_T_OBJECT
 * @return the number of key-value pairs in the object
 */
#define JSONSL_OBJECT_SIZE(st) ((st)->nelem / 2)

/**
 * Gets the numeric value.
 * @param st The state. Must be of type JSONSL_T_SPECIAL and
 *           special_flags must have the JSONSL_SPECIALf_NUMERIC flag
 *           set.
 * @return the numeric value of the state.
 */
#define JSONSL_NUMERIC_VALUE(st) ((st)->nelem)
373 
/*
 * So now we need some special structure for keeping the
 * JPR info in sync, preferably all in a single block
 * of memory (there's no need for separate allocations).
 * So we will define a 'table' with the following layout
 *
 * Level    nPosbl  JPR1_last   JPR2_last   JPR3_last
 *
 * 0        1       NOMATCH     POSSIBLE    POSSIBLE
 * 1        0       NOMATCH     NOMATCH     COMPLETE
 * [ table ends here because no further path is possible]
 *
 * Where the JPR..n corresponds to the number of JPRs
 * requested, and nPosbl is a quick flag to determine
 * the number of possibilities. In the future this might
 * be made into a proper 'jump' table.
 *
 * Since we always mark JPRs from the higher levels descending
 * into the lower ones, a prospective child match would first
 * look at the parent table to check the possibilities, and then
 * see which ones were possible.
 *
 * Thus, the size of this blob would be (and these are all ints here)
 * nLevels * nJPR * 2.
 *
 * The 'width' of the table would be nJPR*2, and the 'height' would be
 * nLevels.
 */
403 
404 /**
405  * This is called when a stack change ocurs.
406  *
407  * @param jsn The lexer
408  * @param action The type of action, this can be PUSH or POP
409  * @param state A pointer to the stack currently affected by the action
410  * @param at A pointer to the position of the input buffer which triggered
411  * this action.
412  */
413 typedef void (*jsonsl_stack_callback)(
414         jsonsl_t jsn,
415         jsonsl_action_t action,
416         struct jsonsl_state_st* state,
417         const jsonsl_char_t *at);
418 
419 
420 /**
421  * This is called when an error is encountered.
422  * Sometimes it's possible to 'erase' characters (by replacing them
423  * with whitespace). If you think you have corrected the error, you
424  * can return a true value, in which case the parser will backtrack
425  * and try again.
426  *
427  * @param jsn The lexer
428  * @param error The error which was thrown
429  * @param state the current state
430  * @param a pointer to the position of the input buffer which triggered
431  * the error. Note that this is not const, this is because you have the
432  * possibility of modifying the character in an attempt to correct the
433  * error
434  *
435  * @return zero to bail, nonzero to try again (this only makes sense if
436  * the input buffer has been modified by this callback)
437  */
438 typedef int (*jsonsl_error_callback)(
439         jsonsl_t jsn,
440         jsonsl_error_t error,
441         struct jsonsl_state_st* state,
442         jsonsl_char_t *at);
443 
444 struct jsonsl_st {
445     /** Public, read-only */
446 
447     /** This is the current level of the stack */
448     unsigned int level;
449 
450     /** Flag set to indicate we should stop processing */
451     unsigned int stopfl;
452 
453     /**
454      * This is the current position, relative to the beginning
455      * of the stream.
456      */
457     size_t pos;
458 
459     /** This is the 'bytes' variable passed to feed() */
460     const jsonsl_char_t *base;
461 
462     /** Callback invoked for PUSH actions */
463     jsonsl_stack_callback action_callback_PUSH;
464 
465     /** Callback invoked for POP actions */
466     jsonsl_stack_callback action_callback_POP;
467 
468     /** Default callback for any action, if neither PUSH or POP callbacks are defined */
469     jsonsl_stack_callback action_callback;
470 
471     /**
472      * Do not invoke callbacks for objects deeper than this level.
473      * NOTE: This field establishes the lower bound for ignored callbacks,
474      * and is thus misnamed. `min_ignore_level` would actually make more
475      * sense, but we don't want to break API.
476      */
477     unsigned int max_callback_level;
478 
479     /** The error callback. Invoked when an error happens. Should not be NULL */
480     jsonsl_error_callback error_callback;
481 
482     /* these are boolean flags you can modify. You will be called
483      * about notification for each of these types if the corresponding
484      * variable is true.
485      */
486 
487     /**
488      * @name Callback Booleans.
489      * These determine whether a callback is to be invoked for certain types of objects
490      * @{*/
491 
492     /** Boolean flag to enable or disable the invokcation for events on this type*/
493     int call_SPECIAL;
494     int call_OBJECT;
495     int call_LIST;
496     int call_STRING;
497     int call_HKEY;
498     /*@}*/
499 
500     /**
501      * @name u-Escape handling
502      * Special handling for the \\u-f00d type sequences. These are meant
503      * to be translated back into the corresponding octet(s).
504      * A special callback (if set) is invoked with *at=='u'. An application
505      * may wish to temporarily suspend parsing and handle the 'u-' sequence
506      * internally (or not).
507      */
508 
509      /*@{*/
510 
511     /** Callback to be invoked for a u-escape */
512     jsonsl_stack_callback action_callback_UESCAPE;
513 
514     /** Boolean flag, whether to invoke the callback */
515     int call_UESCAPE;
516 
517     /** Boolean flag, whether we should return after encountering a u-escape:
518      * the callback is invoked and then we return if this is true
519      */
520     int return_UESCAPE;
521     /*@}*/
522 
523     struct {
524         int allow_trailing_comma;
525     } options;
526 
527     /** Put anything here */
528     void *data;
529 
530     /*@{*/
531     /** Private */
532     int in_escape;
533     char expecting;
534     char tok_last;
535     int can_insert;
536     unsigned int levels_max;
537 
538 #ifndef JSONSL_NO_JPR
539     size_t jpr_count;
540     jsonsl_jpr_t *jprs;
541 
542     /* Root pointer for JPR matching information */
543     size_t *jpr_root;
544 #endif /* JSONSL_NO_JPR */
545     /*@}*/
546 
547     /**
548      * This is the stack. Its upper bound is levels_max, or the
549      * nlevels argument passed to jsonsl_new. If you modify this structure,
550      * make sure that this member is last.
551      */
552     struct jsonsl_state_st stack[1];
553 };
554 
555 
556 /**
557  * Creates a new lexer object, with capacity for recursion up to nlevels
558  *
559  * @param nlevels maximum recursion depth
560  */
561 JSONSL_API
562 jsonsl_t jsonsl_new(int nlevels);
563 
564 /**
565  * Feeds data into the lexer.
566  *
567  * @param jsn the lexer object
568  * @param bytes new data to be fed
569  * @param nbytes size of new data
570  */
571 JSONSL_API
572 void jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes);
573 
574 /**
575  * Resets the internal parser state. This does not free the parser
576  * but does clean it internally, so that the next time feed() is called,
577  * it will be treated as a new stream
578  *
579  * @param jsn the lexer
580  */
581 JSONSL_API
582 void jsonsl_reset(jsonsl_t jsn);
583 
584 /**
585  * Frees the lexer, cleaning any allocated memory taken
586  *
587  * @param jsn the lexer
588  */
589 JSONSL_API
590 void jsonsl_destroy(jsonsl_t jsn);
591 
592 /**
593  * Gets the 'parent' element, given the current one
594  *
595  * @param jsn the lexer
596  * @param cur the current nest, which should be a struct jsonsl_nest_st
597  */
598 static JSONSL_INLINE
jsonsl_last_state(const jsonsl_t jsn,const struct jsonsl_state_st * state)599 struct jsonsl_state_st *jsonsl_last_state(const jsonsl_t jsn,
600                                           const struct jsonsl_state_st *state)
601 {
602     /* Don't complain about overriding array bounds */
603     if (state->level > 1) {
604         return jsn->stack + state->level - 1;
605     } else {
606         return NULL;
607     }
608 }
609 
610 /**
611  * Gets the state of the last fully consumed child of this parent. This is
612  * only valid in the parent's POP callback.
613  *
614  * @param the lexer
615  * @return A pointer to the child.
616  */
617 static JSONSL_INLINE
jsonsl_last_child(const jsonsl_t jsn,const struct jsonsl_state_st * parent)618 struct jsonsl_state_st *jsonsl_last_child(const jsonsl_t jsn,
619                                           const struct jsonsl_state_st *parent)
620 {
621     return jsn->stack + (parent->level + 1);
622 }
623 
624 /**Call to instruct the parser to stop parsing and return. This is valid
625  * only from within a callback */
626 static JSONSL_INLINE
jsonsl_stop(jsonsl_t jsn)627 void jsonsl_stop(jsonsl_t jsn)
628 {
629     jsn->stopfl = 1;
630 }
631 
632 /**
633  * This enables receiving callbacks on all events. Doesn't do
634  * anything special but helps avoid some boilerplate.
635  * This does not touch the UESCAPE callbacks or flags.
636  */
637 static JSONSL_INLINE
jsonsl_enable_all_callbacks(jsonsl_t jsn)638 void jsonsl_enable_all_callbacks(jsonsl_t jsn)
639 {
640     jsn->call_HKEY = 1;
641     jsn->call_STRING = 1;
642     jsn->call_OBJECT = 1;
643     jsn->call_SPECIAL = 1;
644     jsn->call_LIST = 1;
645 }
646 
/**
 * A macro which returns true if the current state object can
 * have children. This means a list type or an object type.
 *
 * @param state pointer to a struct jsonsl_state_st. The argument is
 * evaluated more than once, so it must not have side effects.
 */
#define JSONSL_STATE_IS_CONTAINER(state) \
        ((state)->type == JSONSL_T_OBJECT || (state)->type == JSONSL_T_LIST)
653 
654 /**
655  * These two functions, dump a string representation
656  * of the error or type, respectively. They will never
657  * return NULL
658  */
659 JSONSL_API
660 const char* jsonsl_strerror(jsonsl_error_t err);
661 JSONSL_API
662 const char* jsonsl_strtype(jsonsl_type_t jt);
663 
664 /**
665  * Dumps global metrics to the screen. This is a noop unless
666  * jsonsl was compiled with JSONSL_USE_METRICS
667  */
668 JSONSL_API
669 void jsonsl_dump_global_metrics(void);
670 
671 /* This macro just here for editors to do code folding */
672 #ifndef JSONSL_NO_JPR
673 
/**
 * @name JSON Pointer API
 *
 * JSONPointer API. This isn't really related to the lexer (at least not yet).
 * JSONPointer provides an extremely simple specification for providing
 * locations within JSON objects. We will extend it a bit and allow for
 * providing 'wildcard' characters by which to be able to 'query' the stream.
 *
 * See http://tools.ietf.org/html/draft-pbryan-zyp-json-pointer-00
 *
 * Currently I'm implementing the 'single query' API which can only use a single
 * query component. In the future I will integrate my yet-to-be-published
 * Boyer-Moore-esque prefix searching implementation, in order to allow
 * multiple paths to be merged into one for quick and efficient searching.
 *
 *
 * JPR (as we'll refer to it within the source) can be used by splitting
 * the components into multiple sections, and incrementally 'tracking' each
 * component. When JSONSL delivers a 'pop' callback for a string, or a 'push'
 * callback for an object, we will check to see whether the index matching
 * the component corresponding to the current level contains a match
 * for our path.
 *
 * In order to do this properly, a structure must be maintained within the
 * parent indicating whether its children are possible matches. This flag
 * will be 'inherited' by all children which may conform to the match
 * specification, and discarded by all which do not (thereby eliminating
 * their children from inheriting it).
 *
 * A successful match is a complete one. One can provide multiple paths with
 * multiple levels of matches e.g.
 *  /foo/bar/baz/^/blah
 *
 *  @{
 */
709 
/** The wildcard character used in JPR paths. May be predefined by the user. */
#ifndef JSONSL_PATH_WILDCARD_CHAR
#define JSONSL_PATH_WILDCARD_CHAR '^'
#endif /* JSONSL_PATH_WILDCARD_CHAR */
714 
/**
 * Results of matching a JPR against a position in the document.
 *
 * JSONSL_XMATCH is an X-macro list: define X(name, value), expand
 * JSONSL_XMATCH, then #undef X.
 */
#define JSONSL_XMATCH \
    X(COMPLETE,1) \
    X(POSSIBLE,0) \
    X(NOMATCH,-1) \
    X(TYPE_MISMATCH, -2)

typedef enum {
#define X(T,v) JSONSL_MATCH_##T = v,
    JSONSL_XMATCH
#undef X
    /* Needs an explicit value: left implicit it would be TYPE_MISMATCH + 1,
     * i.e. -1, and thus indistinguishable from JSONSL_MATCH_NOMATCH. */
    JSONSL_MATCH_UNKNOWN = -3
} jsonsl_jpr_match_t;
730 
/** Kinds of path component found in a JPR */
typedef enum {
    /* Special */
    JSONSL_PATH_INVALID = -1,
    JSONSL_PATH_NONE = 0,

    JSONSL_PATH_STRING = 1,
    JSONSL_PATH_WILDCARD,
    JSONSL_PATH_NUMERIC,
    JSONSL_PATH_ROOT
} jsonsl_jpr_type_t;
741 
742 struct jsonsl_jpr_component_st {
743     /** The string the component points to */
744     char *pstr;
745     /** if this is a numeric type, the number is 'cached' here */
746     unsigned long idx;
747     /** The length of the string */
748     size_t len;
749     /** The type of component (NUMERIC or STRING) */
750     jsonsl_jpr_type_t ptype;
751 
752     /** Set this to true to enforce type checking between dict keys and array
753      * indices. jsonsl_jpr_match() will return TYPE_MISMATCH if it detects
754      * that an array index is actually a child of a dictionary. */
755     short is_arridx;
756 
757     /* Extra fields (for more advanced searches. Default is empty) */
758     JSONSL_JPR_COMPONENT_USER_FIELDS
759 };
760 
/** A compiled JSON Pointer; created by jsonsl_jpr_new() */
struct jsonsl_jpr_st {
    /** Path components */
    struct jsonsl_jpr_component_st *components;
    /** Number of entries in `components` */
    size_t ncomponents;

    /** Base of allocated string for components */
    char *basestr;

    /** The original match string. Useful for returning to the user */
    char *orig;
    /** Length of `orig` */
    size_t norig;
};
773 
774 
775 
776 /**
777  * Create a new JPR object.
778  *
779  * @param path the JSONPointer path specification.
780  * @param errp a pointer to a jsonsl_error_t. If this function returns NULL,
781  * then more details will be in this variable.
782  *
783  * @return a new jsonsl_jpr_t object, or NULL on error.
784  */
785 JSONSL_API
786 jsonsl_jpr_t jsonsl_jpr_new(const char *path, jsonsl_error_t *errp);
787 
788 /**
789  * Destroy a JPR object
790  */
791 JSONSL_API
792 void jsonsl_jpr_destroy(jsonsl_jpr_t jpr);
793 
794 /**
795  * Match a JSON object against a type and specific level
796  *
797  * @param jpr the JPR object
798  * @param parent_type the type of the parent (should be T_LIST or T_OBJECT)
799  * @param parent_level the level of the parent
800  * @param key the 'key' of the child. If the parent is an array, this should be
801  * empty.
802  * @param nkey - the length of the key. If the parent is an array (T_LIST), then
803  * this should be the current index.
804  *
805  * NOTE: The key of the child means any kind of associative data related to the
806  * element. Thus: <<< { "foo" : [ >>,
807  * the opening array's key is "foo".
808  *
809  * @return a status constant. This indicates whether a match was excluded, possible,
810  * or successful.
811  */
812 JSONSL_API
813 jsonsl_jpr_match_t jsonsl_jpr_match(jsonsl_jpr_t jpr,
814                                     unsigned int parent_type,
815                                     unsigned int parent_level,
816                                     const char *key, size_t nkey);
817 
818 
819 /**
820  * Associate a set of JPR objects with a lexer instance.
821  * This should be called before the lexer has been fed any data (and
822  * behavior is undefined if you don't adhere to this).
823  *
824  * After using this function, you may subsequently call match_state() on
825  * given states (presumably from within the callbacks).
826  *
827  * Note that currently the first JPR is the quickest and comes
828  * pre-allocated with the state structure. Further JPR objects
829  * are chained.
830  *
831  * @param jsn The lexer
832  * @param jprs An array of jsonsl_jpr_t objects
833  * @param njprs How many elements in the jprs array.
834  */
835 JSONSL_API
836 void jsonsl_jpr_match_state_init(jsonsl_t jsn,
837                                  jsonsl_jpr_t *jprs,
838                                  size_t njprs);
839 
840 /**
841  * This follows the same semantics as the normal match,
842  * except we infer parent and type information from the relevant state objects.
843  * The match status (for all possible JPR objects) is set in the *out parameter.
844  *
845  * If a match has succeeded, then its JPR object will be returned. In all other
846  * instances, NULL is returned;
847  *
848  * @param jpr The jsonsl_jpr_t handle
849  * @param state The jsonsl_state_st which is a candidate
850  * @param key The hash key (if applicable, can be NULL if parent is list)
851  * @param nkey Length of hash key (if applicable, can be zero if parent is list)
852  * @param out A pointer to a jsonsl_jpr_match_t. This will be populated with
853  * the match result
854  *
855  * @return If a match was completed in full, then the JPR object containing
856  * the matching path will be returned. Otherwise, the return is NULL (note, this
857  * does not mean matching has failed, it can still be part of the match: check
858  * the out parameter).
859  */
860 JSONSL_API
861 jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn,
862                                     struct jsonsl_state_st *state,
863                                     const char *key,
864                                     size_t nkey,
865                                     jsonsl_jpr_match_t *out);
866 
867 
868 /**
869  * Cleanup any memory allocated and any states set by
870  * match_state_init() and match_state()
871  * @param jsn The lexer
872  */
873 JSONSL_API
874 void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn);
875 
876 /**
877  * Return a string representation of the match result returned by match()
878  */
879 JSONSL_API
880 const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match);
881 
882 /* @}*/
883 
884 /**
885  * Utility function to convert escape sequences into their original form.
886  *
887  * The decoders I've sampled do not seem to specify a standard behavior of what
888  * to escape/unescape.
889  *
890  * RFC 4627 Mandates only that the quoute, backslash, and ASCII control
891  * characters (0x00-0x1f) be escaped. It is often common for applications
892  * to escape a '/' - however this may also be desired behavior. the JSON
893  * spec is not clear on this, and therefore jsonsl leaves it up to you.
894  *
895  * @param in The input string.
896  * @param out An allocated output (should be the same size as in)
897  * @param len the size of the buffer
898  * @param toEscape - A sparse array of characters to unescape. Characters
899  * which are not present in this array, e.g. toEscape['c'] == 0 will be
900  * ignored and passed to the output in their original form.
901  * @param oflags If not null, and a \uXXXX escape expands to a non-ascii byte,
902  * then this variable will have the SPECIALf_NONASCII flag on.
903  *
904  * @param err A pointer to an error variable. If an error ocurrs, it will be
905  * set in this variable
906  * @param errat If not null and an error occurs, this will be set to point
907  * to the position within the string at which the offending character was
908  * encountered.
909  *
910  * @return The effective size of the output buffer.
911  */
912 JSONSL_API
913 size_t jsonsl_util_unescape_ex(const char *in,
914                                char *out,
915                                size_t len,
916                                const int toEscape[128],
917                                unsigned *oflags,
918                                jsonsl_error_t *err,
919                                const char **errat);
920 
/**
 * Convenience macro to avoid passing too many parameters: forwards to
 * jsonsl_util_unescape_ex() with NULL for both `oflags` and `errat`.
 */
#define jsonsl_util_unescape(in, out, len, toEscape, err) \
    jsonsl_util_unescape_ex(in, out, len, toEscape, NULL, err, NULL)
926 
927 #endif /* JSONSL_NO_JPR */
928 
/**
 * HERE BE CHARACTER TABLES!
 *
 * JSONSL_CHARTABLE_string_nopass expands to 255 comma-separated
 * initializers, one per character value 0x00..0xfe (when used to
 * initialize a 256-element array, the final element 0xff is implicitly
 * zero). An entry is 1 for 0x00-0x13, for the double quote (0x22) and for
 * the backslash (0x5c); every other entry is 0.
 *
 * NOTE(review): 0x14-0x1f are control characters too but are marked 0
 * here, unlike 0x00-0x13. That is preserved as found; confirm whether it
 * is intentional before changing it, since the lexer depends on this table.
 */
#define JSONSL_CHARTABLE_string_nopass \
/* 0x00-0x0f */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, \
/* 0x10-0x1f */ 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0, \
/* 0x20-0x2f */ 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x22 is '"' */ \
/* 0x30-0x3f */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, \
/* 0x40-0x4f */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, \
/* 0x50-0x5f */ 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, /* 0x5c is backslash */ \
/* 0x60-0x6f */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, \
/* 0x70-0x7f */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, \
/* 0x80-0x8f */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, \
/* 0x90-0x9f */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, \
/* 0xa0-0xaf */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, \
/* 0xb0-0xbf */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, \
/* 0xc0-0xcf */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, \
/* 0xd0-0xdf */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, \
/* 0xe0-0xef */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, \
/* 0xf0-0xfe */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
965 
966 
967 #ifdef __cplusplus
968 }
969 #endif /* __cplusplus */
970 
971 #endif /* JSONSL_H_ */
972