1 /**
2 * JSON Simple/Stacked/Stateful Lexer.
3 * - Does not buffer data
4 * - Maintains state
5 * - Callback oriented
6 * - Lightweight and fast. One source file and one header file
7 *
8 * Copyright (C) 2012-2015 Mark Nunberg
9 * See included LICENSE file for license details.
10 */
11
12 #ifndef JSONSL_H_
13 #define JSONSL_H_
14
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <stddef.h>
18 #include <string.h>
19 #include <sys/types.h>
20 #include <wchar.h>
21
22 #ifdef __cplusplus
23 extern "C" {
24 #endif /* __cplusplus */
25
/*
 * Character types used by the lexer API.
 *
 * With JSONSL_USE_WCHAR defined the input is consumed as wchar_t units;
 * otherwise plain bytes are used.
 *
 * NOTE(review): C has no `unsigned wchar_t`, so the wide build aliases both
 * names to wchar_t, which keeps jsonsl_uchar_t the same width as
 * jsonsl_char_t. wchar_t may be a signed type on some platforms -- confirm
 * that users of jsonsl_uchar_t do not depend on it being unsigned in the
 * wide build.
 */
#ifdef JSONSL_USE_WCHAR
typedef wchar_t jsonsl_char_t;
typedef wchar_t jsonsl_uchar_t;
#else
typedef char jsonsl_char_t;
typedef unsigned char jsonsl_uchar_t;
#endif /* JSONSL_USE_WCHAR */
33
34 /* Stolen from http-parser.h, and possibly others */
35 #if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600)
36 typedef __int8 int8_t;
37 typedef unsigned __int8 uint8_t;
38 typedef __int16 int16_t;
39 typedef unsigned __int16 uint16_t;
40 typedef __int32 int32_t;
41 typedef unsigned __int32 uint32_t;
42 typedef __int64 int64_t;
43 typedef unsigned __int64 uint64_t;
44 #if !defined(_MSC_VER) || _MSC_VER<1400
45 typedef unsigned int size_t;
46 typedef int ssize_t;
47 #endif
48 #else
49 #include <stdint.h>
50 #endif
51
52
53 #if (!defined(JSONSL_STATE_GENERIC)) && (!defined(JSONSL_STATE_USER_FIELDS))
54 #define JSONSL_STATE_GENERIC
55 #endif /* !defined JSONSL_STATE_GENERIC */
56
57 #ifdef JSONSL_STATE_GENERIC
58 #define JSONSL_STATE_USER_FIELDS
59 #endif /* JSONSL_STATE_GENERIC */
60
61 /* Additional fields for component object */
62 #ifndef JSONSL_JPR_COMPONENT_USER_FIELDS
63 #define JSONSL_JPR_COMPONENT_USER_FIELDS
64 #endif
65
66 #ifndef JSONSL_API
67 /**
68 * We require a /DJSONSL_DLL so that users already using this as a static
69 * or embedded library don't get confused
70 */
71 #if defined(_WIN32) && defined(JSONSL_DLL)
72 #define JSONSL_API __declspec(dllexport)
73 #else
74 #define JSONSL_API
75 #endif /* _WIN32 */
76
77 #endif /* !JSONSL_API */
78
79 #ifndef JSONSL_INLINE
80 #if defined(_MSC_VER)
81 #define JSONSL_INLINE __inline
82 #elif defined(__GNUC__)
83 #define JSONSL_INLINE __inline__
84 #else
85 #define JSONSL_INLINE inline
86 #endif /* _MSC_VER or __GNUC__ */
87 #endif /* JSONSL_INLINE */
88
89 #define JSONSL_MAX_LEVELS 512
90
91 struct jsonsl_st;
92 typedef struct jsonsl_st *jsonsl_t;
93
94 typedef struct jsonsl_jpr_st* jsonsl_jpr_t;
95
96 /**
 * This flag is true when AND'd against a type whose value
 * must be in "quotes", i.e. T_HKEY and T_STRING
99 */
100 #define JSONSL_Tf_STRINGY 0xffff00
101
/**
 * Constant representing the special JSON types.
 * The values are special and aid in speed (the OBJECT and LIST
 * values are the char literals of their openings).
 *
 * Their actual value is a character which attempts to resemble
 * some mnemonic reference to the actual type.
 *
 * If new types are added, they must fit into the ASCII printable
 * range (so they should be AND'd with 0x7f and yield something
 * meaningful)
 *
 * JSONSL_XTYPE is an X-macro list of (name, value) pairs; the enum below
 * expands each pair into a JSONSL_T_<name> enumerator. STRING and HKEY
 * additionally carry the JSONSL_Tf_STRINGY bits.
 */
#define JSONSL_XTYPE \
    X(STRING, '"'|JSONSL_Tf_STRINGY) \
    X(HKEY, '#'|JSONSL_Tf_STRINGY) \
    X(OBJECT, '{') \
    X(LIST, '[') \
    X(SPECIAL, '^') \
    X(UESCAPE, 'u')
typedef enum {
/* Expand every JSONSL_XTYPE entry into "JSONSL_T_<name> = <value>," */
#define X(o, c) \
    JSONSL_T_##o = c,
    JSONSL_XTYPE
    /* Not part of the X-macro list */
    JSONSL_T_UNKNOWN = '?',
    /* Abstract 'root' object */
    JSONSL_T_ROOT = 0
#undef X
} jsonsl_type_t;
130
/**
 * Subtypes for T_SPECIAL. We define them as flags
 * because more than one type can be applied to a
 * given object.
 *
 * JSONSL_XSPECIAL is an X-macro list of (name, bit value) pairs; the enum
 * below expands each pair into a JSONSL_SPECIALf_<name> enumerator and then
 * adds a few extra flags and combined masks.
 */

#define JSONSL_XSPECIAL \
    X(NONE, 0) \
    X(SIGNED, 1<<0) \
    X(UNSIGNED, 1<<1) \
    X(TRUE, 1<<2) \
    X(FALSE, 1<<3) \
    X(NULL, 1<<4) \
    X(FLOAT, 1<<5) \
    X(EXPONENT, 1<<6) \
    X(NONASCII, 1<<7)
typedef enum {
/* Expand every JSONSL_XSPECIAL entry into "JSONSL_SPECIALf_<name> = <bits>," */
#define X(o,b) \
    JSONSL_SPECIALf_##o = b,
    JSONSL_XSPECIAL
#undef X
    /* Handy flags for checking */

    JSONSL_SPECIALf_UNKNOWN = 1 << 8,

    /** @private Private. Bit 9 combined with the UNSIGNED flag */
    JSONSL_SPECIALf_ZERO    = 1 << 9 | JSONSL_SPECIALf_UNSIGNED,
    /** @private */
    JSONSL_SPECIALf_DASH    = 1 << 10,

    /** Type is numeric (mask of SIGNED and UNSIGNED) */
    JSONSL_SPECIALf_NUMERIC = (JSONSL_SPECIALf_SIGNED| JSONSL_SPECIALf_UNSIGNED),

    /** Type is a boolean (mask of TRUE and FALSE) */
    JSONSL_SPECIALf_BOOLEAN = (JSONSL_SPECIALf_TRUE|JSONSL_SPECIALf_FALSE),

    /** Type is an "extended", not integral type (but numeric) */
    JSONSL_SPECIALf_NUMNOINT = (JSONSL_SPECIALf_FLOAT|JSONSL_SPECIALf_EXPONENT)
} jsonsl_special_t;
170
171
/**
 * These are the various types of stack (or other) events
 * which will trigger a callback.
 * Like the type constants, these are also mnemonic.
 *
 * JSONSL_XACTION is an X-macro list of (name, character) pairs expanded
 * into JSONSL_ACTION_<name> enumerators below.
 */
#define JSONSL_XACTION \
    X(PUSH, '+') \
    X(POP, '-') \
    X(UESCAPE, 'U') \
    X(ERROR, '!')
typedef enum {
/* Expand every JSONSL_XACTION entry into "JSONSL_ACTION_<name> = <char>," */
#define X(a,c) \
    JSONSL_ACTION_##a = c,
    JSONSL_XACTION
    /* Not part of the X-macro list */
    JSONSL_ACTION_UNKNOWN = '?'
#undef X
} jsonsl_action_t;
189
190
/**
 * Various errors which may be thrown while parsing JSON.
 *
 * JSONSL_XERR is an X-macro list of error names. The enum below assigns
 * JSONSL_ERROR_SUCCESS the value 0, then one JSONSL_ERROR_<name> per list
 * entry in the order written here, and finally JSONSL_ERROR_GENERIC.
 * Because the values are implicit, inserting an entry in the middle of the
 * list renumbers every entry after it.
 */
#define JSONSL_XERR \
/* Trailing garbage characters */ \
    X(GARBAGE_TRAILING) \
/* We were expecting a 'special' (numeric, true, false, null) */ \
    X(SPECIAL_EXPECTED) \
/* The 'special' value was incomplete */ \
    X(SPECIAL_INCOMPLETE) \
/* Found a stray token */ \
    X(STRAY_TOKEN) \
/* We were expecting a token before this one */ \
    X(MISSING_TOKEN) \
/* Cannot insert because the container is not ready */ \
    X(CANT_INSERT) \
/* Found a '\' outside a string */ \
    X(ESCAPE_OUTSIDE_STRING) \
/* Found a ':' outside of a hash */ \
    X(KEY_OUTSIDE_OBJECT) \
/* found a string outside of a container */ \
    X(STRING_OUTSIDE_CONTAINER) \
/* Found a null byte in middle of string */ \
    X(FOUND_NULL_BYTE) \
/* Current level exceeds limit specified in constructor */ \
    X(LEVELS_EXCEEDED) \
/* Got a } as a result of an opening [ or vice versa */ \
    X(BRACKET_MISMATCH) \
/* We expected a key, but got something else instead */ \
    X(HKEY_EXPECTED) \
/* We got an illegal control character (bad whitespace or something) */ \
    X(WEIRD_WHITESPACE) \
/* Found a \u-escape, but there were less than 4 following hex digits */ \
    X(UESCAPE_TOOSHORT) \
/* Invalid two-character escape */ \
    X(ESCAPE_INVALID) \
/* Trailing comma */ \
    X(TRAILING_COMMA) \
/* An invalid number was passed in a numeric field */ \
    X(INVALID_NUMBER) \
/* Value is missing for object */ \
    X(VALUE_EXPECTED) \
/* The following are for JPR Stuff */ \
    \
/* Found a literal '%' but it was only followed by a single valid hex digit */ \
    X(PERCENT_BADHEX) \
/* jsonpointer URI is malformed '/' */ \
    X(JPR_BADPATH) \
/* Duplicate slash */ \
    X(JPR_DUPSLASH) \
/* No leading root */ \
    X(JPR_NOROOT) \
/* Allocation failure */ \
    X(ENOMEM)

typedef enum {
    /* Zero, so a "no error" value tests false */
    JSONSL_ERROR_SUCCESS = 0,
/* Expand every JSONSL_XERR entry into "JSONSL_ERROR_<name>," */
#define X(e) \
    JSONSL_ERROR_##e,
    JSONSL_XERR
#undef X
    /* Last enumerator; its value is one past the final JSONSL_XERR entry */
    JSONSL_ERROR_GENERIC
} jsonsl_error_t;
254
255
/**
 * A state is a single level of the stack.
 * Non-private data (i.e. the 'data' field, see the STATE_GENERIC section)
 * will remain intact until the item is popped.
 *
 * As a result, it means a parent state object may be accessed from a child
 * object, (the parents fields will all be valid). This allows a user to create
 * an ad-hoc hierarchy on top of the JSON one.
 *
 */
struct jsonsl_state_st {
    /**
     * The JSON object type
     */
    unsigned type;

    /** If this element is special, then its extended type is here */
    unsigned special_flags;

    /**
     * The position (in terms of number of bytes since the first call to
     * jsonsl_feed()) at which the state was first pushed. This includes
     * opening tokens, if applicable.
     *
     * @note For strings (i.e. type & JSONSL_Tf_STRINGY is nonzero) this will
     * be the position of the first quote.
     *
     * @see jsonsl_st::pos which contains the _current_ position and can be
     * used during a POP callback to get the length of the element.
     */
    size_t pos_begin;

    /**FIXME: This is redundant as the same information can be derived from
     * jsonsl_st::pos at pop-time */
    size_t pos_cur;

    /**
     * Level of recursion into nesting. This is mainly a convenience
     * variable, as this can technically be deduced from the lexer's
     * level parameter (though the logic is not that simple)
     */
    unsigned int level;


    /**
     * how many elements in the object/list.
     * For objects (hashes), an element is either
     * a key or a value. Thus for one complete pair,
     * nelem will be 2.
     *
     * For special types, this will hold the sum of the digits.
     * This only holds true for values which are simple signed/unsigned
     * numbers. Otherwise a special flag is set, and extra handling is not
     * performed.
     */
    uint64_t nelem;



    /*TODO: merge this and special_flags into a union
     * NOTE(review): "this" presumably refers to nelem above -- confirm. */


    /**
     * Useful for an opening nest, this will prevent a callback from being
     * invoked on this item or any of its children
     */
    int ignore_callback;

    /**
     * Counter which is incremented each time an escape ('\') is encountered.
     * This is used internally for non-string types and should only be
     * inspected by the user if the state actually represents a string
     * type.
     */
    unsigned int nescapes;

    /**
     * Put anything you want here. if JSONSL_STATE_USER_FIELDS is here, then
     * the macro expansion happens here.
     *
     * You can use these fields to store hierarchical or 'tagging' information
     * for specific objects.
     *
     * See the documentation above for the lifetime of the state object (i.e.
     * if the private data points to allocated memory, it should be freed
     * when the object is popped, as the state object will be re-used)
     */
#ifndef JSONSL_STATE_GENERIC
    JSONSL_STATE_USER_FIELDS
#else

    /**
     * Otherwise, this is a simple void * pointer for anything you want
     */
    void *data;
#endif /* !JSONSL_STATE_GENERIC */
};
353
354 /**Gets the number of elements in the list.
355 * @param st The state. Must be of type JSONSL_T_LIST
356 * @return number of elements in the list
357 */
358 #define JSONSL_LIST_SIZE(st) ((st)->nelem)
359
360 /**Gets the number of key-value pairs in an object
361 * @param st The state. Must be of type JSONSL_T_OBJECT
362 * @return the number of key-value pairs in the object
363 */
364 #define JSONSL_OBJECT_SIZE(st) ((st)->nelem / 2)
365
366 /**Gets the numeric value.
367 * @param st The state. Must be of type JSONSL_T_SPECIAL and
368 * special_flags must have the JSONSL_SPECIALf_NUMERIC flag
369 * set.
370 * @return the numeric value of the state.
371 */
372 #define JSONSL_NUMERIC_VALUE(st) ((st)->nelem)
373
374 /*
 * So now we need some special structure for keeping the
 * JPR info in sync. Preferably all in a single block
 * of memory (there's no need for separate allocations).
378 * So we will define a 'table' with the following layout
379 *
380 * Level nPosbl JPR1_last JPR2_last JPR3_last
381 *
382 * 0 1 NOMATCH POSSIBLE POSSIBLE
383 * 1 0 NOMATCH NOMATCH COMPLETE
384 * [ table ends here because no further path is possible]
385 *
386 * Where the JPR..n corresponds to the number of JPRs
387 * requested, and nPosble is a quick flag to determine
388 *
389 * the number of possibilities. In the future this might
390 * be made into a proper 'jump' table,
391 *
392 * Since we always mark JPRs from the higher levels descending
393 * into the lower ones, a prospective child match would first
394 * look at the parent table to check the possibilities, and then
395 * see which ones were possible..
396 *
397 * Thus, the size of this blob would be (and these are all ints here)
398 * nLevels * nJPR * 2.
399 *
400 * the 'Width' of the table would be nJPR*2, and the 'height' would be
401 * nlevels
402 */
403
/**
 * This is called when a stack change occurs.
 *
 * @param jsn The lexer
 * @param action The type of action, this can be PUSH or POP
 * @param state A pointer to the stack currently affected by the action
 * @param at A pointer to the position of the input buffer which triggered
 * this action.
 */
typedef void (*jsonsl_stack_callback)(
        jsonsl_t jsn,
        jsonsl_action_t action,
        struct jsonsl_state_st* state,
        const jsonsl_char_t *at);
418
419
/**
 * This is called when an error is encountered.
 * Sometimes it's possible to 'erase' characters (by replacing them
 * with whitespace). If you think you have corrected the error, you
 * can return a true value, in which case the parser will backtrack
 * and try again.
 *
 * @param jsn The lexer
 * @param error The error which was thrown
 * @param state the current state
 * @param at a pointer to the position of the input buffer which triggered
 * the error. Note that this is not const, this is because you have the
 * possibility of modifying the character in an attempt to correct the
 * error
 *
 * @return zero to bail, nonzero to try again (this only makes sense if
 * the input buffer has been modified by this callback)
 */
typedef int (*jsonsl_error_callback)(
        jsonsl_t jsn,
        jsonsl_error_t error,
        struct jsonsl_state_st* state,
        jsonsl_char_t *at);
443
/**
 * The lexer object. It holds the public, read-only parse position, the
 * user-settable callbacks and flags, private bookkeeping, and finally the
 * state stack itself as the trailing member.
 */
struct jsonsl_st {
    /** Public, read-only */

    /** This is the current level of the stack */
    unsigned int level;

    /** Flag set to indicate we should stop processing */
    unsigned int stopfl;

    /**
     * This is the current position, relative to the beginning
     * of the stream.
     */
    size_t pos;

    /** This is the 'bytes' variable passed to feed() */
    const jsonsl_char_t *base;

    /** Callback invoked for PUSH actions */
    jsonsl_stack_callback action_callback_PUSH;

    /** Callback invoked for POP actions */
    jsonsl_stack_callback action_callback_POP;

    /** Default callback for any action, if neither PUSH or POP callbacks are defined */
    jsonsl_stack_callback action_callback;

    /**
     * Do not invoke callbacks for objects deeper than this level.
     * NOTE: This field establishes the lower bound for ignored callbacks,
     * and is thus misnamed. `min_ignore_level` would actually make more
     * sense, but we don't want to break API.
     */
    unsigned int max_callback_level;

    /** The error callback. Invoked when an error happens. Should not be NULL */
    jsonsl_error_callback error_callback;

    /* these are boolean flags you can modify. You will be called
     * about notification for each of these types if the corresponding
     * variable is true.
     */

    /**
     * @name Callback Booleans.
     * These determine whether a callback is to be invoked for certain types of objects
     * @{*/

    /** Boolean flag to enable or disable the invocation for events on this type*/
    int call_SPECIAL;
    int call_OBJECT;
    int call_LIST;
    int call_STRING;
    int call_HKEY;
    /*@}*/

    /**
     * @name u-Escape handling
     * Special handling for the \\u-f00d type sequences. These are meant
     * to be translated back into the corresponding octet(s).
     * A special callback (if set) is invoked with *at=='u'. An application
     * may wish to temporarily suspend parsing and handle the 'u-' sequence
     * internally (or not).
     */

     /*@{*/

    /** Callback to be invoked for a u-escape */
    jsonsl_stack_callback action_callback_UESCAPE;

    /** Boolean flag, whether to invoke the callback */
    int call_UESCAPE;

    /** Boolean flag, whether we should return after encountering a u-escape:
     * the callback is invoked and then we return if this is true
     */
    int return_UESCAPE;
    /*@}*/

    /** User-settable parsing options */
    struct {
        /* NOTE(review): presumably makes the lexer accept a trailing comma
         * instead of reporting JSONSL_ERROR_TRAILING_COMMA -- confirm in the
         * implementation file. */
        int allow_trailing_comma;
    } options;

    /** Put anything here */
    void *data;

    /*@{*/
    /** Private */
    /* Internal lexer bookkeeping. The meaning of in_escape, expecting,
     * tok_last and can_insert is defined by the implementation file and is
     * not visible in this header. */
    int in_escape;
    char expecting;
    char tok_last;
    int can_insert;
    /* Upper bound of the stack below (see the comment on `stack`) */
    unsigned int levels_max;

#ifndef JSONSL_NO_JPR
    /* JPR bookkeeping. NOTE(review): presumably populated by
     * jsonsl_jpr_match_state_init() -- confirm in the implementation file. */
    size_t jpr_count;
    jsonsl_jpr_t *jprs;

    /* Root pointer for JPR matching information */
    size_t *jpr_root;
#endif /* JSONSL_NO_JPR */
    /*@}*/

    /**
     * This is the stack. Its upper bound is levels_max, or the
     * nlevels argument passed to jsonsl_new. If you modify this structure,
     * make sure that this member is last.
     */
    struct jsonsl_state_st stack[1];
};
554
555
556 /**
557 * Creates a new lexer object, with capacity for recursion up to nlevels
558 *
559 * @param nlevels maximum recursion depth
560 */
561 JSONSL_API
562 jsonsl_t jsonsl_new(int nlevels);
563
564 /**
565 * Feeds data into the lexer.
566 *
567 * @param jsn the lexer object
568 * @param bytes new data to be fed
569 * @param nbytes size of new data
570 */
571 JSONSL_API
572 void jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes);
573
574 /**
575 * Resets the internal parser state. This does not free the parser
576 * but does clean it internally, so that the next time feed() is called,
577 * it will be treated as a new stream
578 *
579 * @param jsn the lexer
580 */
581 JSONSL_API
582 void jsonsl_reset(jsonsl_t jsn);
583
584 /**
585 * Frees the lexer, cleaning any allocated memory taken
586 *
587 * @param jsn the lexer
588 */
589 JSONSL_API
590 void jsonsl_destroy(jsonsl_t jsn);
591
592 /**
593 * Gets the 'parent' element, given the current one
594 *
595 * @param jsn the lexer
596 * @param cur the current nest, which should be a struct jsonsl_nest_st
597 */
598 static JSONSL_INLINE
jsonsl_last_state(const jsonsl_t jsn,const struct jsonsl_state_st * state)599 struct jsonsl_state_st *jsonsl_last_state(const jsonsl_t jsn,
600 const struct jsonsl_state_st *state)
601 {
602 /* Don't complain about overriding array bounds */
603 if (state->level > 1) {
604 return jsn->stack + state->level - 1;
605 } else {
606 return NULL;
607 }
608 }
609
610 /**
611 * Gets the state of the last fully consumed child of this parent. This is
612 * only valid in the parent's POP callback.
613 *
614 * @param the lexer
615 * @return A pointer to the child.
616 */
617 static JSONSL_INLINE
jsonsl_last_child(const jsonsl_t jsn,const struct jsonsl_state_st * parent)618 struct jsonsl_state_st *jsonsl_last_child(const jsonsl_t jsn,
619 const struct jsonsl_state_st *parent)
620 {
621 return jsn->stack + (parent->level + 1);
622 }
623
624 /**Call to instruct the parser to stop parsing and return. This is valid
625 * only from within a callback */
626 static JSONSL_INLINE
jsonsl_stop(jsonsl_t jsn)627 void jsonsl_stop(jsonsl_t jsn)
628 {
629 jsn->stopfl = 1;
630 }
631
632 /**
633 * This enables receiving callbacks on all events. Doesn't do
634 * anything special but helps avoid some boilerplate.
635 * This does not touch the UESCAPE callbacks or flags.
636 */
637 static JSONSL_INLINE
jsonsl_enable_all_callbacks(jsonsl_t jsn)638 void jsonsl_enable_all_callbacks(jsonsl_t jsn)
639 {
640 jsn->call_HKEY = 1;
641 jsn->call_STRING = 1;
642 jsn->call_OBJECT = 1;
643 jsn->call_SPECIAL = 1;
644 jsn->call_LIST = 1;
645 }
646
/**
 * A macro which evaluates to nonzero if the given state object can
 * have children. This means a list type or an object type.
 *
 * The argument and the whole expansion are parenthesized so the macro is
 * safe with expression arguments (e.g. `p + 1`) and under operators such
 * as `!`.
 *
 * @param state pointer to a struct jsonsl_state_st. It is evaluated twice,
 * so it must not have side effects.
 */
#define JSONSL_STATE_IS_CONTAINER(state) \
        ((state)->type == JSONSL_T_OBJECT || (state)->type == JSONSL_T_LIST)
653
654 /**
655 * These two functions, dump a string representation
656 * of the error or type, respectively. They will never
657 * return NULL
658 */
659 JSONSL_API
660 const char* jsonsl_strerror(jsonsl_error_t err);
661 JSONSL_API
662 const char* jsonsl_strtype(jsonsl_type_t jt);
663
664 /**
665 * Dumps global metrics to the screen. This is a noop unless
666 * jsonsl was compiled with JSONSL_USE_METRICS
667 */
668 JSONSL_API
669 void jsonsl_dump_global_metrics(void);
670
671 /* This macro just here for editors to do code folding */
672 #ifndef JSONSL_NO_JPR
673
674 /**
675 * @name JSON Pointer API
676 *
677 * JSONPointer API. This isn't really related to the lexer (at least not yet)
678 * JSONPointer provides an extremely simple specification for providing
679 * locations within JSON objects. We will extend it a bit and allow for
680 * providing 'wildcard' characters by which to be able to 'query' the stream.
681 *
682 * See http://tools.ietf.org/html/draft-pbryan-zyp-json-pointer-00
683 *
684 * Currently I'm implementing the 'single query' API which can only use a single
685 * query component. In the future I will integrate my yet-to-be-published
686 * Boyer-Moore-esque prefix searching implementation, in order to allow
687 * multiple paths to be merged into one for quick and efficient searching.
688 *
689 *
 * JPR (as we'll refer to it within the source) can be used by splitting
 * the components into multiple sections, and incrementally 'track' each
692 * component. When JSONSL delivers a 'pop' callback for a string, or a 'push'
693 * callback for an object, we will check to see whether the index matching
694 * the component corresponding to the current level contains a match
695 * for our path.
696 *
697 * In order to do this properly, a structure must be maintained within the
 * parent indicating whether its children are possible matches. This flag
 * will be 'inherited' by all children which may conform to the match
 * specification, and discarded by all which do not (thereby eliminating
 * their children from inheriting it).
702 *
703 * A successful match is a complete one. One can provide multiple paths with
704 * multiple levels of matches e.g.
705 * /foo/bar/baz/^/blah
706 *
707 * @{
708 */
709
710 /** The wildcard character */
711 #ifndef JSONSL_PATH_WILDCARD_CHAR
712 #define JSONSL_PATH_WILDCARD_CHAR '^'
713 #endif /* WILDCARD_CHAR */
714
/*
 * Match results as an X-macro list of (name, value) pairs. The enum below
 * expands each pair into a JSONSL_MATCH_<name> enumerator.
 */
#define JSONSL_XMATCH \
    X(COMPLETE,1) \
    X(POSSIBLE,0) \
    X(NOMATCH,-1) \
    X(TYPE_MISMATCH, -2)

typedef enum {

#define X(T,v) \
    JSONSL_MATCH_##T = v,
    JSONSL_XMATCH

#undef X
    /*
     * Given an explicit value of its own. Left implicit it would take the
     * previous enumerator plus one (-2 + 1 == -1) and silently alias
     * JSONSL_MATCH_NOMATCH. The value is kept negative so that checks of
     * the form `match < 0` treat it the same way as before.
     */
    JSONSL_MATCH_UNKNOWN = -3
} jsonsl_jpr_match_t;
730
/**
 * Kinds of path component, stored in jsonsl_jpr_component_st::ptype.
 * The four regular kinds are numbered consecutively starting at 1.
 */
typedef enum {
    JSONSL_PATH_STRING = 1,
    JSONSL_PATH_WILDCARD,
    JSONSL_PATH_NUMERIC,
    JSONSL_PATH_ROOT,

    /* Special */
    JSONSL_PATH_INVALID = -1,
    JSONSL_PATH_NONE = 0
} jsonsl_jpr_type_t;
741
/**
 * A single component of a parsed JPR path.
 */
struct jsonsl_jpr_component_st {
    /** The string the component points to */
    char *pstr;
    /** if this is a numeric type, the number is 'cached' here */
    unsigned long idx;
    /** The length of the string */
    size_t len;
    /** The type of component (NUMERIC or STRING) */
    jsonsl_jpr_type_t ptype;

    /** Set this to true to enforce type checking between dict keys and array
     * indices. jsonsl_jpr_match() will return TYPE_MISMATCH if it detects
     * that an array index is actually a child of a dictionary. */
    short is_arridx;

    /* Extra fields (for more advanced searches. Default is empty).
     * Expands to whatever JSONSL_JPR_COMPONENT_USER_FIELDS is defined as. */
    JSONSL_JPR_COMPONENT_USER_FIELDS
};
760
/**
 * A parsed JPR path: its components plus the strings backing them.
 * Instances are created by jsonsl_jpr_new() and released by
 * jsonsl_jpr_destroy().
 */
struct jsonsl_jpr_st {
    /** Path components */
    struct jsonsl_jpr_component_st *components;
    /** NOTE(review): presumably the number of entries in `components` -- confirm */
    size_t ncomponents;

    /** Base of allocated string for components */
    char *basestr;

    /** The original match string. Useful for returning to the user */
    char *orig;
    /** NOTE(review): presumably the length of `orig` -- confirm */
    size_t norig;
};
773
774
775
776 /**
777 * Create a new JPR object.
778 *
779 * @param path the JSONPointer path specification.
780 * @param errp a pointer to a jsonsl_error_t. If this function returns NULL,
781 * then more details will be in this variable.
782 *
783 * @return a new jsonsl_jpr_t object, or NULL on error.
784 */
785 JSONSL_API
786 jsonsl_jpr_t jsonsl_jpr_new(const char *path, jsonsl_error_t *errp);
787
788 /**
789 * Destroy a JPR object
790 */
791 JSONSL_API
792 void jsonsl_jpr_destroy(jsonsl_jpr_t jpr);
793
794 /**
795 * Match a JSON object against a type and specific level
796 *
797 * @param jpr the JPR object
798 * @param parent_type the type of the parent (should be T_LIST or T_OBJECT)
799 * @param parent_level the level of the parent
800 * @param key the 'key' of the child. If the parent is an array, this should be
801 * empty.
802 * @param nkey - the length of the key. If the parent is an array (T_LIST), then
803 * this should be the current index.
804 *
805 * NOTE: The key of the child means any kind of associative data related to the
806 * element. Thus: <<< { "foo" : [ >>,
807 * the opening array's key is "foo".
808 *
809 * @return a status constant. This indicates whether a match was excluded, possible,
810 * or successful.
811 */
812 JSONSL_API
813 jsonsl_jpr_match_t jsonsl_jpr_match(jsonsl_jpr_t jpr,
814 unsigned int parent_type,
815 unsigned int parent_level,
816 const char *key, size_t nkey);
817
818
819 /**
820 * Associate a set of JPR objects with a lexer instance.
821 * This should be called before the lexer has been fed any data (and
822 * behavior is undefined if you don't adhere to this).
823 *
824 * After using this function, you may subsequently call match_state() on
825 * given states (presumably from within the callbacks).
826 *
827 * Note that currently the first JPR is the quickest and comes
828 * pre-allocated with the state structure. Further JPR objects
829 * are chained.
830 *
831 * @param jsn The lexer
832 * @param jprs An array of jsonsl_jpr_t objects
833 * @param njprs How many elements in the jprs array.
834 */
835 JSONSL_API
836 void jsonsl_jpr_match_state_init(jsonsl_t jsn,
837 jsonsl_jpr_t *jprs,
838 size_t njprs);
839
840 /**
841 * This follows the same semantics as the normal match,
842 * except we infer parent and type information from the relevant state objects.
843 * The match status (for all possible JPR objects) is set in the *out parameter.
844 *
845 * If a match has succeeded, then its JPR object will be returned. In all other
846 * instances, NULL is returned;
847 *
 * @param jsn The lexer whose associated JPR objects are checked
849 * @param state The jsonsl_state_st which is a candidate
850 * @param key The hash key (if applicable, can be NULL if parent is list)
851 * @param nkey Length of hash key (if applicable, can be zero if parent is list)
852 * @param out A pointer to a jsonsl_jpr_match_t. This will be populated with
853 * the match result
854 *
855 * @return If a match was completed in full, then the JPR object containing
856 * the matching path will be returned. Otherwise, the return is NULL (note, this
857 * does not mean matching has failed, it can still be part of the match: check
858 * the out parameter).
859 */
860 JSONSL_API
861 jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn,
862 struct jsonsl_state_st *state,
863 const char *key,
864 size_t nkey,
865 jsonsl_jpr_match_t *out);
866
867
868 /**
869 * Cleanup any memory allocated and any states set by
870 * match_state_init() and match_state()
871 * @param jsn The lexer
872 */
873 JSONSL_API
874 void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn);
875
876 /**
877 * Return a string representation of the match result returned by match()
878 */
879 JSONSL_API
880 const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match);
881
882 /* @}*/
883
884 /**
885 * Utility function to convert escape sequences into their original form.
886 *
887 * The decoders I've sampled do not seem to specify a standard behavior of what
888 * to escape/unescape.
889 *
 * RFC 4627 Mandates only that the quote, backslash, and ASCII control
891 * characters (0x00-0x1f) be escaped. It is often common for applications
892 * to escape a '/' - however this may also be desired behavior. the JSON
893 * spec is not clear on this, and therefore jsonsl leaves it up to you.
894 *
895 * @param in The input string.
896 * @param out An allocated output (should be the same size as in)
897 * @param len the size of the buffer
898 * @param toEscape - A sparse array of characters to unescape. Characters
899 * which are not present in this array, e.g. toEscape['c'] == 0 will be
900 * ignored and passed to the output in their original form.
901 * @param oflags If not null, and a \uXXXX escape expands to a non-ascii byte,
902 * then this variable will have the SPECIALf_NONASCII flag on.
903 *
 * @param err A pointer to an error variable. If an error occurs, it will be
 * set in this variable
906 * @param errat If not null and an error occurs, this will be set to point
907 * to the position within the string at which the offending character was
908 * encountered.
909 *
910 * @return The effective size of the output buffer.
911 */
912 JSONSL_API
913 size_t jsonsl_util_unescape_ex(const char *in,
914 char *out,
915 size_t len,
916 const int toEscape[128],
917 unsigned *oflags,
918 jsonsl_error_t *err,
919 const char **errat);
920
921 /**
922 * Convenience macro to avoid passing too many parameters
923 */
924 #define jsonsl_util_unescape(in, out, len, toEscape, err) \
925 jsonsl_util_unescape_ex(in, out, len, toEscape, NULL, err, NULL)
926
927 #endif /* JSONSL_NO_JPR */
928
/**
 * HERE BE CHARACTER TABLES!
 *
 * JSONSL_CHARTABLE_string_nopass expands to a comma-separated initializer
 * list with one entry per character code, starting at 0x00. An entry is 1
 * for 0x00 through 0x13, for the double quote (0x22) and for the backslash
 * (0x5c); every other listed entry is 0.
 *
 * NOTE(review): presumably a 1 marks a character that cannot simply be
 * skipped over while scanning inside a string -- confirm against the user
 * of this table in the implementation file.
 *
 * NOTE(review): the control characters 0x14 through 0x1f are 0 here, unlike
 * 0x00 through 0x13. JSON requires every control character up to 0x1f to be
 * escaped inside strings, so confirm whether this gap is intentional.
 *
 * NOTE(review): the per-line annotations end at 0xfe, so the list appears to
 * supply no entry for 0xff; presumably the receiving array relies on implicit
 * zero-initialization for that slot -- confirm.
 */
#define JSONSL_CHARTABLE_string_nopass \
/* 0x00 */ 1 /* <NUL> */, /* 0x00 */ \
/* 0x01 */ 1 /* <SOH> */, /* 0x01 */ \
/* 0x02 */ 1 /* <STX> */, /* 0x02 */ \
/* 0x03 */ 1 /* <ETX> */, /* 0x03 */ \
/* 0x04 */ 1 /* <EOT> */, /* 0x04 */ \
/* 0x05 */ 1 /* <ENQ> */, /* 0x05 */ \
/* 0x06 */ 1 /* <ACK> */, /* 0x06 */ \
/* 0x07 */ 1 /* <BEL> */, /* 0x07 */ \
/* 0x08 */ 1 /* <BS> */, /* 0x08 */ \
/* 0x09 */ 1 /* <HT> */, /* 0x09 */ \
/* 0x0a */ 1 /* <LF> */, /* 0x0a */ \
/* 0x0b */ 1 /* <VT> */, /* 0x0b */ \
/* 0x0c */ 1 /* <FF> */, /* 0x0c */ \
/* 0x0d */ 1 /* <CR> */, /* 0x0d */ \
/* 0x0e */ 1 /* <SO> */, /* 0x0e */ \
/* 0x0f */ 1 /* <SI> */, /* 0x0f */ \
/* 0x10 */ 1 /* <DLE> */, /* 0x10 */ \
/* 0x11 */ 1 /* <DC1> */, /* 0x11 */ \
/* 0x12 */ 1 /* <DC2> */, /* 0x12 */ \
/* 0x13 */ 1 /* <DC3> */, /* 0x13 */ \
/* 0x14 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x21 */ \
/* 0x22 */ 1 /* <"> */, /* 0x22 */ \
/* 0x23 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x42 */ \
/* 0x43 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5b */ \
/* 0x5c */ 1 /* <\> */, /* 0x5c */ \
/* 0x5d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x7c */ \
/* 0x7d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x9c */ \
/* 0x9d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xbc */ \
/* 0xbd */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xdc */ \
/* 0xdd */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xfc */ \
/* 0xfd */ 0,0 /* 0xfe */ \
964
965
966
967 #ifdef __cplusplus
968 }
969 #endif /* __cplusplus */
970
971 #endif /* JSONSL_H_ */
972