1 /* https://github.com/mnunberg/jsonsl */
2
3 /**
4 * JSON Simple/Stacked/Stateful Lexer.
5 * - Does not buffer data
6 * - Maintains state
7 * - Callback oriented
8 * - Lightweight and fast. One source file and one header file
9 *
10 * Copyright (C) 2012-2015 Mark Nunberg
11 * See included LICENSE file for license details.
12 */
13
14 #ifndef JSONSL_H_
15 #define JSONSL_H_
16
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <stddef.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <wchar.h>
23
24 #ifdef __cplusplus
25 extern "C" {
26 #endif /* __cplusplus */
27
/*
 * Character types used throughout the lexer API.
 *
 * jsonsl_char_t is the unit of input handed to jsonsl_feed();
 * jsonsl_uchar_t is its companion type for contexts that want an
 * unsigned view of a narrow character.
 *
 * With JSONSL_USE_WCHAR defined, both map onto wchar_t. C has no
 * `unsigned wchar_t` (wchar_t is a typedef, not a basic type, and its
 * signedness is implementation-defined), so the companion type is simply
 * wchar_t again, which keeps both types the same width.
 */
#ifdef JSONSL_USE_WCHAR
typedef wchar_t jsonsl_char_t;
typedef wchar_t jsonsl_uchar_t;
#else
typedef char jsonsl_char_t;
typedef unsigned char jsonsl_uchar_t;
#endif /* JSONSL_USE_WCHAR */
35
/*
 * Proxies for the NaN / Infinity special flags.
 *
 * Without JSONSL_PARSE_NAN both proxies are 0; with it they expand to
 * JSONSL_SPECIALf_NAN and JSONSL_SPECIALf_INF respectively.
 */
#ifndef JSONSL_PARSE_NAN
#define JSONSL__NAN_PROXY 0
#define JSONSL__INF_PROXY 0
#else
#define JSONSL__NAN_PROXY JSONSL_SPECIALf_NAN
#define JSONSL__INF_PROXY JSONSL_SPECIALf_INF
#endif /* JSONSL_PARSE_NAN */
43
/*
 * Fixed-width integer types (approach borrowed from http-parser.h, and
 * possibly others).
 *
 * <stdint.h> is used everywhere except on _WIN32 builds that are neither
 * MinGW nor MSVC with _MSC_VER >= 1600; those get hand-written typedefs
 * built on the __intN extension types. For MSVC older than 1400 (or a
 * non-MSVC compiler on that path) size_t and ssize_t are supplied too.
 */
#if !defined(_WIN32) || defined(__MINGW32__) || \
    (defined(_MSC_VER) && _MSC_VER >= 1600)
#include <stdint.h>
#else
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#if !defined(_MSC_VER) || _MSC_VER < 1400
typedef unsigned int size_t;
typedef int ssize_t;
#endif /* pre-1400 MSVC */
#endif /* <stdint.h> available */
61
62
/*
 * State customisation defaults.
 *
 * If the build selects neither JSONSL_STATE_GENERIC nor
 * JSONSL_STATE_USER_FIELDS, fall back to the generic layout. Whenever
 * the generic layout is active, JSONSL_STATE_USER_FIELDS is defined as
 * empty.
 */
#if !(defined(JSONSL_STATE_GENERIC) || defined(JSONSL_STATE_USER_FIELDS))
#define JSONSL_STATE_GENERIC
#endif /* neither variant requested */

#if defined(JSONSL_STATE_GENERIC)
#define JSONSL_STATE_USER_FIELDS
#endif /* JSONSL_STATE_GENERIC */
70
/*
 * Extra members spliced into struct jsonsl_jpr_component_st.
 * Expands to nothing unless the build provides its own definition.
 */
#if !defined(JSONSL_JPR_COMPONENT_USER_FIELDS)
#define JSONSL_JPR_COMPONENT_USER_FIELDS
#endif /* JSONSL_JPR_COMPONENT_USER_FIELDS */
75
/*
 * Linkage decoration for the public functions.
 *
 * A caller-supplied JSONSL_API always wins. Otherwise the macro is empty,
 * except on _WIN32 builds that also define JSONSL_DLL, where it becomes
 * __declspec(dllexport). Requiring the explicit /DJSONSL_DLL means users
 * who already consume this as a static or embedded library are not
 * affected.
 */
#if !defined(JSONSL_API)

#if !defined(_WIN32) || !defined(JSONSL_DLL)
#define JSONSL_API
#else
#define JSONSL_API __declspec(dllexport)
#endif /* !_WIN32 || !JSONSL_DLL */

#endif /* !JSONSL_API */
88
/*
 * Spelling of the inline keyword for the current compiler.
 * A caller-supplied JSONSL_INLINE takes precedence; otherwise MSVC gets
 * __inline, GNU-compatible compilers get __inline__, and everything else
 * gets plain `inline`.
 */
#if !defined(JSONSL_INLINE)
#  ifdef _MSC_VER
#    define JSONSL_INLINE __inline
#  else
#    ifdef __GNUC__
#      define JSONSL_INLINE __inline__
#    else
#      define JSONSL_INLINE inline
#    endif /* __GNUC__ */
#  endif /* _MSC_VER */
#endif /* JSONSL_INLINE */
98
/*
 * Nesting-depth constant.
 * NOTE(review): nothing in this header uses it; presumably it serves as
 * a default or ceiling for the nlevels argument of jsonsl_new() - confirm
 * in the implementation.
 */
#define JSONSL_MAX_LEVELS 512

/* Opaque-style handle to a lexer instance (struct jsonsl_st, below). */
struct jsonsl_st;
typedef struct jsonsl_st *jsonsl_t;

/* Handle to a parsed JSON Pointer path (struct jsonsl_jpr_st). */
typedef struct jsonsl_jpr_st* jsonsl_jpr_t;
105
/**
 * Bit mask OR'd into the type constants whose values must appear in
 * "quotes", i.e. JSONSL_T_HKEY and JSONSL_T_STRING. `type & JSONSL_Tf_STRINGY`
 * is therefore nonzero exactly for those two types.
 */
#define JSONSL_Tf_STRINGY 0xffff00
111
/**
 * Constants representing the JSON element types.
 *
 * The values are chosen to aid speed: OBJECT and LIST are the character
 * literals of their opening tokens, and the remaining values are
 * characters that act as a mnemonic for the type. STRING and HKEY
 * additionally carry the JSONSL_Tf_STRINGY bits.
 *
 * If new types are added, they must fit into the ASCII printable
 * range (so AND'ing them with 0x7f should yield something meaningful).
 *
 * JSONSL_XTYPE is an X-macro list: define X(name, value), expand
 * JSONSL_XTYPE, then #undef X.
 */
#define JSONSL_XTYPE \
    X(STRING,   '"'|JSONSL_Tf_STRINGY) \
    X(HKEY,     '#'|JSONSL_Tf_STRINGY) \
    X(OBJECT,   '{') \
    X(LIST,     '[') \
    X(SPECIAL,  '^') \
    X(UESCAPE,  'u')
typedef enum {
/* Each list entry becomes an enumerator named JSONSL_T_<name>. */
#define X(o, c) \
    JSONSL_T_##o = c,
    JSONSL_XTYPE
    JSONSL_T_UNKNOWN = '?',
    /* Abstract 'root' object */
    JSONSL_T_ROOT = 0
#undef X
} jsonsl_type_t;
140
/**
 * Subtypes for JSONSL_T_SPECIAL. They are defined as bit flags because
 * more than one subtype can apply to a given element.
 *
 * JSONSL_XSPECIAL is an X-macro list of (name, bit) pairs; each entry
 * becomes an enumerator named JSONSL_SPECIALf_<name>.
 */

#define JSONSL_XSPECIAL \
    X(NONE,     0) \
    X(SIGNED,   1<<0) \
    X(UNSIGNED, 1<<1) \
    X(TRUE,     1<<2) \
    X(FALSE,    1<<3) \
    X(NULL,     1<<4) \
    X(FLOAT,    1<<5) \
    X(EXPONENT, 1<<6) \
    X(NONASCII, 1<<7) \
    X(NAN,      1<<8) \
    X(INF,      1<<9)
typedef enum {
#define X(o,b) \
    JSONSL_SPECIALf_##o = b,
    JSONSL_XSPECIAL
#undef X
    /* Handy flags for checking */

    JSONSL_SPECIALf_UNKNOWN = 1 << 10,

    /** @private Bit 11 combined with the UNSIGNED flag */
    JSONSL_SPECIALf_ZERO    = 1 << 11 | JSONSL_SPECIALf_UNSIGNED,
    /** @private */
    JSONSL_SPECIALf_DASH    = 1 << 12,
    /** @private Infinity without / with the SIGNED flag */
    JSONSL_SPECIALf_POS_INF = (JSONSL_SPECIALf_INF),
    JSONSL_SPECIALf_NEG_INF = (JSONSL_SPECIALf_INF|JSONSL_SPECIALf_SIGNED),

    /** Type is numeric (mask of SIGNED and UNSIGNED) */
    JSONSL_SPECIALf_NUMERIC = (JSONSL_SPECIALf_SIGNED| JSONSL_SPECIALf_UNSIGNED),

    /** Type is a boolean (mask of TRUE and FALSE) */
    JSONSL_SPECIALf_BOOLEAN = (JSONSL_SPECIALf_TRUE|JSONSL_SPECIALf_FALSE),

    /** Type is an "extended", non-integral (but still numeric) type:
     * mask of FLOAT, EXPONENT, NAN and INF */
    JSONSL_SPECIALf_NUMNOINT =
        (JSONSL_SPECIALf_FLOAT|JSONSL_SPECIALf_EXPONENT|JSONSL_SPECIALf_NAN
         |JSONSL_SPECIALf_INF)
} jsonsl_special_t;
187
188
/**
 * The various types of stack (or other) events which will trigger a
 * callback. Like the type constants, these values are mnemonic
 * characters.
 *
 * JSONSL_XACTION is an X-macro list of (name, value) pairs; each entry
 * becomes an enumerator named JSONSL_ACTION_<name>.
 */
#define JSONSL_XACTION \
    X(PUSH,     '+') \
    X(POP,      '-') \
    X(UESCAPE,  'U') \
    X(ERROR,    '!')
typedef enum {
#define X(a,c) \
    JSONSL_ACTION_##a = c,
    JSONSL_XACTION
    JSONSL_ACTION_UNKNOWN = '?'
#undef X
} jsonsl_action_t;
206
207
/**
 * Various errors which may be reported while parsing JSON.
 *
 * JSONSL_XERR is an X-macro list of error names. In jsonsl_error_t each
 * entry becomes JSONSL_ERROR_<name>, numbered sequentially after
 * JSONSL_ERROR_SUCCESS (0) in list order, with JSONSL_ERROR_GENERIC last.
 * Reordering or inserting entries therefore changes the numeric values.
 */
#define JSONSL_XERR \
/* Trailing garbage characters */ \
    X(GARBAGE_TRAILING) \
/* We were expecting a 'special' (numeric, true, false, null) */ \
    X(SPECIAL_EXPECTED) \
/* The 'special' value was incomplete */ \
    X(SPECIAL_INCOMPLETE) \
/* Found a stray token */ \
    X(STRAY_TOKEN) \
/* We were expecting a token before this one */ \
    X(MISSING_TOKEN) \
/* Cannot insert because the container is not ready */ \
    X(CANT_INSERT) \
/* Found a '\' outside a string */ \
    X(ESCAPE_OUTSIDE_STRING) \
/* Found a ':' outside of a hash */ \
    X(KEY_OUTSIDE_OBJECT) \
/* Found a string outside of a container */ \
    X(STRING_OUTSIDE_CONTAINER) \
/* Found a null byte in the middle of a string */ \
    X(FOUND_NULL_BYTE) \
/* Current level exceeds limit specified in constructor */ \
    X(LEVELS_EXCEEDED) \
/* Got a } as a result of an opening [ or vice versa */ \
    X(BRACKET_MISMATCH) \
/* We expected a key, but got something else instead */ \
    X(HKEY_EXPECTED) \
/* We got an illegal control character (bad whitespace or something) */ \
    X(WEIRD_WHITESPACE) \
/* Found a \u-escape, but there were fewer than 4 following hex digits */ \
    X(UESCAPE_TOOSHORT) \
/* Invalid two-character escape */ \
    X(ESCAPE_INVALID) \
/* Trailing comma */ \
    X(TRAILING_COMMA) \
/* An invalid number was passed in a numeric field */ \
    X(INVALID_NUMBER) \
/* Value is missing for object */ \
    X(VALUE_EXPECTED) \
/* The following are for JPR Stuff */ \
    \
/* Found a literal '%' but it was only followed by a single valid hex digit */ \
    X(PERCENT_BADHEX) \
/* jsonpointer URI is malformed '/' */ \
    X(JPR_BADPATH) \
/* Duplicate slash */ \
    X(JPR_DUPSLASH) \
/* No leading root */ \
    X(JPR_NOROOT) \
/* Allocation failure */ \
    X(ENOMEM) \
/* Invalid unicode codepoint detected (in case of escapes) */ \
    X(INVALID_CODEPOINT)

typedef enum {
    JSONSL_ERROR_SUCCESS = 0,
#define X(e) \
    JSONSL_ERROR_##e,
    JSONSL_XERR
#undef X
    JSONSL_ERROR_GENERIC
} jsonsl_error_t;
273
274
/**
 * A state is a single level of the lexer's stack.
 * Non-private data (i.e. the 'data' field, see the STATE_GENERIC section)
 * will remain intact until the item is popped.
 *
 * As a result, a parent state object may be accessed from a child
 * object (the parent's fields will all be valid). This allows a user to
 * create an ad-hoc hierarchy on top of the JSON one.
 */
struct jsonsl_state_st {
    /**
     * The JSON object type (a jsonsl_type_t value stored as unsigned)
     */
    unsigned type;

    /** If this element is special, then its extended type
     * (jsonsl_special_t flags) is here */
    unsigned special_flags;

    /**
     * The position (in terms of number of bytes since the first call to
     * jsonsl_feed()) at which the state was first pushed. This includes
     * opening tokens, if applicable.
     *
     * @note For strings (i.e. type & JSONSL_Tf_STRINGY is nonzero) this will
     * be the position of the first quote.
     *
     * @see jsonsl_st::pos which contains the _current_ position and can be
     * used during a POP callback to get the length of the element.
     */
    size_t pos_begin;

    /**FIXME: This is redundant as the same information can be derived from
     * jsonsl_st::pos at pop-time */
    size_t pos_cur;

    /**
     * Level of recursion into nesting. This is mainly a convenience
     * variable, as this can technically be deduced from the lexer's
     * level parameter (though the logic is not that simple)
     */
    unsigned int level;


    /**
     * How many elements are in the object/list.
     * For objects (hashes), an element is either
     * a key or a value. Thus for one complete pair,
     * nelem will be 2 (see JSONSL_OBJECT_SIZE, which halves it).
     *
     * For special types, this will hold the sum of the digits
     * (JSONSL_NUMERIC_VALUE reads this field as the numeric value).
     * This only holds true for values which are simple signed/unsigned
     * numbers. Otherwise a special flag is set, and extra handling is not
     * performed.
     */
    uint64_t nelem;



    /*TODO: merge this and special_flags into a union */


    /**
     * Useful for an opening nest, this will prevent a callback from being
     * invoked on this item or any of its children
     */
    int ignore_callback;

    /**
     * Counter which is incremented each time an escape ('\') is encountered.
     * This is used internally for non-string types and should only be
     * inspected by the user if the state actually represents a string
     * type.
     */
    unsigned int nescapes;

    /**
     * Put anything you want here. If JSONSL_STATE_GENERIC is not defined,
     * the JSONSL_STATE_USER_FIELDS macro expansion happens here.
     *
     * You can use these fields to store hierarchical or 'tagging' information
     * for specific objects.
     *
     * See the documentation above for the lifetime of the state object (i.e.
     * if the private data points to allocated memory, it should be freed
     * when the object is popped, as the state object will be re-used)
     */
#ifndef JSONSL_STATE_GENERIC
    JSONSL_STATE_USER_FIELDS
#else

    /**
     * Otherwise, this is a simple void * pointer for anything you want
     */
    void *data;
#endif /* !JSONSL_STATE_GENERIC */
};
372
/**Gets the number of elements in the list.
 * Expands to the state's nelem field.
 * @param st Pointer to the state. Must be of type JSONSL_T_LIST
 * @return number of elements in the list
 */
#define JSONSL_LIST_SIZE(st) ((st)->nelem)

/**Gets the number of key-value pairs in an object.
 * Keys and values are counted separately in nelem, hence the division
 * by two.
 * @param st Pointer to the state. Must be of type JSONSL_T_OBJECT
 * @return the number of key-value pairs in the object
 */
#define JSONSL_OBJECT_SIZE(st) ((st)->nelem / 2)

/**Gets the numeric value.
 * Expands to the state's nelem field.
 * @param st Pointer to the state. Must be of type JSONSL_T_SPECIAL and
 *           special_flags must have the JSONSL_SPECIALf_NUMERIC flag
 *           set.
 * @return the numeric value of the state.
 */
#define JSONSL_NUMERIC_VALUE(st) ((st)->nelem)
392
393 /*
 * So now we need some special structure for keeping the
 * JPR info in sync, preferably all in a single block
 * of memory (there's no need for separate allocations).
 * So we will define a 'table' with the following layout:
398 *
399 * Level nPosbl JPR1_last JPR2_last JPR3_last
400 *
401 * 0 1 NOMATCH POSSIBLE POSSIBLE
402 * 1 0 NOMATCH NOMATCH COMPLETE
403 * [ table ends here because no further path is possible]
404 *
 * Where the JPR..n corresponds to the number of JPRs
 * requested, and nPosbl is a quick flag to determine
 * the number of possibilities. In the future this might
 * be made into a proper 'jump' table.
410 *
411 * Since we always mark JPRs from the higher levels descending
412 * into the lower ones, a prospective child match would first
413 * look at the parent table to check the possibilities, and then
414 * see which ones were possible..
415 *
416 * Thus, the size of this blob would be (and these are all ints here)
417 * nLevels * nJPR * 2.
418 *
419 * the 'Width' of the table would be nJPR*2, and the 'height' would be
420 * nlevels
421 */
422
/**
 * Signature of the callback invoked when a stack change occurs.
 * The same signature is also used for the u-escape callback
 * (jsonsl_st::action_callback_UESCAPE).
 *
 * @param jsn The lexer
 * @param action The type of action, this can be PUSH or POP
 * @param state A pointer to the stack level currently affected by the action
 * @param at A pointer to the position of the input buffer which triggered
 * this action.
 */
typedef void (*jsonsl_stack_callback)(
        jsonsl_t jsn,
        jsonsl_action_t action,
        struct jsonsl_state_st* state,
        const jsonsl_char_t *at);
437
438
/**
 * Signature of the callback invoked when an error is encountered.
 * Sometimes it's possible to 'erase' characters (by replacing them
 * with whitespace). If you think you have corrected the error, you
 * can return a true value, in which case the parser will backtrack
 * and try again.
 *
 * @param jsn The lexer
 * @param error The error which was raised
 * @param state The current state
 * @param at A pointer to the position of the input buffer which triggered
 * the error. Note that this is not const: you have the possibility of
 * modifying the character in an attempt to correct the error.
 *
 * @return zero to bail, nonzero to try again (this only makes sense if
 * the input buffer has been modified by this callback)
 */
typedef int (*jsonsl_error_callback)(
        jsonsl_t jsn,
        jsonsl_error_t error,
        struct jsonsl_state_st* state,
        jsonsl_char_t *at);
462
/**
 * The lexer object. Holds the public configuration (callbacks and
 * flags), the current position and nesting level, private parsing state,
 * and finally the stack of per-level states.
 */
struct jsonsl_st {
    /** Public, read-only */

    /** This is the current level of the stack */
    unsigned int level;

    /** Flag set to indicate we should stop processing
     * (set by jsonsl_stop()) */
    unsigned int stopfl;

    /**
     * This is the current position, relative to the beginning
     * of the stream.
     */
    size_t pos;

    /** This is the 'bytes' variable passed to feed() */
    const jsonsl_char_t *base;

    /** Callback invoked for PUSH actions */
    jsonsl_stack_callback action_callback_PUSH;

    /** Callback invoked for POP actions */
    jsonsl_stack_callback action_callback_POP;

    /** Default callback for any action, if neither PUSH nor POP callbacks
     * are defined */
    jsonsl_stack_callback action_callback;

    /**
     * Do not invoke callbacks for objects deeper than this level.
     * NOTE: This field establishes the lower bound for ignored callbacks,
     * and is thus misnamed. `min_ignore_level` would actually make more
     * sense, but we don't want to break API.
     */
    unsigned int max_callback_level;

    /** The error callback. Invoked when an error happens. Should not be NULL */
    jsonsl_error_callback error_callback;

    /* These are boolean flags you can modify. You will be notified
     * for each of these types if the corresponding
     * variable is true.
     */

    /**
     * @name Callback Booleans.
     * These determine whether a callback is to be invoked for certain types of objects
     * @{*/

    /** Boolean flag to enable or disable the invocation for events on this type*/
    int call_SPECIAL;
    int call_OBJECT;
    int call_LIST;
    int call_STRING;
    int call_HKEY;
    /*@}*/

    /**
     * @name u-Escape handling
     * Special handling for the \\u-f00d type sequences. These are meant
     * to be translated back into the corresponding octet(s).
     * A special callback (if set) is invoked with *at=='u'. An application
     * may wish to temporarily suspend parsing and handle the 'u-' sequence
     * internally (or not).
     */

     /*@{*/

    /** Callback to be invoked for a u-escape */
    jsonsl_stack_callback action_callback_UESCAPE;

    /** Boolean flag, whether to invoke the callback */
    int call_UESCAPE;

    /** Boolean flag, whether we should return after encountering a u-escape:
     * the callback is invoked and then we return if this is true
     */
    int return_UESCAPE;
    /*@}*/

    /* NOTE(review): presumably makes the lexer tolerate a trailing comma
     * instead of reporting JSONSL_ERROR_TRAILING_COMMA - confirm against
     * the implementation. */
    struct {
        int allow_trailing_comma;
    } options;

    /** Put anything here */
    void *data;

    /*@{*/
    /** Private */
    int in_escape;
    char expecting;
    char tok_last;
    int can_insert;
    unsigned int levels_max;

#ifndef JSONSL_NO_JPR
    size_t jpr_count;
    jsonsl_jpr_t *jprs;

    /* Root pointer for JPR matching information */
    size_t *jpr_root;
#endif /* JSONSL_NO_JPR */
    /*@}*/

    /**
     * This is the stack. Its upper bound is levels_max, or the
     * nlevels argument passed to jsonsl_new. It is declared with one
     * element but indexed beyond it (see jsonsl_last_state() and
     * jsonsl_last_child()), so if you modify this structure,
     * make sure that this member is last.
     */
    struct jsonsl_state_st stack[1];
};
573
574
/**
 * Creates a new lexer object, with capacity for recursion up to nlevels.
 *
 * @param nlevels maximum recursion depth
 * @return the new lexer handle; release it with jsonsl_destroy().
 */
JSONSL_API
jsonsl_t jsonsl_new(int nlevels);
582
/**
 * Feeds data into the lexer. Results are delivered through the callbacks
 * configured on the lexer object; the function itself returns nothing.
 *
 * @param jsn the lexer object
 * @param bytes new data to be fed
 * @param nbytes size of new data
 */
JSONSL_API
void jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes);
592
/**
 * Resets the internal parser state. This does not free the parser
 * but does clean it internally, so that the next time feed() is called,
 * the input will be treated as a new stream.
 *
 * @param jsn the lexer
 */
JSONSL_API
void jsonsl_reset(jsonsl_t jsn);
602
/**
 * Frees the lexer, cleaning up any allocated memory it holds.
 * The handle must not be used afterwards.
 *
 * @param jsn the lexer
 */
JSONSL_API
void jsonsl_destroy(jsonsl_t jsn);
610
611 /**
612 * Gets the 'parent' element, given the current one
613 *
614 * @param jsn the lexer
615 * @param cur the current nest, which should be a struct jsonsl_nest_st
616 */
617 static JSONSL_INLINE
jsonsl_last_state(const jsonsl_t jsn,const struct jsonsl_state_st * state)618 struct jsonsl_state_st *jsonsl_last_state(const jsonsl_t jsn,
619 const struct jsonsl_state_st *state)
620 {
621 /* Don't complain about overriding array bounds */
622 if (state->level > 1) {
623 return jsn->stack + state->level - 1;
624 } else {
625 return NULL;
626 }
627 }
628
629 /**
630 * Gets the state of the last fully consumed child of this parent. This is
631 * only valid in the parent's POP callback.
632 *
633 * @param the lexer
634 * @return A pointer to the child.
635 */
636 static JSONSL_INLINE
jsonsl_last_child(const jsonsl_t jsn,const struct jsonsl_state_st * parent)637 struct jsonsl_state_st *jsonsl_last_child(const jsonsl_t jsn,
638 const struct jsonsl_state_st *parent)
639 {
640 return jsn->stack + (parent->level + 1);
641 }
642
643 /**Call to instruct the parser to stop parsing and return. This is valid
644 * only from within a callback */
645 static JSONSL_INLINE
jsonsl_stop(jsonsl_t jsn)646 void jsonsl_stop(jsonsl_t jsn)
647 {
648 jsn->stopfl = 1;
649 }
650
651 /**
652 * This enables receiving callbacks on all events. Doesn't do
653 * anything special but helps avoid some boilerplate.
654 * This does not touch the UESCAPE callbacks or flags.
655 */
656 static JSONSL_INLINE
jsonsl_enable_all_callbacks(jsonsl_t jsn)657 void jsonsl_enable_all_callbacks(jsonsl_t jsn)
658 {
659 jsn->call_HKEY = 1;
660 jsn->call_STRING = 1;
661 jsn->call_OBJECT = 1;
662 jsn->call_SPECIAL = 1;
663 jsn->call_LIST = 1;
664 }
665
/**
 * A macro which evaluates to true if the given state object can
 * have children, i.e. it is a list type or an object type.
 *
 * @param state Pointer to a struct jsonsl_state_st. The argument is
 * parenthesized so expressions such as `&st` or `p + 1` work, but it is
 * evaluated up to twice, so it must not have side effects.
 */
#define JSONSL_STATE_IS_CONTAINER(state) \
        ((state)->type == JSONSL_T_OBJECT || (state)->type == JSONSL_T_LIST)
672
/**
 * These two functions return a string representation
 * of the error or type, respectively. They will never
 * return NULL.
 *
 * @param err (jsonsl_strerror) the error code to describe
 * @param jt (jsonsl_strtype) the type constant to describe
 */
JSONSL_API
const char* jsonsl_strerror(jsonsl_error_t err);
JSONSL_API
const char* jsonsl_strtype(jsonsl_type_t jt);
682
/**
 * Dumps global metrics to the screen. This is a no-op unless
 * jsonsl was compiled with JSONSL_USE_METRICS.
 */
JSONSL_API
void jsonsl_dump_global_metrics(void);
689
690 /* This macro just here for editors to do code folding */
691 #ifndef JSONSL_NO_JPR
692
693 /**
694 * @name JSON Pointer API
695 *
696 * JSONPointer API. This isn't really related to the lexer (at least not yet)
697 * JSONPointer provides an extremely simple specification for providing
698 * locations within JSON objects. We will extend it a bit and allow for
699 * providing 'wildcard' characters by which to be able to 'query' the stream.
700 *
701 * See http://tools.ietf.org/html/draft-pbryan-zyp-json-pointer-00
702 *
703 * Currently I'm implementing the 'single query' API which can only use a single
704 * query component. In the future I will integrate my yet-to-be-published
705 * Boyer-Moore-esque prefix searching implementation, in order to allow
706 * multiple paths to be merged into one for quick and efficient searching.
707 *
708 *
 * JPR (as we'll refer to it within the source) can be used by splitting
 * the components into multiple sections, and incrementally 'tracking' each
 * component. When JSONSL delivers a 'pop' callback for a string, or a 'push'
 * callback for an object, we will check to see whether the index matching
 * the component corresponding to the current level contains a match
 * for our path.
715 *
 * In order to do this properly, a structure must be maintained within the
 * parent indicating whether its children are possible matches. This flag
 * will be 'inherited' by all children which may conform to the match
 * specification, and discarded by all which do not (thereby eliminating
 * their children from inheriting it).
721 *
722 * A successful match is a complete one. One can provide multiple paths with
723 * multiple levels of matches e.g.
724 * /foo/bar/baz/^/blah
725 *
726 * @{
727 */
728
/** Character that marks a wildcard path component; '^' unless the
 * build overrides it. */
#if !defined(JSONSL_PATH_WILDCARD_CHAR)
#define JSONSL_PATH_WILDCARD_CHAR '^'
#endif /* JSONSL_PATH_WILDCARD_CHAR */
733
/**
 * Result codes of the JPR matching functions.
 *
 * JSONSL_XMATCH is an X-macro list of (name, value) pairs; each entry
 * becomes an enumerator named JSONSL_MATCH_<name>.
 */
#define JSONSL_XMATCH \
    X(COMPLETE,1) \
    X(POSSIBLE,0) \
    X(NOMATCH,-1) \
    X(TYPE_MISMATCH, -2)

typedef enum {

#define X(T,v) \
    JSONSL_MATCH_##T = v,
    JSONSL_XMATCH

#undef X
    /*
     * Sentinel for "no result". It needs an explicit value: an implicit
     * one would be TYPE_MISMATCH + 1 == -1, which is indistinguishable
     * from JSONSL_MATCH_NOMATCH.
     */
    JSONSL_MATCH_UNKNOWN = -3
} jsonsl_jpr_match_t;
749
/**
 * Kind of a single path component (see jsonsl_jpr_component_st::ptype).
 * The ordinary kinds count up from 1; NONE is 0 and INVALID is -1.
 */
typedef enum {
    JSONSL_PATH_STRING = 1,
    JSONSL_PATH_WILDCARD,
    JSONSL_PATH_NUMERIC,
    JSONSL_PATH_ROOT,

    /* Special */
    JSONSL_PATH_INVALID = -1,
    JSONSL_PATH_NONE = 0
} jsonsl_jpr_type_t;
760
/**
 * One component (path segment) of a JPR path.
 */
struct jsonsl_jpr_component_st {
    /** The string the component points to */
    char *pstr;
    /** if this is a numeric type, the number is 'cached' here */
    unsigned long idx;
    /** The length of the string */
    size_t len;
    /** The type of component (NUMERIC or STRING) */
    jsonsl_jpr_type_t ptype;

    /** Set this to true to enforce type checking between dict keys and array
     * indices. jsonsl_jpr_match() will return TYPE_MISMATCH if it detects
     * that an array index is actually a child of a dictionary. */
    short is_arridx;

    /* Extra fields (for more advanced searches. Default is empty) */
    JSONSL_JPR_COMPONENT_USER_FIELDS
};
779
/**
 * A parsed JPR path: the component array plus the strings backing it.
 * Created by jsonsl_jpr_new() and released by jsonsl_jpr_destroy().
 */
struct jsonsl_jpr_st {
    /** Path components */
    struct jsonsl_jpr_component_st *components;
    /** Number of entries in `components` */
    size_t ncomponents;

    /**Type of the match to be expected. If nonzero, will be compared against
     * the actual type */
    unsigned match_type;

    /** Base of allocated string for components */
    char *basestr;

    /** The original match string. Useful for returning to the user */
    char *orig;
    /** Length of `orig` */
    size_t norig;
};
796
/**
 * Create a new JPR object.
 *
 * @param path the JSONPointer path specification.
 * @param errp a pointer to a jsonsl_error_t. If this function returns NULL,
 * then more details will be in this variable.
 *
 * @return a new jsonsl_jpr_t object, or NULL on error. Release it with
 * jsonsl_jpr_destroy().
 */
JSONSL_API
jsonsl_jpr_t jsonsl_jpr_new(const char *path, jsonsl_error_t *errp);
808
/**
 * Destroy a JPR object.
 *
 * @param jpr the object returned by jsonsl_jpr_new()
 */
JSONSL_API
void jsonsl_jpr_destroy(jsonsl_jpr_t jpr);
814
/**
 * Match a JSON object against a type and specific level
 *
 * @param jpr the JPR object
 * @param parent_type the type of the parent (should be T_LIST or T_OBJECT)
 * @param parent_level the level of the parent
 * @param key the 'key' of the child. If the parent is an array, this should be
 * empty.
 * @param nkey the length of the key. If the parent is an array (T_LIST), then
 * this should be the current index.
 *
 * NOTE: The key of the child means any kind of associative data related to the
 * element. Thus, in the fragment <<< { "foo" : [ >>>,
 * the opening array's key is "foo".
 *
 * @return a status constant. This indicates whether a match was excluded, possible,
 * or successful.
 */
JSONSL_API
jsonsl_jpr_match_t jsonsl_jpr_match(jsonsl_jpr_t jpr,
                                    unsigned int parent_type,
                                    unsigned int parent_level,
                                    const char *key, size_t nkey);
838
/**
 * Alternate matching algorithm. This matching algorithm does not use
 * JSONPointer but relies on a more structured searching mechanism. It
 * assumes that there is a clear distinction between array indices and
 * object keys. In this case, the jsonsl_jpr_component_st::ptype should
 * be set to @ref JSONSL_PATH_NUMERIC for an array index (the
 * jsonsl_jpr_component_st::is_arridx field will be removed in a future
 * version).
 *
 * @param jpr The path
 * @param parent The parent structure. Can be NULL if this is the root object
 * @param child The child structure. Should not be NULL
 * @param key Object key, if an object
 * @param nkey Length of object key
 * @return Status constant if successful
 *
 * @note
 * For successful matching, both the key and the path itself should be normalized
 * to contain 'proper' utf8 sequences rather than utf16 '\uXXXX' escapes. This
 * should currently be done in the application. Another version of this function
 * may use a temporary buffer in such circumstances (allocated by the application).
 *
 * Since this function also checks the state of the child, it should only
 * be called on PUSH callbacks, and not POP callbacks
 */
JSONSL_API
jsonsl_jpr_match_t
jsonsl_path_match(jsonsl_jpr_t jpr,
                  const struct jsonsl_state_st *parent,
                  const struct jsonsl_state_st *child,
                  const char *key, size_t nkey);
870
871
/**
 * Associate a set of JPR objects with a lexer instance.
 * This should be called before the lexer has been fed any data (and
 * behavior is undefined if you don't adhere to this).
 *
 * After using this function, you may subsequently call
 * jsonsl_jpr_match_state() on given states (presumably from within the
 * callbacks). Undo it with jsonsl_jpr_match_state_cleanup().
 *
 * Note that currently the first JPR is the quickest and comes
 * pre-allocated with the state structure. Further JPR objects
 * are chained.
 *
 * @param jsn The lexer
 * @param jprs An array of jsonsl_jpr_t objects
 * @param njprs How many elements in the jprs array.
 */
JSONSL_API
void jsonsl_jpr_match_state_init(jsonsl_t jsn,
                                 jsonsl_jpr_t *jprs,
                                 size_t njprs);
892
/**
 * This follows the same semantics as the normal match,
 * except we infer parent and type information from the relevant state objects.
 * The match status (for all possible JPR objects) is set in the *out parameter.
 *
 * If a match has succeeded, then its JPR object will be returned. In all other
 * instances, NULL is returned.
 *
 * @param jsn The lexer the JPR objects were associated with (see
 * jsonsl_jpr_match_state_init())
 * @param state The jsonsl_state_st which is a candidate
 * @param key The hash key (if applicable, can be NULL if parent is list)
 * @param nkey Length of hash key (if applicable, can be zero if parent is list)
 * @param out A pointer to a jsonsl_jpr_match_t. This will be populated with
 * the match result
 *
 * @return If a match was completed in full, then the JPR object containing
 * the matching path will be returned. Otherwise, the return is NULL (note, this
 * does not mean matching has failed, it can still be part of the match: check
 * the out parameter).
 */
JSONSL_API
jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn,
                                    struct jsonsl_state_st *state,
                                    const char *key,
                                    size_t nkey,
                                    jsonsl_jpr_match_t *out);
919
920
/**
 * Clean up any memory allocated and any states set by
 * jsonsl_jpr_match_state_init() and jsonsl_jpr_match_state().
 *
 * @param jsn The lexer
 */
JSONSL_API
void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn);
928
/**
 * Return a string representation of the match result returned by the
 * matching functions (e.g. jsonsl_jpr_match()).
 *
 * @param match the match result to describe
 */
JSONSL_API
const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match);
934
935 /* @}*/
936
/**
 * Utility function to convert escape sequences into their original form.
 *
 * The decoders I've sampled do not seem to specify a standard behavior of what
 * to escape/unescape.
 *
 * RFC 4627 mandates only that the quote, backslash, and ASCII control
 * characters (0x00-0x1f) be escaped. It is often common for applications
 * to escape a '/' - however this may also be desired behavior. The JSON
 * spec is not clear on this, and therefore jsonsl leaves it up to you.
 *
 * Additionally, sometimes you may wish to _normalize_ JSON. This is specifically
 * true when dealing with 'u-escapes' which can be expressed perfectly fine
 * as utf8. One use case for normalization is JPR string comparison, in which
 * case two effectively equivalent strings may not match because one is using
 * u-escapes and the other proper utf8. To normalize u-escapes only, pass in
 * an empty `toEscape` table, enabling only the `u` index.
 *
 * @param in The input string.
 * @param out An allocated output (should be the same size as in)
 * @param len the size of the buffer
 * @param toEscape A sparse array of characters to unescape. Characters
 * which are not present in this array, e.g. toEscape['c'] == 0, will be
 * ignored and passed to the output in their original form.
 * @param oflags If not null, and a \uXXXX escape expands to a non-ascii byte,
 * then this variable will have the SPECIALf_NONASCII flag on.
 *
 * @param err A pointer to an error variable. If an error occurs, it will be
 * set in this variable
 * @param errat If not null and an error occurs, this will be set to point
 * to the position within the string at which the offending character was
 * encountered.
 *
 * @return The effective size of the output buffer.
 *
 * @note
 * This function now encodes the UTF8 equivalents of utf16 escapes (i.e.
 * 'u-escapes'). Previously this would encode the escapes as utf16 literals,
 * which while still correct in some sense was confusing for many (especially
 * considering that the inputs were variations of char).
 *
 * @note
 * The output buffer will never be larger than the input buffer, since
 * standard escape sequences (i.e. '\t') occupy two bytes in the source
 * but only one byte (when unescaped) in the output. Likewise u-escapes
 * (i.e. \uXXXX) will occupy six bytes in the source, but fewer bytes
 * when unescaped.
 */
JSONSL_API
size_t jsonsl_util_unescape_ex(const char *in,
                               char *out,
                               size_t len,
                               const int toEscape[128],
                               unsigned *oflags,
                               jsonsl_error_t *err,
                               const char **errat);
993
/**
 * Convenience macro to avoid passing too many parameters: forwards to
 * jsonsl_util_unescape_ex() with NULL for both `oflags` and `errat`.
 */
#define jsonsl_util_unescape(in, out, len, toEscape, err) \
    jsonsl_util_unescape_ex(in, out, len, toEscape, NULL, err, NULL)
999
1000 #endif /* JSONSL_NO_JPR */
1001
1002 #ifdef __cplusplus
1003 }
1004 #endif /* __cplusplus */
1005
1006 #endif /* JSONSL_H_ */
1007