1 /* Copyright (C) 2012-2015 Mark Nunberg.
2  *
3  * See included LICENSE file for license details.
4  */
5 
6 #include "jsonsl.h"
7 #include <assert.h>
8 #include <limits.h>
9 #include <ctype.h>
10 
11 #ifdef JSONSL_USE_METRICS
/*
 * X-macro naming every counter kept when JSONSL_USE_METRICS is defined.
 * Define X(name) before expanding XMETRICS and #undef it afterwards.
 */
#define XMETRICS \
    X(STRINGY_INSIGNIFICANT) \
    X(STRINGY_SLOWPATH) \
    X(ALLOWED_WHITESPACE) \
    X(QUOTE_FASTPATH) \
    X(SPECIAL_FASTPATH) \
    X(SPECIAL_WSPOP) \
    X(SPECIAL_SLOWPATH) \
    X(GENERIC) \
    X(STRUCTURAL_TOKEN) \
    X(SPECIAL_SWITCHFIRST) \
    X(STRINGY_CATCH) \
    X(NUMBER_FASTPATH) \
    X(ESCAPES) \
    X(TOTAL)

/* One `unsigned long metric_<NAME>` member per XMETRICS entry. */
struct jsonsl_metrics_st {
#define X(m) \
    unsigned long metric_##m;
    XMETRICS
#undef X
};

/* File-wide counters; the per-character tables have one slot per byte value. */
static struct jsonsl_metrics_st GlobalMetrics = { 0 };
static unsigned long GenericCounter[0x100] = { 0 };
static unsigned long StringyCatchCounter[0x100] = { 0 };

/* Bumps the named counter. Expands to a complete statement. */
#define INCR_METRIC(m) \
    GlobalMetrics.metric_##m++;

/*
 * Bumps the GENERIC counter and the per-character tally for `c`.
 * Characters outside the 0x100-entry table (possible with wide characters)
 * are counted in the aggregate only, so the table is never indexed out of
 * bounds. `c` is evaluated more than once; pass a side-effect-free expression.
 */
#define INCR_GENERIC(c) \
    INCR_METRIC(GENERIC); \
    if ((unsigned long)(c) < 0x100) { GenericCounter[(c)]++; }

/* Same as INCR_GENERIC, for the STRINGY_CATCH counter and its table. */
#define INCR_STRINGY_CATCH(c) \
    INCR_METRIC(STRINGY_CATCH); \
    if ((unsigned long)(c) < 0x100) { StringyCatchCounter[(c)]++; }
49 
50 JSONSL_API
jsonsl_dump_global_metrics(void)51 void jsonsl_dump_global_metrics(void)
52 {
53     int ii;
54     printf("JSONSL Metrics:\n");
55 #define X(m) \
56     printf("\t%-30s %20lu (%0.2f%%)\n", #m, GlobalMetrics.metric_##m, \
57            (float)((float)(GlobalMetrics.metric_##m/(float)GlobalMetrics.metric_TOTAL)) * 100);
58     XMETRICS
59 #undef X
60     printf("Generic Characters:\n");
61     for (ii = 0; ii < 0xff; ii++) {
62         if (GenericCounter[ii]) {
63             printf("\t[ %c ] %lu\n", ii, GenericCounter[ii]);
64         }
65     }
66     printf("Weird string loop\n");
67     for (ii = 0; ii < 0xff; ii++) {
68         if (StringyCatchCounter[ii]) {
69             printf("\t[ %c ] %lu\n", ii, StringyCatchCounter[ii]);
70         }
71     }
72 }
73 
74 #else
75 #define INCR_METRIC(m)
76 #define INCR_GENERIC(c)
77 #define INCR_STRINGY_CATCH(c)
78 JSONSL_API
jsonsl_dump_global_metrics(void)79 void jsonsl_dump_global_metrics(void) { }
80 #endif /* JSONSL_USE_METRICS */
81 
/* Case labels for the ten ASCII decimal digits, for use inside a switch. */
#define CASE_DIGITS \
case '0': \
case '1': \
case '2': \
case '3': \
case '4': \
case '5': \
case '6': \
case '7': \
case '8': \
case '9':

/* Character-classification helpers; their definitions are not in this part of the file. */
static unsigned extract_special(unsigned ch);
static int is_special_end(unsigned ch);
static int is_allowed_whitespace(unsigned ch);
static int is_allowed_escape(unsigned ch);
static char get_escape_equiv(unsigned ch);
99 
100 JSONSL_API
jsonsl_new(int nlevels)101 jsonsl_t jsonsl_new(int nlevels)
102 {
103     struct jsonsl_st *jsn = (struct jsonsl_st *)
104             calloc(1, sizeof (*jsn) +
105                     ( (nlevels-1) * sizeof (struct jsonsl_state_st) )
106             );
107 
108     jsn->levels_max = nlevels;
109     jsn->max_callback_level = -1;
110     jsonsl_reset(jsn);
111     return jsn;
112 }
113 
114 JSONSL_API
jsonsl_reset(jsonsl_t jsn)115 void jsonsl_reset(jsonsl_t jsn)
116 {
117     unsigned int ii;
118     jsn->tok_last = 0;
119     jsn->can_insert = 1;
120     jsn->pos = 0;
121     jsn->level = 0;
122     jsn->stopfl = 0;
123     jsn->in_escape = 0;
124     jsn->expecting = 0;
125 
126     memset(jsn->stack, 0, (jsn->levels_max * sizeof (struct jsonsl_state_st)));
127 
128     for (ii = 0; ii < jsn->levels_max; ii++) {
129         jsn->stack[ii].level = ii;
130     }
131 }
132 
133 JSONSL_API
jsonsl_destroy(jsonsl_t jsn)134 void jsonsl_destroy(jsonsl_t jsn)
135 {
136     if (jsn) {
137         free(jsn);
138     }
139 }
140 
141 
142 #define FASTPARSE_EXHAUSTED 1
143 #define FASTPARSE_BREAK 0
144 static const int chrt_string_nopass[0x100] = { JSONSL_CHARTABLE_string_nopass };
145 
146 /*
147  * This function is meant to accelerate string parsing, reducing the main loop's
148  * check if we are indeed a string.
149  *
150  * @param jsn the parser
151  * @param[in,out] bytes_p A pointer to the current buffer (i.e. current position)
152  * @param[in,out] nbytes_p A pointer to the current size of the buffer
153  * @return true if all bytes have been exhausted (and thus the main loop can
154  * return), false if a special character was examined which requires greater
155  * examination.
156  */
157 static int
jsonsl__str_fastparse(jsonsl_t jsn,const jsonsl_uchar_t ** bytes_p,size_t * nbytes_p)158 jsonsl__str_fastparse(jsonsl_t jsn,
159                       const jsonsl_uchar_t **bytes_p, size_t *nbytes_p)
160 {
161     int exhausted = 1;
162     size_t nbytes = *nbytes_p;
163     const jsonsl_uchar_t *bytes = *bytes_p;
164 
165     for (; nbytes; nbytes--, bytes++) {
166         if (
167 #ifdef JSONSL_USE_WCHAR
168                 *bytes >= 0x100 ||
169 #endif /* JSONSL_USE_WCHAR */
170                 (!chrt_string_nopass[*bytes])) {
171             INCR_METRIC(TOTAL);
172             INCR_METRIC(STRINGY_INSIGNIFICANT);
173         } else {
174             exhausted = 0;
175             break;
176         }
177     }
178 
179     /* Once we're done here, re-calculate the position variables */
180     jsn->pos += (*nbytes_p - nbytes);
181     if (exhausted) {
182         return FASTPARSE_EXHAUSTED;
183     }
184 
185     *nbytes_p = nbytes;
186     *bytes_p = bytes;
187     return FASTPARSE_BREAK;
188 }
189 
190 /* Functions exactly like str_fastparse, except it also accepts a 'state'
191  * argument, since the number's value is updated in the state. */
192 static int
jsonsl__num_fastparse(jsonsl_t jsn,const jsonsl_uchar_t ** bytes_p,size_t * nbytes_p,struct jsonsl_state_st * state)193 jsonsl__num_fastparse(jsonsl_t jsn,
194                       const jsonsl_uchar_t **bytes_p, size_t *nbytes_p,
195                       struct jsonsl_state_st *state)
196 {
197     int exhausted = 1;
198     size_t nbytes = *nbytes_p;
199     const jsonsl_uchar_t *bytes = *bytes_p;
200 
201     for (; nbytes; nbytes--, bytes++) {
202         jsonsl_uchar_t c = *bytes;
203         if (isdigit(c)) {
204             INCR_METRIC(TOTAL);
205             INCR_METRIC(NUMBER_FASTPATH);
206             state->nelem = (state->nelem * 10) + (c - 0x30);
207         } else {
208             exhausted = 0;
209             break;
210         }
211     }
212     jsn->pos += (*nbytes_p - nbytes);
213     if (exhausted) {
214         return FASTPARSE_EXHAUSTED;
215     }
216     *nbytes_p = nbytes;
217     *bytes_p = bytes;
218     return FASTPARSE_BREAK;
219 }
220 
221 JSONSL_API
222 void
jsonsl_feed(jsonsl_t jsn,const jsonsl_char_t * bytes,size_t nbytes)223 jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes)
224 {
225 
226 #define INVOKE_ERROR(eb) \
227     if (jsn->error_callback(jsn, JSONSL_ERROR_##eb, state, (char*)c)) { \
228         goto GT_AGAIN; \
229     } \
230     return;
231 
232 #define STACK_PUSH \
233     if (jsn->level >= (levels_max-1)) { \
234         jsn->error_callback(jsn, JSONSL_ERROR_LEVELS_EXCEEDED, state, (char*)c); \
235         return; \
236     } \
237     state = jsn->stack + (++jsn->level); \
238     state->ignore_callback = jsn->stack[jsn->level-1].ignore_callback; \
239     state->pos_begin = jsn->pos;
240 
241 #define STACK_POP_NOPOS \
242     state->pos_cur = jsn->pos; \
243     state = jsn->stack + (--jsn->level);
244 
245 
246 #define STACK_POP \
247     STACK_POP_NOPOS; \
248     state->pos_cur = jsn->pos;
249 
250 #define CALLBACK_AND_POP_NOPOS(T) \
251         state->pos_cur = jsn->pos; \
252         DO_CALLBACK(T, POP); \
253         state->nescapes = 0; \
254         state = jsn->stack + (--jsn->level);
255 
256 #define CALLBACK_AND_POP(T) \
257         CALLBACK_AND_POP_NOPOS(T); \
258         state->pos_cur = jsn->pos;
259 
260 #define SPECIAL_POP \
261     CALLBACK_AND_POP(SPECIAL); \
262     jsn->expecting = 0; \
263     jsn->tok_last = 0; \
264 
265 #define CUR_CHAR (*(jsonsl_uchar_t*)c)
266 
267 #define DO_CALLBACK(T, action) \
268     if (jsn->call_##T && \
269             jsn->max_callback_level > state->level && \
270             state->ignore_callback == 0) { \
271         \
272         if (jsn->action_callback_##action) { \
273             jsn->action_callback_##action(jsn, JSONSL_ACTION_##action, state, (jsonsl_char_t*)c); \
274         } else if (jsn->action_callback) { \
275             jsn->action_callback(jsn, JSONSL_ACTION_##action, state, (jsonsl_char_t*)c); \
276         } \
277         if (jsn->stopfl) { return; } \
278     }
279 
280     /**
281      * Verifies that we are able to insert the (non-string) item into a hash.
282      */
283 #define ENSURE_HVAL \
284     if (state->nelem % 2 == 0 && state->type == JSONSL_T_OBJECT) { \
285         INVOKE_ERROR(HKEY_EXPECTED); \
286     }
287 
288 #define VERIFY_SPECIAL(lit) \
289         if (CUR_CHAR != (lit)[jsn->pos - state->pos_begin]) { \
290             INVOKE_ERROR(SPECIAL_EXPECTED); \
291         }
292 
293 #define STATE_SPECIAL_LENGTH \
294     (state)->nescapes
295 
296 #define IS_NORMAL_NUMBER \
297     ((state)->special_flags == JSONSL_SPECIALf_UNSIGNED || \
298         (state)->special_flags == JSONSL_SPECIALf_SIGNED)
299 
300 #define STATE_NUM_LAST jsn->tok_last
301 
302 #define CONTINUE_NEXT_CHAR() continue
303 
304     const jsonsl_uchar_t *c = (jsonsl_uchar_t*)bytes;
305     size_t levels_max = jsn->levels_max;
306     struct jsonsl_state_st *state = jsn->stack + jsn->level;
307     jsn->base = bytes;
308 
309     for (; nbytes; nbytes--, jsn->pos++, c++) {
310         unsigned state_type;
311         INCR_METRIC(TOTAL);
312 
313         GT_AGAIN:
314         state_type = state->type;
315         /* Most common type is typically a string: */
316         if (state_type & JSONSL_Tf_STRINGY) {
317             /* Special escape handling for some stuff */
318             if (jsn->in_escape) {
319                 jsn->in_escape = 0;
320                 if (!is_allowed_escape(CUR_CHAR)) {
321                     INVOKE_ERROR(ESCAPE_INVALID);
322                 } else if (CUR_CHAR == 'u') {
323                     DO_CALLBACK(UESCAPE, UESCAPE);
324                     if (jsn->return_UESCAPE) {
325                         return;
326                     }
327                 }
328                 CONTINUE_NEXT_CHAR();
329             }
330 
331             if (jsonsl__str_fastparse(jsn, &c, &nbytes) ==
332                     FASTPARSE_EXHAUSTED) {
333                 /* No need to readjust variables as we've exhausted the iterator */
334                 return;
335             } else {
336                 if (CUR_CHAR == '"') {
337                     goto GT_QUOTE;
338                 } else if (CUR_CHAR == '\\') {
339                     goto GT_ESCAPE;
340                 } else {
341                     INVOKE_ERROR(WEIRD_WHITESPACE);
342                 }
343             }
344             INCR_METRIC(STRINGY_SLOWPATH);
345 
346         } else if (state_type == JSONSL_T_SPECIAL) {
347             /* Fast track for signed/unsigned */
348             if (IS_NORMAL_NUMBER) {
349                 if (jsonsl__num_fastparse(jsn, &c, &nbytes, state) ==
350                         FASTPARSE_EXHAUSTED) {
351                     return;
352                 } else {
353                     goto GT_SPECIAL_NUMERIC;
354                 }
355             } else if (state->special_flags == JSONSL_SPECIALf_DASH) {
356                 if (!isdigit(CUR_CHAR)) {
357                     INVOKE_ERROR(INVALID_NUMBER);
358                 }
359 
360                 if (CUR_CHAR == '0') {
361                     state->special_flags = JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED;
362                 } else if (isdigit(CUR_CHAR)) {
363                     state->special_flags = JSONSL_SPECIALf_SIGNED;
364                     state->nelem = CUR_CHAR - 0x30;
365                 } else {
366                     INVOKE_ERROR(INVALID_NUMBER);
367                 }
368                 CONTINUE_NEXT_CHAR();
369 
370             } else if (state->special_flags == JSONSL_SPECIALf_ZERO) {
371                 if (isdigit(CUR_CHAR)) {
372                     /* Following a zero! */
373                     INVOKE_ERROR(INVALID_NUMBER);
374                 }
375                 /* Unset the 'zero' flag: */
376                 if (state->special_flags & JSONSL_SPECIALf_SIGNED) {
377                     state->special_flags = JSONSL_SPECIALf_SIGNED;
378                 } else {
379                     state->special_flags = JSONSL_SPECIALf_UNSIGNED;
380                 }
381                 goto GT_SPECIAL_NUMERIC;
382             }
383 
384             if (state->special_flags & JSONSL_SPECIALf_NUMERIC) {
385                 GT_SPECIAL_NUMERIC:
386                 switch (CUR_CHAR) {
387                 CASE_DIGITS
388                     STATE_NUM_LAST = '1';
389                     CONTINUE_NEXT_CHAR();
390 
391                 case '.':
392                     if (state->special_flags & JSONSL_SPECIALf_FLOAT) {
393                         INVOKE_ERROR(INVALID_NUMBER);
394                     }
395                     state->special_flags |= JSONSL_SPECIALf_FLOAT;
396                     STATE_NUM_LAST = '.';
397                     CONTINUE_NEXT_CHAR();
398 
399                 case 'e':
400                 case 'E':
401                     if (state->special_flags & JSONSL_SPECIALf_EXPONENT) {
402                         INVOKE_ERROR(INVALID_NUMBER);
403                     }
404                     state->special_flags |= JSONSL_SPECIALf_EXPONENT;
405                     STATE_NUM_LAST = 'e';
406                     CONTINUE_NEXT_CHAR();
407 
408                 case '-':
409                 case '+':
410                     if (STATE_NUM_LAST != 'e') {
411                         INVOKE_ERROR(INVALID_NUMBER);
412                     }
413                     STATE_NUM_LAST = '-';
414                     CONTINUE_NEXT_CHAR();
415 
416                 default:
417                     if (is_special_end(CUR_CHAR)) {
418                         goto GT_SPECIAL_POP;
419                     }
420                     INVOKE_ERROR(INVALID_NUMBER);
421                     break;
422                 }
423             }
424             /* else if (!NUMERIC) */
425             if (!is_special_end(CUR_CHAR)) {
426                 STATE_SPECIAL_LENGTH++;
427 
428                 /* Verify TRUE, FALSE, NULL */
429                 if (state->special_flags == JSONSL_SPECIALf_TRUE) {
430                     VERIFY_SPECIAL("true");
431                 } else if (state->special_flags == JSONSL_SPECIALf_FALSE) {
432                     VERIFY_SPECIAL("false");
433                 } else if (state->special_flags == JSONSL_SPECIALf_NULL) {
434                     VERIFY_SPECIAL("null");
435                 }
436                 INCR_METRIC(SPECIAL_FASTPATH);
437                 CONTINUE_NEXT_CHAR();
438             }
439 
440             GT_SPECIAL_POP:
441             if (IS_NORMAL_NUMBER) {
442                 /* Nothing */
443             } else if (state->special_flags == JSONSL_SPECIALf_ZERO ||
444                     state->special_flags == (JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED)) {
445                 /* 0 is unsigned! */
446                 state->special_flags = JSONSL_SPECIALf_UNSIGNED;
447             } else if (state->special_flags == JSONSL_SPECIALf_DASH) {
448                 /* Still in dash! */
449                 INVOKE_ERROR(INVALID_NUMBER);
450             } else if (state->special_flags & JSONSL_SPECIALf_NUMERIC) {
451                 /* Check that we're not at the end of a token */
452                 if (STATE_NUM_LAST != '1') {
453                     INVOKE_ERROR(INVALID_NUMBER);
454                 }
455             } else if (state->special_flags == JSONSL_SPECIALf_TRUE) {
456                 if (STATE_SPECIAL_LENGTH != 4) {
457                     INVOKE_ERROR(SPECIAL_INCOMPLETE);
458                 }
459                 state->nelem = 1;
460             } else if (state->special_flags == JSONSL_SPECIALf_FALSE) {
461                 if (STATE_SPECIAL_LENGTH != 5) {
462                     INVOKE_ERROR(SPECIAL_INCOMPLETE);
463                 }
464             } else if (state->special_flags == JSONSL_SPECIALf_NULL) {
465                 if (STATE_SPECIAL_LENGTH != 4) {
466                     INVOKE_ERROR(SPECIAL_INCOMPLETE);
467                 }
468             }
469             SPECIAL_POP;
470             jsn->expecting = ',';
471             if (is_allowed_whitespace(CUR_CHAR)) {
472                 CONTINUE_NEXT_CHAR();
473             }
474             /**
475              * This works because we have a non-whitespace token
476              * which is not a special token. If this is a structural
477              * character then it will be gracefully handled by the
478              * switch statement. Otherwise it will default to the 'special'
479              * state again,
480              */
481             goto GT_STRUCTURAL_TOKEN;
482         } else if (is_allowed_whitespace(CUR_CHAR)) {
483             INCR_METRIC(ALLOWED_WHITESPACE);
484             /* So we're not special. Harmless insignificant whitespace
485              * passthrough
486              */
487             CONTINUE_NEXT_CHAR();
488         } else if (extract_special(CUR_CHAR)) {
489             /* not a string, whitespace, or structural token. must be special */
490             goto GT_SPECIAL_BEGIN;
491         }
492 
493         INCR_GENERIC(CUR_CHAR);
494 
495         if (CUR_CHAR == '"') {
496             GT_QUOTE:
497             jsn->can_insert = 0;
498             switch (state_type) {
499 
500             /* the end of a string or hash key */
501             case JSONSL_T_STRING:
502                 CALLBACK_AND_POP(STRING);
503                 CONTINUE_NEXT_CHAR();
504             case JSONSL_T_HKEY:
505                 CALLBACK_AND_POP(HKEY);
506                 CONTINUE_NEXT_CHAR();
507 
508             case JSONSL_T_OBJECT:
509                 state->nelem++;
510                 if ( (state->nelem-1) % 2 ) {
511                     /* Odd, this must be a hash value */
512                     if (jsn->tok_last != ':') {
513                         INVOKE_ERROR(MISSING_TOKEN);
514                     }
515                     jsn->expecting = ','; /* Can't figure out what to expect next */
516                     jsn->tok_last = 0;
517 
518                     STACK_PUSH;
519                     state->type = JSONSL_T_STRING;
520                     DO_CALLBACK(STRING, PUSH);
521 
522                 } else {
523                     /* hash key */
524                     if (jsn->expecting != '"') {
525                         INVOKE_ERROR(STRAY_TOKEN);
526                     }
527                     jsn->tok_last = 0;
528                     jsn->expecting = ':';
529 
530                     STACK_PUSH;
531                     state->type = JSONSL_T_HKEY;
532                     DO_CALLBACK(HKEY, PUSH);
533                 }
534                 CONTINUE_NEXT_CHAR();
535 
536             case JSONSL_T_LIST:
537                 state->nelem++;
538                 STACK_PUSH;
539                 state->type = JSONSL_T_STRING;
540                 jsn->expecting = ',';
541                 jsn->tok_last = 0;
542                 DO_CALLBACK(STRING, PUSH);
543                 CONTINUE_NEXT_CHAR();
544 
545             case JSONSL_T_SPECIAL:
546                 INVOKE_ERROR(STRAY_TOKEN);
547                 break;
548 
549             default:
550                 INVOKE_ERROR(STRING_OUTSIDE_CONTAINER);
551                 break;
552             } /* switch(state->type) */
553         } else if (CUR_CHAR == '\\') {
554             GT_ESCAPE:
555             INCR_METRIC(ESCAPES);
556         /* Escape */
557             if ( (state->type & JSONSL_Tf_STRINGY) == 0 ) {
558                 INVOKE_ERROR(ESCAPE_OUTSIDE_STRING);
559             }
560             state->nescapes++;
561             jsn->in_escape = 1;
562             CONTINUE_NEXT_CHAR();
563         } /* " or \ */
564 
565         GT_STRUCTURAL_TOKEN:
566         switch (CUR_CHAR) {
567         case ':':
568             INCR_METRIC(STRUCTURAL_TOKEN);
569             if (jsn->expecting != CUR_CHAR) {
570                 INVOKE_ERROR(STRAY_TOKEN);
571             }
572             jsn->tok_last = ':';
573             jsn->can_insert = 1;
574             jsn->expecting = '"';
575             CONTINUE_NEXT_CHAR();
576 
577         case ',':
578             INCR_METRIC(STRUCTURAL_TOKEN);
579             /**
580              * The comma is one of the more generic tokens.
581              * In the context of an OBJECT, the can_insert flag
582              * should never be set, and no other action is
583              * necessary.
584              */
585             if (jsn->expecting != CUR_CHAR) {
586                 /* make this branch execute only when we haven't manually
587                  * just placed the ',' in the expecting register.
588                  */
589                 INVOKE_ERROR(STRAY_TOKEN);
590             }
591 
592             if (state->type == JSONSL_T_OBJECT) {
593                 /* end of hash value, expect a string as a hash key */
594                 jsn->expecting = '"';
595             } else {
596                 jsn->can_insert = 1;
597             }
598 
599             jsn->tok_last = ',';
600             jsn->expecting = '"';
601             CONTINUE_NEXT_CHAR();
602 
603             /* new list or object */
604             /* hashes are more common */
605         case '{':
606         case '[':
607             INCR_METRIC(STRUCTURAL_TOKEN);
608             if (!jsn->can_insert) {
609                 INVOKE_ERROR(CANT_INSERT);
610             }
611 
612             ENSURE_HVAL;
613             state->nelem++;
614 
615             STACK_PUSH;
616             /* because the constants match the opening delimiters, we can do this: */
617             state->type = CUR_CHAR;
618             state->nelem = 0;
619             jsn->can_insert = 1;
620             if (CUR_CHAR == '{') {
621                 /* If we're a hash, we expect a key first, which is quouted */
622                 jsn->expecting = '"';
623             }
624             if (CUR_CHAR == JSONSL_T_OBJECT) {
625                 DO_CALLBACK(OBJECT, PUSH);
626             } else {
627                 DO_CALLBACK(LIST, PUSH);
628             }
629             jsn->tok_last = 0;
630             CONTINUE_NEXT_CHAR();
631 
632             /* closing of list or object */
633         case '}':
634         case ']':
635             INCR_METRIC(STRUCTURAL_TOKEN);
636             if (jsn->tok_last == ',' && jsn->options.allow_trailing_comma == 0) {
637                 INVOKE_ERROR(TRAILING_COMMA);
638             }
639 
640             jsn->can_insert = 0;
641             jsn->level--;
642             jsn->expecting = ',';
643             jsn->tok_last = 0;
644             if (CUR_CHAR == ']') {
645                 if (state->type != '[') {
646                     INVOKE_ERROR(BRACKET_MISMATCH);
647                 }
648                 DO_CALLBACK(LIST, POP);
649             } else {
650                 if (state->type != '{') {
651                     INVOKE_ERROR(BRACKET_MISMATCH);
652                 } else if (state->nelem && state->nelem % 2 != 0) {
653                     INVOKE_ERROR(VALUE_EXPECTED);
654                 }
655                 DO_CALLBACK(OBJECT, POP);
656             }
657             state = jsn->stack + jsn->level;
658             state->pos_cur = jsn->pos;
659             CONTINUE_NEXT_CHAR();
660 
661         default:
662             GT_SPECIAL_BEGIN:
663             /**
664              * Not a string, not a structural token, and not benign whitespace.
665              * Technically we should iterate over the character always, but since
666              * we are not doing full numerical/value decoding anyway (but only hinting),
667              * we only check upon entry.
668              */
669             if (state->type != JSONSL_T_SPECIAL) {
670                 int special_flags = extract_special(CUR_CHAR);
671                 if (!special_flags) {
672                     /**
673                      * Try to do some heuristics here anyway to figure out what kind of
674                      * error this is. The 'special' case is a fallback scenario anyway.
675                      */
676                     if (CUR_CHAR == '\0') {
677                         INVOKE_ERROR(FOUND_NULL_BYTE);
678                     } else if (CUR_CHAR < 0x20) {
679                         INVOKE_ERROR(WEIRD_WHITESPACE);
680                     } else {
681                         INVOKE_ERROR(SPECIAL_EXPECTED);
682                     }
683                 }
684                 ENSURE_HVAL;
685                 state->nelem++;
686                 if (!jsn->can_insert) {
687                     INVOKE_ERROR(CANT_INSERT);
688                 }
689                 STACK_PUSH;
690                 state->type = JSONSL_T_SPECIAL;
691                 state->special_flags = special_flags;
692                 STATE_SPECIAL_LENGTH = 1;
693 
694                 if (special_flags == JSONSL_SPECIALf_UNSIGNED) {
695                     state->nelem = CUR_CHAR - 0x30;
696                     STATE_NUM_LAST = '1';
697                 } else {
698                     STATE_NUM_LAST = '-';
699                     state->nelem = 0;
700                 }
701                 DO_CALLBACK(SPECIAL, PUSH);
702             }
703             CONTINUE_NEXT_CHAR();
704         }
705     }
706 }
707 
708 JSONSL_API
jsonsl_strerror(jsonsl_error_t err)709 const char* jsonsl_strerror(jsonsl_error_t err)
710 {
711     if (err == JSONSL_ERROR_SUCCESS) {
712         return "SUCCESS";
713     }
714 #define X(t) \
715     if (err == JSONSL_ERROR_##t) \
716         return #t;
717     JSONSL_XERR;
718 #undef X
719     return "<UNKNOWN_ERROR>";
720 }
721 
722 JSONSL_API
jsonsl_strtype(jsonsl_type_t type)723 const char *jsonsl_strtype(jsonsl_type_t type)
724 {
725 #define X(o,c) \
726     if (type == JSONSL_T_##o) \
727         return #o;
728     JSONSL_XTYPE
729 #undef X
730     return "UNKNOWN TYPE";
731 
732 }
733 
734 /*
735  *
736  * JPR/JSONPointer functions
737  *
738  *
739  */
740 #ifndef JSONSL_NO_JPR
741 static
742 jsonsl_jpr_type_t
populate_component(char * in,struct jsonsl_jpr_component_st * component,char ** next,jsonsl_error_t * errp)743 populate_component(char *in,
744                    struct jsonsl_jpr_component_st *component,
745                    char **next,
746                    jsonsl_error_t *errp)
747 {
748     unsigned long pctval;
749     char *c = NULL, *outp = NULL, *end = NULL;
750     size_t input_len;
751     jsonsl_jpr_type_t ret = JSONSL_PATH_NONE;
752 
753     if (*next == NULL || *(*next) == '\0') {
754         return JSONSL_PATH_NONE;
755     }
756 
757     /* Replace the next / with a NULL */
758     *next = strstr(in, "/");
759     if (*next != NULL) {
760         *(*next) = '\0'; /* drop the forward slash */
761         input_len = *next - in;
762         end = *next;
763         *next += 1; /* next character after the '/' */
764     } else {
765         input_len = strlen(in);
766         end = in + input_len + 1;
767     }
768 
769     component->pstr = in;
770 
771     /* Check for special components of interest */
772     if (*in == JSONSL_PATH_WILDCARD_CHAR && input_len == 1) {
773         /* Lone wildcard */
774         ret = JSONSL_PATH_WILDCARD;
775         goto GT_RET;
776     } else if (isdigit(*in)) {
777         /* ASCII Numeric */
778         char *endptr;
779         component->idx = strtoul(in, &endptr, 10);
780         if (endptr && *endptr == '\0') {
781             ret = JSONSL_PATH_NUMERIC;
782             goto GT_RET;
783         }
784     }
785 
786     /* Default, it's a string */
787     ret = JSONSL_PATH_STRING;
788     for (c = outp = in; c < end; c++, outp++) {
789         char origc;
790         if (*c != '%') {
791             goto GT_ASSIGN;
792         }
793         /*
794          * c = { [+0] = '%', [+1] = 'b', [+2] = 'e', [+3] = '\0' }
795          */
796 
797         /* Need %XX */
798         if (c+2 >= end) {
799             *errp = JSONSL_ERROR_PERCENT_BADHEX;
800             return JSONSL_PATH_INVALID;
801         }
802         if (! (isxdigit(*(c+1)) && isxdigit(*(c+2))) ) {
803             *errp = JSONSL_ERROR_PERCENT_BADHEX;
804             return JSONSL_PATH_INVALID;
805         }
806 
807         /* Temporarily null-terminate the characters */
808         origc = *(c+3);
809         *(c+3) = '\0';
810         pctval = strtoul(c+1, NULL, 16);
811         *(c+3) = origc;
812 
813         *outp = (char) pctval;
814         c += 2;
815         continue;
816 
817         GT_ASSIGN:
818         *outp = *c;
819     }
820     /* Null-terminate the string */
821     for (; outp < c; outp++) {
822         *outp = '\0';
823     }
824 
825     GT_RET:
826     component->ptype = ret;
827     if (ret != JSONSL_PATH_WILDCARD) {
828         component->len = strlen(component->pstr);
829     }
830     return ret;
831 }
832 
833 JSONSL_API
834 jsonsl_jpr_t
jsonsl_jpr_new(const char * path,jsonsl_error_t * errp)835 jsonsl_jpr_new(const char *path, jsonsl_error_t *errp)
836 {
837     char *my_copy = NULL;
838     int count, curidx;
839     struct jsonsl_jpr_st *ret = NULL;
840     struct jsonsl_jpr_component_st *components = NULL;
841     size_t origlen;
842     jsonsl_error_t errstacked;
843 
844 #define JPR_BAIL(err) *errp = err; goto GT_ERROR;
845 
846     if (errp == NULL) {
847         errp = &errstacked;
848     }
849 
850     if (path == NULL || *path != '/') {
851         JPR_BAIL(JSONSL_ERROR_JPR_NOROOT);
852         return NULL;
853     }
854 
855     count = 1;
856     path++;
857     {
858         const char *c = path;
859         for (; *c; c++) {
860             if (*c == '/') {
861                 count++;
862                 if (*(c+1) == '/') {
863                     JPR_BAIL(JSONSL_ERROR_JPR_DUPSLASH);
864                 }
865             }
866         }
867     }
868     if(*path) {
869         count++;
870     }
871 
872     components = (struct jsonsl_jpr_component_st *)
873             malloc(sizeof(*components) * count);
874     if (!components) {
875         JPR_BAIL(JSONSL_ERROR_ENOMEM);
876     }
877 
878     my_copy = (char *)malloc(strlen(path) + 1);
879     if (!my_copy) {
880         JPR_BAIL(JSONSL_ERROR_ENOMEM);
881     }
882 
883     strcpy(my_copy, path);
884 
885     components[0].ptype = JSONSL_PATH_ROOT;
886 
887     if (*my_copy) {
888         char *cur = my_copy;
889         int pathret = JSONSL_PATH_STRING;
890         curidx = 1;
891         while (pathret > 0 && curidx < count) {
892             pathret = populate_component(cur, components + curidx, &cur, errp);
893             if (pathret > 0) {
894                 curidx++;
895             } else {
896                 break;
897             }
898         }
899 
900         if (pathret == JSONSL_PATH_INVALID) {
901             JPR_BAIL(JSONSL_ERROR_JPR_BADPATH);
902         }
903     } else {
904         curidx = 1;
905     }
906 
907     path--; /*revert path to leading '/' */
908     origlen = strlen(path) + 1;
909     ret = (struct jsonsl_jpr_st *)malloc(sizeof(*ret));
910     if (!ret) {
911         JPR_BAIL(JSONSL_ERROR_ENOMEM);
912     }
913     ret->orig = (char *)malloc(origlen);
914     if (!ret->orig) {
915         JPR_BAIL(JSONSL_ERROR_ENOMEM);
916     }
917     ret->components = components;
918     ret->ncomponents = curidx;
919     ret->basestr = my_copy;
920     ret->norig = origlen-1;
921     strcpy(ret->orig, path);
922 
923     return ret;
924 
925     GT_ERROR:
926     free(my_copy);
927     free(components);
928     if (ret) {
929         free(ret->orig);
930     }
931     free(ret);
932     return NULL;
933 #undef JPR_BAIL
934 }
935 
/*
 * Releases a path object created by jsonsl_jpr_new(), including its
 * component array and both string copies. Passing NULL is allowed and does
 * nothing, matching jsonsl_destroy().
 */
void jsonsl_jpr_destroy(jsonsl_jpr_t jpr)
{
    if (jpr == NULL) {
        return;
    }
    free(jpr->components);
    free(jpr->basestr);
    free(jpr->orig);
    free(jpr);
}
943 
944 JSONSL_API
945 jsonsl_jpr_match_t
jsonsl_jpr_match(jsonsl_jpr_t jpr,unsigned int parent_type,unsigned int parent_level,const char * key,size_t nkey)946 jsonsl_jpr_match(jsonsl_jpr_t jpr,
947                    unsigned int parent_type,
948                    unsigned int parent_level,
949                    const char *key,
950                    size_t nkey)
951 {
952     /* find our current component. This is the child level */
953     int cmpret;
954     struct jsonsl_jpr_component_st *p_component;
955     p_component = jpr->components + parent_level;
956 
957     if (parent_level >= jpr->ncomponents) {
958         return JSONSL_MATCH_NOMATCH;
959     }
960 
961     /* Lone query for 'root' element. Always matches */
962     if (parent_level == 0) {
963         if (jpr->ncomponents == 1) {
964             return JSONSL_MATCH_COMPLETE;
965         } else {
966             return JSONSL_MATCH_POSSIBLE;
967         }
968     }
969 
970     /* Wildcard, always matches */
971     if (p_component->ptype == JSONSL_PATH_WILDCARD) {
972         if (parent_level == jpr->ncomponents-1) {
973             return JSONSL_MATCH_COMPLETE;
974         } else {
975             return JSONSL_MATCH_POSSIBLE;
976         }
977     }
978 
979     /* Check numeric array index. This gets its special block so we can avoid
980      * string comparisons */
981     if (p_component->ptype == JSONSL_PATH_NUMERIC) {
982         if (parent_type == JSONSL_T_LIST) {
983             if (p_component->idx != nkey) {
984                 /* Wrong index */
985                 return JSONSL_MATCH_NOMATCH;
986             } else {
987                 if (parent_level == jpr->ncomponents-1) {
988                     /* This is the last element of the path */
989                     return JSONSL_MATCH_COMPLETE;
990                 } else {
991                     /* Intermediate element */
992                     return JSONSL_MATCH_POSSIBLE;
993                 }
994             }
995         } else if (p_component->is_arridx) {
996             /* Numeric and an array index (set explicitly by user). But not
997              * a list for a parent */
998             return JSONSL_MATCH_TYPE_MISMATCH;
999         }
1000     } else if (parent_type == JSONSL_T_LIST) {
1001         return JSONSL_MATCH_TYPE_MISMATCH;
1002     }
1003 
1004     /* Check lengths */
1005     if (p_component->len != nkey) {
1006         return JSONSL_MATCH_NOMATCH;
1007     }
1008 
1009     /* Check string comparison */
1010     cmpret = strncmp(p_component->pstr, key, nkey);
1011     if (cmpret == 0) {
1012         if (parent_level == jpr->ncomponents-1) {
1013             return JSONSL_MATCH_COMPLETE;
1014         } else {
1015             return JSONSL_MATCH_POSSIBLE;
1016         }
1017     }
1018 
1019     return JSONSL_MATCH_NOMATCH;
1020 }
1021 
1022 JSONSL_API
jsonsl_jpr_match_state_init(jsonsl_t jsn,jsonsl_jpr_t * jprs,size_t njprs)1023 void jsonsl_jpr_match_state_init(jsonsl_t jsn,
1024                                  jsonsl_jpr_t *jprs,
1025                                  size_t njprs)
1026 {
1027     size_t ii, *firstjmp;
1028     if (njprs == 0) {
1029         return;
1030     }
1031     jsn->jprs = (jsonsl_jpr_t *)malloc(sizeof(jsonsl_jpr_t) * njprs);
1032     jsn->jpr_count = njprs;
1033     jsn->jpr_root = (size_t*)calloc(1, sizeof(size_t) * njprs * jsn->levels_max);
1034     memcpy(jsn->jprs, jprs, sizeof(jsonsl_jpr_t) * njprs);
1035     /* Set the initial jump table values */
1036 
1037     firstjmp = jsn->jpr_root;
1038     for (ii = 0; ii < njprs; ii++) {
1039         firstjmp[ii] = ii+1;
1040     }
1041 }
1042 
1043 JSONSL_API
jsonsl_jpr_match_state_cleanup(jsonsl_t jsn)1044 void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn)
1045 {
1046     if (jsn->jpr_count == 0) {
1047         return;
1048     }
1049 
1050     free(jsn->jpr_root);
1051     free(jsn->jprs);
1052     jsn->jprs = NULL;
1053     jsn->jpr_root = NULL;
1054     jsn->jpr_count = 0;
1055 }
1056 
1057 /**
1058  * This function should be called exactly once on each element...
1059  * This should also be called in recursive order, since we rely
1060  * on the parent having been initalized for a match.
1061  *
1062  * Since the parent is checked for a match as well, we maintain a 'serial' counter.
1063  * Whenever we traverse an element, we expect the serial to be the same as a global
1064  * integer. If they do not match, we re-initialize the context, and set the serial.
1065  *
1066  * This ensures a type of consistency without having a proactive reset by the
1067  * main lexer itself.
1068  *
1069  */
1070 JSONSL_API
jsonsl_jpr_match_state(jsonsl_t jsn,struct jsonsl_state_st * state,const char * key,size_t nkey,jsonsl_jpr_match_t * out)1071 jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn,
1072                                     struct jsonsl_state_st *state,
1073                                     const char *key,
1074                                     size_t nkey,
1075                                     jsonsl_jpr_match_t *out)
1076 {
1077     struct jsonsl_state_st *parent_state;
1078     jsonsl_jpr_t ret = NULL;
1079 
1080     /* Jump and JPR tables for our own state and the parent state */
1081     size_t *jmptable, *pjmptable;
1082     size_t jmp_cur, ii, ourjmpidx;
1083 
1084     if (!jsn->jpr_root) {
1085         *out = JSONSL_MATCH_NOMATCH;
1086         return NULL;
1087     }
1088 
1089     pjmptable = jsn->jpr_root + (jsn->jpr_count * (state->level-1));
1090     jmptable = pjmptable + jsn->jpr_count;
1091 
1092     /* If the parent cannot match, then invalidate it */
1093     if (*pjmptable == 0) {
1094         *jmptable = 0;
1095         *out = JSONSL_MATCH_NOMATCH;
1096         return NULL;
1097     }
1098 
1099     parent_state = jsn->stack + state->level - 1;
1100 
1101     if (parent_state->type == JSONSL_T_LIST) {
1102         nkey = (size_t) parent_state->nelem;
1103     }
1104 
1105     *jmptable = 0;
1106     ourjmpidx = 0;
1107     memset(jmptable, 0, sizeof(int) * jsn->jpr_count);
1108 
1109     for (ii = 0; ii <  jsn->jpr_count; ii++) {
1110         jmp_cur = pjmptable[ii];
1111         if (jmp_cur) {
1112             jsonsl_jpr_t jpr = jsn->jprs[jmp_cur-1];
1113             *out = jsonsl_jpr_match(jpr,
1114                                     parent_state->type,
1115                                     parent_state->level,
1116                                     key, nkey);
1117             if (*out == JSONSL_MATCH_COMPLETE) {
1118                 ret = jpr;
1119                 *jmptable = 0;
1120                 return ret;
1121             } else if (*out == JSONSL_MATCH_POSSIBLE) {
1122                 jmptable[ourjmpidx] = ii+1;
1123                 ourjmpidx++;
1124             }
1125         } else {
1126             break;
1127         }
1128     }
1129     if (!*jmptable) {
1130         *out = JSONSL_MATCH_NOMATCH;
1131     }
1132     return NULL;
1133 }
1134 
1135 JSONSL_API
jsonsl_strmatchtype(jsonsl_jpr_match_t match)1136 const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match)
1137 {
1138 #define X(T,v) \
1139     if ( match == JSONSL_MATCH_##T ) \
1140         return #T;
1141     JSONSL_XMATCH
1142 #undef X
1143     return "<UNKNOWN>";
1144 }
1145 
1146 #endif /* JSONSL_WITH_JPR */
1147 
1148 /**
1149  * Utility function to convert escape sequences
1150  */
1151 JSONSL_API
jsonsl_util_unescape_ex(const char * in,char * out,size_t len,const int toEscape[128],unsigned * oflags,jsonsl_error_t * err,const char ** errat)1152 size_t jsonsl_util_unescape_ex(const char *in,
1153                                char *out,
1154                                size_t len,
1155                                const int toEscape[128],
1156                                unsigned *oflags,
1157                                jsonsl_error_t *err,
1158                                const char **errat)
1159 {
1160     const unsigned char *c = (const unsigned char*)in;
1161     int in_escape = 0;
1162     size_t origlen = len;
1163     /* difference between the length of the input buffer and the output buffer */
1164     size_t ndiff = 0;
1165     if (oflags) {
1166         *oflags = 0;
1167     }
1168 #define UNESCAPE_BAIL(e,offset) \
1169     *err = JSONSL_ERROR_##e; \
1170     if (errat) { \
1171         *errat = (const char*)(c+ (ptrdiff_t)(offset)); \
1172     } \
1173     return 0;
1174 
1175     for (; len; len--, c++, out++) {
1176         unsigned int uesc_val[2];
1177         if (in_escape) {
1178             /* inside a previously ignored escape. Ignore */
1179             in_escape = 0;
1180             goto GT_ASSIGN;
1181         }
1182 
1183         if (*c != '\\') {
1184             /* Not an escape, so we don't care about this */
1185             goto GT_ASSIGN;
1186         }
1187 
1188         if (len < 2) {
1189             UNESCAPE_BAIL(ESCAPE_INVALID, 0);
1190         }
1191         if (!is_allowed_escape(c[1])) {
1192             UNESCAPE_BAIL(ESCAPE_INVALID, 1)
1193         }
1194         if ((toEscape[(unsigned char)c[1] & 0x7f] == 0 &&
1195                 c[1] != '\\' && c[1] != '"')) {
1196             /* if we don't want to unescape this string, just continue with
1197              * the escape flag set
1198              */
1199             in_escape = 1;
1200             goto GT_ASSIGN;
1201         }
1202 
1203         if (c[1] != 'u') {
1204             /* simple skip-and-replace using pre-defined maps.
1205              * TODO: should the maps actually reflect the desired
1206              * replacement character in toEscape?
1207              */
1208             char esctmp = get_escape_equiv(c[1]);
1209             if (esctmp) {
1210                 /* Check if there is a corresponding replacement */
1211                 *out = esctmp;
1212             } else {
1213                 /* Just gobble up the 'reverse-solidus' */
1214                 *out = c[1];
1215             }
1216             len--;
1217             ndiff++;
1218             c++;
1219             /* do not assign, just continue */
1220             continue;
1221         }
1222 
1223         /* next == 'u' */
1224         if (len < 6) {
1225             /* Need at least six characters:
1226              * { [0] = '\\', [1] = 'u', [2] = 'f', [3] = 'f', [4] = 'f', [5] = 'f' }
1227              */
1228             UNESCAPE_BAIL(UESCAPE_TOOSHORT, -1);
1229         }
1230 
1231         if (sscanf((const char*)(c+2), "%02x%02x", uesc_val, uesc_val+1) != 2) {
1232             /* We treat the sequence as two octets */
1233             UNESCAPE_BAIL(UESCAPE_TOOSHORT, -1);
1234         }
1235 
1236         /* By now, we gobble up all the six bytes (current implied + 5 next
1237          * characters), and have at least four missing bytes from the output
1238          * buffer.
1239          */
1240         len -= 5;
1241         c += 5;
1242 
1243         ndiff += 4;
1244         if (uesc_val[0] == 0) {
1245             /* only one byte is extracted from the two
1246              * possible octets. Increment the diff counter by one.
1247              */
1248             *out = uesc_val[1];
1249             if (oflags && *(unsigned char*)out > 0x7f) {
1250                 *oflags |= JSONSL_SPECIALf_NONASCII;
1251             }
1252             ndiff++;
1253         } else {
1254             *(out++) = uesc_val[0];
1255             *out = uesc_val[1];
1256             if (oflags && (uesc_val[0] > 0x7f || uesc_val[1] > 0x7f)) {
1257                 *oflags |= JSONSL_SPECIALf_NONASCII;
1258             }
1259         }
1260         continue;
1261 
1262         /* Only reached by previous branches */
1263         GT_ASSIGN:
1264         *out = *c;
1265     }
1266     *err = JSONSL_ERROR_SUCCESS;
1267     return origlen - ndiff;
1268 }
1269 
1270 /**
1271  * Character Table definitions.
1272  * These were all generated via srcutil/genchartables.pl
1273  */
1274 
1275 /**
1276  * This table contains the beginnings of non-string
1277  * allowable (bareword) values.
1278  */
1279 static unsigned short Special_Table[0x100] = {
1280         /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
1281         /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x2c */
1282         /* 0x2d */ JSONSL_SPECIALf_DASH /* <-> */, /* 0x2d */
1283         /* 0x2e */ 0,0, /* 0x2f */
1284         /* 0x30 */ JSONSL_SPECIALf_ZERO /* <0> */, /* 0x30 */
1285         /* 0x31 */ JSONSL_SPECIALf_UNSIGNED /* <1> */, /* 0x31 */
1286         /* 0x32 */ JSONSL_SPECIALf_UNSIGNED /* <2> */, /* 0x32 */
1287         /* 0x33 */ JSONSL_SPECIALf_UNSIGNED /* <3> */, /* 0x33 */
1288         /* 0x34 */ JSONSL_SPECIALf_UNSIGNED /* <4> */, /* 0x34 */
1289         /* 0x35 */ JSONSL_SPECIALf_UNSIGNED /* <5> */, /* 0x35 */
1290         /* 0x36 */ JSONSL_SPECIALf_UNSIGNED /* <6> */, /* 0x36 */
1291         /* 0x37 */ JSONSL_SPECIALf_UNSIGNED /* <7> */, /* 0x37 */
1292         /* 0x38 */ JSONSL_SPECIALf_UNSIGNED /* <8> */, /* 0x38 */
1293         /* 0x39 */ JSONSL_SPECIALf_UNSIGNED /* <9> */, /* 0x39 */
1294         /* 0x3a */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x59 */
1295         /* 0x5a */ 0,0,0,0,0,0,0,0,0,0,0,0, /* 0x65 */
1296         /* 0x66 */ JSONSL_SPECIALf_FALSE /* <f> */, /* 0x66 */
1297         /* 0x67 */ 0,0,0,0,0,0,0, /* 0x6d */
1298         /* 0x6e */ JSONSL_SPECIALf_NULL /* <n> */, /* 0x6e */
1299         /* 0x6f */ 0,0,0,0,0, /* 0x73 */
1300         /* 0x74 */ JSONSL_SPECIALf_TRUE /* <t> */, /* 0x74 */
1301         /* 0x75 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x94 */
1302         /* 0x95 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb4 */
1303         /* 0xb5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd4 */
1304         /* 0xd5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf4 */
1305         /* 0xf5 */ 0,0,0,0,0,0,0,0,0,0, /* 0xfe */
1306 };
1307 
/**
 * Contains characters which signal the termination of any of the 'special'
 * bareword values: an entry is 1 for TAB, LF, CR, space and for the
 * characters  " , : [ \ ] { }  and 0 for every other byte.
 * Read-only, hence const; 16 entries per row.
 */
static const int Special_Endings[0x100] = {
        /* 0x00 */ 0,0,0,0,0,0,0,0, 0,1,1,0,0,1,0,0, /* <TAB> <LF> <CR> */
        /* 0x10 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x20 */ 1,0,1,0,0,0,0,0, 0,0,0,0,1,0,0,0, /* <SP> " , */
        /* 0x30 */ 0,0,0,0,0,0,0,0, 0,0,1,0,0,0,0,0, /* : */
        /* 0x40 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x50 */ 0,0,0,0,0,0,0,0, 0,0,0,1,1,1,0,0, /* [ \ ] */
        /* 0x60 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x70 */ 0,0,0,0,0,0,0,0, 0,0,0,1,0,1,0,0, /* { } */
        /* 0x80 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x90 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xa0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xb0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xc0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xd0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xe0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xf0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0
};
1340 
/**
 * This table contains entries for the allowed whitespace as per RFC 4627:
 * 1 for TAB, LF, CR and space, 0 for every other byte.
 * Read-only, hence const; 16 entries per row.
 */
static const int Allowed_Whitespace[0x100] = {
        /* 0x00 */ 0,0,0,0,0,0,0,0, 0,1,1,0,0,1,0,0, /* <TAB> <LF> <CR> */
        /* 0x10 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x20 */ 1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, /* <SP> */
        /* 0x30 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x40 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x50 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x60 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x70 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x80 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x90 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xa0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xb0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xc0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xd0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xe0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xf0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0
};
1360 
/**
 * Allowable two-character 'common' escapes: an entry is 1 when the byte may
 * follow a backslash ( " / \ b f n r t u ) and 0 otherwise.
 * Read-only, hence const; 16 entries per row.
 */
static const int Allowed_Escapes[0x100] = {
        /* 0x00 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x10 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x20 */ 0,0,1,0,0,0,0,0, 0,0,0,0,0,0,0,1, /* <"> </> */
        /* 0x30 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x40 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x50 */ 0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0, /* <\> */
        /* 0x60 */ 0,0,1,0,0,0,1,0, 0,0,0,0,0,0,1,0, /* <b> <f> <n> */
        /* 0x70 */ 0,0,1,0,1,1,0,0, 0,0,0,0,0,0,0,0, /* <r> <t> <u> */
        /* 0x80 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x90 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xa0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xb0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xc0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xd0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xe0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xf0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0
};
1390 
/**
 * This table contains the _values_ for a given (single) escaped character:
 * b -> 8, f -> 12, n -> 10, r -> 13, t -> 9. Every other entry is 0, meaning
 * there is no dedicated replacement byte for that character.
 * Read-only, hence const; 16 entries per row.
 */
static const unsigned char Escape_Equivs[0x100] = {
        /* 0x00 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x10 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x20 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x30 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x40 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x50 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x60 */ 0,0,8,0,0,0,12,0, 0,0,0,0,0,0,10,0, /* <b> <f> <n> */
        /* 0x70 */ 0,0,13,0,9,0,0,0, 0,0,0,0,0,0,0,0, /* <r> <t> */
        /* 0x80 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x90 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xa0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xb0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xc0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xd0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xe0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0xf0 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0
};
1414 
1415 /* Definitions of above-declared static functions */
get_escape_equiv(unsigned c)1416 static char get_escape_equiv(unsigned c) {
1417     return Escape_Equivs[c & 0xff];
1418 }
extract_special(unsigned c)1419 static unsigned extract_special(unsigned c) {
1420     return Special_Table[c & 0xff];
1421 }
is_special_end(unsigned c)1422 static int is_special_end(unsigned c) {
1423     return Special_Endings[c & 0xff];
1424 }
is_allowed_whitespace(unsigned c)1425 static int is_allowed_whitespace(unsigned c) {
1426     return c == ' ' || Allowed_Whitespace[c & 0xff];
1427 }
is_allowed_escape(unsigned c)1428 static int is_allowed_escape(unsigned c) {
1429     return Allowed_Escapes[c & 0xff];
1430 }
1431 
/* Clean up all our macros!
 * Undefine the file-local helper macros so that they cannot leak into other
 * code when this file is included into a larger translation unit.
 * #undef of a name that is not currently defined is harmless. */
#undef INCR_METRIC
#undef INCR_GENERIC
#undef INCR_STRINGY_CATCH
#undef CASE_DIGITS
#undef INVOKE_ERROR
#undef STACK_PUSH
#undef STACK_POP_NOPOS
#undef STACK_POP
#undef CALLBACK_AND_POP_NOPOS
#undef CALLBACK_AND_POP
#undef SPECIAL_POP
#undef CUR_CHAR
#undef DO_CALLBACK
#undef ENSURE_HVAL
#undef VERIFY_SPECIAL
#undef STATE_SPECIAL_LENGTH
#undef IS_NORMAL_NUMBER
#undef STATE_NUM_LAST
#undef FASTPARSE_EXHAUSTED
#undef FASTPARSE_BREAK
1453