1 /* Copyright (C) 2012-2015 Mark Nunberg.
2 *
3 * See included LICENSE file for license details.
4 */
5
6 #include "jsonsl.h"
7 #include <assert.h>
8 #include <limits.h>
9 #include <ctype.h>
10
11 #ifdef JSONSL_USE_METRICS
/*
 * Metrics bookkeeping, compiled only when JSONSL_USE_METRICS is defined.
 *
 * XMETRICS is an X-macro naming every counter; it is expanded once to declare
 * the fields of struct jsonsl_metrics_st and again by the dump routine.
 */
#define XMETRICS \
    X(STRINGY_INSIGNIFICANT) \
    X(STRINGY_SLOWPATH) \
    X(ALLOWED_WHITESPACE) \
    X(QUOTE_FASTPATH) \
    X(SPECIAL_FASTPATH) \
    X(SPECIAL_WSPOP) \
    X(SPECIAL_SLOWPATH) \
    X(GENERIC) \
    X(STRUCTURAL_TOKEN) \
    X(SPECIAL_SWITCHFIRST) \
    X(STRINGY_CATCH) \
    X(NUMBER_FASTPATH) \
    X(ESCAPES) \
    X(TOTAL)

/* One unsigned long counter (metric_<NAME>) per XMETRICS entry. */
struct jsonsl_metrics_st {
#define X(m) \
    unsigned long metric_##m;
    XMETRICS
#undef X
};

/* Process-wide counters; plain (non-atomic) increments. */
static struct jsonsl_metrics_st GlobalMetrics = { 0 };
/* Per-byte histograms, indexed by the low 8 bits of the character. */
static unsigned long GenericCounter[0x100] = { 0 };
static unsigned long StringyCatchCounter[0x100] = { 0 };

/*
 * The increment macros are wrapped in do { } while (0) so that each one is a
 * single statement: `if (x) INCR_GENERIC(c); else ...` now behaves as written
 * instead of letting the second increment escape the `if`.
 * Call sites must supply the trailing semicolon.
 */
#define INCR_METRIC(m) \
    do { GlobalMetrics.metric_##m++; } while (0)

/* Bump GENERIC and the histogram slot for `c` (masked to stay in bounds). */
#define INCR_GENERIC(c) \
    do { \
        INCR_METRIC(GENERIC); \
        GenericCounter[(unsigned)(c) & 0xffu]++; \
    } while (0)

/* Bump STRINGY_CATCH and the histogram slot for `c` (masked to stay in bounds). */
#define INCR_STRINGY_CATCH(c) \
    do { \
        INCR_METRIC(STRINGY_CATCH); \
        StringyCatchCounter[(unsigned)(c) & 0xffu]++; \
    } while (0)
49
50 JSONSL_API
jsonsl_dump_global_metrics(void)51 void jsonsl_dump_global_metrics(void)
52 {
53 int ii;
54 printf("JSONSL Metrics:\n");
55 #define X(m) \
56 printf("\t%-30s %20lu (%0.2f%%)\n", #m, GlobalMetrics.metric_##m, \
57 (float)((float)(GlobalMetrics.metric_##m/(float)GlobalMetrics.metric_TOTAL)) * 100);
58 XMETRICS
59 #undef X
60 printf("Generic Characters:\n");
61 for (ii = 0; ii < 0xff; ii++) {
62 if (GenericCounter[ii]) {
63 printf("\t[ %c ] %lu\n", ii, GenericCounter[ii]);
64 }
65 }
66 printf("Weird string loop\n");
67 for (ii = 0; ii < 0xff; ii++) {
68 if (StringyCatchCounter[ii]) {
69 printf("\t[ %c ] %lu\n", ii, StringyCatchCounter[ii]);
70 }
71 }
72 }
73
74 #else
75 #define INCR_METRIC(m)
76 #define INCR_GENERIC(c)
77 #define INCR_STRINGY_CATCH(c)
78 JSONSL_API
jsonsl_dump_global_metrics(void)79 void jsonsl_dump_global_metrics(void) { }
80 #endif /* JSONSL_USE_METRICS */
81
/*
 * Expands to the ten `case` labels '0' through '9', for use inside a switch
 * on a character. The statement that follows handles all digits.
 */
#define CASE_DIGITS \
    case '0': case '1': case '2': case '3': case '4': \
    case '5': case '6': case '7': case '8': case '9':
93
/*
 * Character-classification helpers, defined further down in this file.
 * Each one takes the character under examination.
 */
static int is_allowed_whitespace(unsigned c);
static int is_allowed_escape(unsigned c);
static int is_special_end(unsigned c);
static unsigned extract_special(unsigned c);
static char get_escape_equiv(unsigned c);
99
100 JSONSL_API
jsonsl_new(int nlevels)101 jsonsl_t jsonsl_new(int nlevels)
102 {
103 struct jsonsl_st *jsn = (struct jsonsl_st *)
104 calloc(1, sizeof (*jsn) +
105 ( (nlevels-1) * sizeof (struct jsonsl_state_st) )
106 );
107
108 jsn->levels_max = nlevels;
109 jsn->max_callback_level = -1;
110 jsonsl_reset(jsn);
111 return jsn;
112 }
113
114 JSONSL_API
jsonsl_reset(jsonsl_t jsn)115 void jsonsl_reset(jsonsl_t jsn)
116 {
117 unsigned int ii;
118 jsn->tok_last = 0;
119 jsn->can_insert = 1;
120 jsn->pos = 0;
121 jsn->level = 0;
122 jsn->stopfl = 0;
123 jsn->in_escape = 0;
124 jsn->expecting = 0;
125
126 memset(jsn->stack, 0, (jsn->levels_max * sizeof (struct jsonsl_state_st)));
127
128 for (ii = 0; ii < jsn->levels_max; ii++) {
129 jsn->stack[ii].level = ii;
130 }
131 }
132
133 JSONSL_API
jsonsl_destroy(jsonsl_t jsn)134 void jsonsl_destroy(jsonsl_t jsn)
135 {
136 if (jsn) {
137 free(jsn);
138 }
139 }
140
141
142 #define FASTPARSE_EXHAUSTED 1
143 #define FASTPARSE_BREAK 0
144 static const int chrt_string_nopass[0x100] = { JSONSL_CHARTABLE_string_nopass };
145
146 /*
147 * This function is meant to accelerate string parsing, reducing the main loop's
148 * check if we are indeed a string.
149 *
150 * @param jsn the parser
151 * @param[in,out] bytes_p A pointer to the current buffer (i.e. current position)
152 * @param[in,out] nbytes_p A pointer to the current size of the buffer
153 * @return true if all bytes have been exhausted (and thus the main loop can
154 * return), false if a special character was examined which requires greater
155 * examination.
156 */
157 static int
jsonsl__str_fastparse(jsonsl_t jsn,const jsonsl_uchar_t ** bytes_p,size_t * nbytes_p)158 jsonsl__str_fastparse(jsonsl_t jsn,
159 const jsonsl_uchar_t **bytes_p, size_t *nbytes_p)
160 {
161 int exhausted = 1;
162 size_t nbytes = *nbytes_p;
163 const jsonsl_uchar_t *bytes = *bytes_p;
164
165 for (; nbytes; nbytes--, bytes++) {
166 if (
167 #ifdef JSONSL_USE_WCHAR
168 *bytes >= 0x100 ||
169 #endif /* JSONSL_USE_WCHAR */
170 (!chrt_string_nopass[*bytes])) {
171 INCR_METRIC(TOTAL);
172 INCR_METRIC(STRINGY_INSIGNIFICANT);
173 } else {
174 exhausted = 0;
175 break;
176 }
177 }
178
179 /* Once we're done here, re-calculate the position variables */
180 jsn->pos += (*nbytes_p - nbytes);
181 if (exhausted) {
182 return FASTPARSE_EXHAUSTED;
183 }
184
185 *nbytes_p = nbytes;
186 *bytes_p = bytes;
187 return FASTPARSE_BREAK;
188 }
189
190 /* Functions exactly like str_fastparse, except it also accepts a 'state'
191 * argument, since the number's value is updated in the state. */
192 static int
jsonsl__num_fastparse(jsonsl_t jsn,const jsonsl_uchar_t ** bytes_p,size_t * nbytes_p,struct jsonsl_state_st * state)193 jsonsl__num_fastparse(jsonsl_t jsn,
194 const jsonsl_uchar_t **bytes_p, size_t *nbytes_p,
195 struct jsonsl_state_st *state)
196 {
197 int exhausted = 1;
198 size_t nbytes = *nbytes_p;
199 const jsonsl_uchar_t *bytes = *bytes_p;
200
201 for (; nbytes; nbytes--, bytes++) {
202 jsonsl_uchar_t c = *bytes;
203 if (isdigit(c)) {
204 INCR_METRIC(TOTAL);
205 INCR_METRIC(NUMBER_FASTPATH);
206 state->nelem = (state->nelem * 10) + (c - 0x30);
207 } else {
208 exhausted = 0;
209 break;
210 }
211 }
212 jsn->pos += (*nbytes_p - nbytes);
213 if (exhausted) {
214 return FASTPARSE_EXHAUSTED;
215 }
216 *nbytes_p = nbytes;
217 *bytes_p = bytes;
218 return FASTPARSE_BREAK;
219 }
220
/**
 * Feed a chunk of input to the lexer.
 *
 * Examines `bytes` one character at a time, advancing jsn->pos, and drives
 * the state stack in jsn->stack: quotes, brackets and bareword values push a
 * new state, their terminators pop it, and the PUSH/POP action callbacks are
 * fired through DO_CALLBACK. Problems are reported through
 * jsn->error_callback (see INVOKE_ERROR). Parsing state is kept in `jsn`, so
 * the function resumes from jsn->stack[jsn->level] on the next call.
 *
 * The function returns early when: the input is exhausted inside a fast
 * path, an error callback returns zero, a callback sets jsn->stopfl, the
 * nesting limit is hit, or jsn->return_UESCAPE is set on a \u escape.
 *
 * @param jsn    the lexer; jsn->base is set to `bytes`
 * @param bytes  the input characters
 * @param nbytes number of characters available at `bytes`
 */
221 JSONSL_API
222 void
jsonsl_feed(jsonsl_t jsn,const jsonsl_char_t * bytes,size_t nbytes)223 jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes)
224 {
225
/* Report JSONSL_ERROR_<eb> at the current character. A non-zero return from
 * the error callback re-examines the same character (goto GT_AGAIN); a zero
 * return leaves jsonsl_feed. Control never falls through this macro. */
226 #define INVOKE_ERROR(eb) \
227 if (jsn->error_callback(jsn, JSONSL_ERROR_##eb, state, (char*)c)) { \
228 goto GT_AGAIN; \
229 } \
230 return;
231
/* Enter the next stack slot. Reports LEVELS_EXCEEDED and returns when the
 * stack is full; otherwise the new state inherits the parent's
 * ignore_callback flag and records where it began. */
232 #define STACK_PUSH \
233 if (jsn->level >= (levels_max-1)) { \
234 jsn->error_callback(jsn, JSONSL_ERROR_LEVELS_EXCEEDED, state, (char*)c); \
235 return; \
236 } \
237 state = jsn->stack + (++jsn->level); \
238 state->ignore_callback = jsn->stack[jsn->level-1].ignore_callback; \
239 state->pos_begin = jsn->pos;
240
/* Leave the current state: stamp its pos_cur, then step back to the parent
 * without touching the parent's pos_cur. */
241 #define STACK_POP_NOPOS \
242 state->pos_cur = jsn->pos; \
243 state = jsn->stack + (--jsn->level);
244
245
/* As STACK_POP_NOPOS, and also stamp the parent's pos_cur. */
246 #define STACK_POP \
247 STACK_POP_NOPOS; \
248 state->pos_cur = jsn->pos;
249
/* Stamp pos_cur, fire the POP callback for type T, clear the state's escape
 * counter and step back to the parent. */
250 #define CALLBACK_AND_POP_NOPOS(T) \
251 state->pos_cur = jsn->pos; \
252 DO_CALLBACK(T, POP); \
253 state->nescapes = 0; \
254 state = jsn->stack + (--jsn->level);
255
/* As CALLBACK_AND_POP_NOPOS, and also stamp the parent's pos_cur. */
256 #define CALLBACK_AND_POP(T) \
257 CALLBACK_AND_POP_NOPOS(T); \
258 state->pos_cur = jsn->pos;
259
/* Pop a SPECIAL (bareword) state and clear the expecting/tok_last registers. */
260 #define SPECIAL_POP \
261 CALLBACK_AND_POP(SPECIAL); \
262 jsn->expecting = 0; \
263 jsn->tok_last = 0; \
264
/* The character currently under examination. */
265 #define CUR_CHAR (*(jsonsl_uchar_t*)c)
266
/* Fire the PUSH/POP/... callback for element type T, but only when that type
 * is enabled (jsn->call_T), the state is shallower than max_callback_level
 * and the state is not flagged ignore_callback. The action-specific callback
 * is preferred over the generic one. Returns from jsonsl_feed if the
 * callback set jsn->stopfl. */
267 #define DO_CALLBACK(T, action) \
268 if (jsn->call_##T && \
269 jsn->max_callback_level > state->level && \
270 state->ignore_callback == 0) { \
271 \
272 if (jsn->action_callback_##action) { \
273 jsn->action_callback_##action(jsn, JSONSL_ACTION_##action, state, (jsonsl_char_t*)c); \
274 } else if (jsn->action_callback) { \
275 jsn->action_callback(jsn, JSONSL_ACTION_##action, state, (jsonsl_char_t*)c); \
276 } \
277 if (jsn->stopfl) { return; } \
278 }
279
280 /**
281 * Verifies that we are able to insert the (non-string) item into a hash.
282 */
283 #define ENSURE_HVAL \
284 if (state->nelem % 2 == 0 && state->type == JSONSL_T_OBJECT) { \
285 INVOKE_ERROR(HKEY_EXPECTED); \
286 }
287
/* Check the current character against literal `lit` at the offset given by
 * the distance from the start of the special (pos - pos_begin). */
288 #define VERIFY_SPECIAL(lit) \
289 if (CUR_CHAR != (lit)[jsn->pos - state->pos_begin]) { \
290 INVOKE_ERROR(SPECIAL_EXPECTED); \
291 }
292
/* While inside a SPECIAL state the nescapes field is reused as a length
 * counter for the bareword. */
293 #define STATE_SPECIAL_LENGTH \
294 (state)->nescapes
295
/* True when the special is a number with no ZERO/FLOAT/EXPONENT/DASH flag:
 * exactly UNSIGNED or exactly SIGNED. */
296 #define IS_NORMAL_NUMBER \
297 ((state)->special_flags == JSONSL_SPECIALf_UNSIGNED || \
298 (state)->special_flags == JSONSL_SPECIALf_SIGNED)
299
/* While inside a number, jsn->tok_last is reused to remember the class of
 * the previous character ('1' digit, '.', 'e', '-'). */
300 #define STATE_NUM_LAST jsn->tok_last
301
/* Proceed to the next input character (runs the for-loop increment). */
302 #define CONTINUE_NEXT_CHAR() continue
303
/* c walks the input; state always points at jsn->stack[jsn->level]. */
304 const jsonsl_uchar_t *c = (jsonsl_uchar_t*)bytes;
305 size_t levels_max = jsn->levels_max;
306 struct jsonsl_state_st *state = jsn->stack + jsn->level;
307 jsn->base = bytes;
308
309 for (; nbytes; nbytes--, jsn->pos++, c++) {
310 unsigned state_type;
311 INCR_METRIC(TOTAL);
312
/* Re-entry point used by INVOKE_ERROR when the error callback asks to
 * continue: the same character is examined again. */
313 GT_AGAIN:
314 state_type = state->type;
315 /* Most common type is typically a string: */
316 if (state_type & JSONSL_Tf_STRINGY) {
317 /* Special escape handling for some stuff */
318 if (jsn->in_escape) {
319 jsn->in_escape = 0;
320 if (!is_allowed_escape(CUR_CHAR)) {
321 INVOKE_ERROR(ESCAPE_INVALID);
322 } else if (CUR_CHAR == 'u') {
323 DO_CALLBACK(UESCAPE, UESCAPE);
324 if (jsn->return_UESCAPE) {
325 return;
326 }
327 }
328 CONTINUE_NEXT_CHAR();
329 }
330
/* Skip the run of ordinary string characters; on BREAK, c/nbytes have been
 * moved to the character that stopped the fast path. */
331 if (jsonsl__str_fastparse(jsn, &c, &nbytes) ==
332 FASTPARSE_EXHAUSTED) {
333 /* No need to readjust variables as we've exhausted the iterator */
334 return;
335 } else {
336 if (CUR_CHAR == '"') {
337 goto GT_QUOTE;
338 } else if (CUR_CHAR == '\\') {
339 goto GT_ESCAPE;
340 } else {
341 INVOKE_ERROR(WEIRD_WHITESPACE);
342 }
343 }
/* Every branch above leaves via goto or INVOKE_ERROR, so this increment is
 * not reached. */
344 INCR_METRIC(STRINGY_SLOWPATH);
345
346 } else if (state_type == JSONSL_T_SPECIAL) {
347 /* Fast track for signed/unsigned */
348 if (IS_NORMAL_NUMBER) {
349 if (jsonsl__num_fastparse(jsn, &c, &nbytes, state) ==
350 FASTPARSE_EXHAUSTED) {
351 return;
352 } else {
353 goto GT_SPECIAL_NUMERIC;
354 }
/* Only a leading '-' has been seen so far: the next character must be a
 * digit. */
355 } else if (state->special_flags == JSONSL_SPECIALf_DASH) {
356 if (!isdigit(CUR_CHAR)) {
357 INVOKE_ERROR(INVALID_NUMBER);
358 }
359
/* INVOKE_ERROR above never falls through, so CUR_CHAR is a digit here and
 * the final else below cannot be taken. */
360 if (CUR_CHAR == '0') {
361 state->special_flags = JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED;
362 } else if (isdigit(CUR_CHAR)) {
363 state->special_flags = JSONSL_SPECIALf_SIGNED;
364 state->nelem = CUR_CHAR - 0x30;
365 } else {
366 INVOKE_ERROR(INVALID_NUMBER);
367 }
368 CONTINUE_NEXT_CHAR();
369
/* The number so far is exactly a lone zero: a further digit is an error. */
370 } else if (state->special_flags == JSONSL_SPECIALf_ZERO) {
371 if (isdigit(CUR_CHAR)) {
372 /* Following a zero! */
373 INVOKE_ERROR(INVALID_NUMBER);
374 }
375 /* Unset the 'zero' flag: */
/* NOTE(review): this branch is entered only when special_flags equals ZERO
 * exactly, so the SIGNED test below looks like it can never be true unless
 * the flag values overlap - confirm against the flag definitions. */
376 if (state->special_flags & JSONSL_SPECIALf_SIGNED) {
377 state->special_flags = JSONSL_SPECIALf_SIGNED;
378 } else {
379 state->special_flags = JSONSL_SPECIALf_UNSIGNED;
380 }
381 goto GT_SPECIAL_NUMERIC;
382 }
383
/* Classify one character of a numeric special. STATE_NUM_LAST remembers the
 * class of the previous character so that a sign is only accepted right
 * after an exponent marker. */
384 if (state->special_flags & JSONSL_SPECIALf_NUMERIC) {
385 GT_SPECIAL_NUMERIC:
386 switch (CUR_CHAR) {
387 CASE_DIGITS
388 STATE_NUM_LAST = '1';
389 CONTINUE_NEXT_CHAR();
390
391 case '.':
392 if (state->special_flags & JSONSL_SPECIALf_FLOAT) {
393 INVOKE_ERROR(INVALID_NUMBER);
394 }
395 state->special_flags |= JSONSL_SPECIALf_FLOAT;
396 STATE_NUM_LAST = '.';
397 CONTINUE_NEXT_CHAR();
398
399 case 'e':
400 case 'E':
401 if (state->special_flags & JSONSL_SPECIALf_EXPONENT) {
402 INVOKE_ERROR(INVALID_NUMBER);
403 }
404 state->special_flags |= JSONSL_SPECIALf_EXPONENT;
405 STATE_NUM_LAST = 'e';
406 CONTINUE_NEXT_CHAR();
407
408 case '-':
409 case '+':
410 if (STATE_NUM_LAST != 'e') {
411 INVOKE_ERROR(INVALID_NUMBER);
412 }
413 STATE_NUM_LAST = '-';
414 CONTINUE_NEXT_CHAR();
415
416 default:
417 if (is_special_end(CUR_CHAR)) {
418 goto GT_SPECIAL_POP;
419 }
420 INVOKE_ERROR(INVALID_NUMBER);
421 break;
422 }
423 }
424 /* else if (!NUMERIC) */
/* Non-numeric bareword: count its length and check each character against
 * the expected literal. */
425 if (!is_special_end(CUR_CHAR)) {
426 STATE_SPECIAL_LENGTH++;
427
428 /* Verify TRUE, FALSE, NULL */
429 if (state->special_flags == JSONSL_SPECIALf_TRUE) {
430 VERIFY_SPECIAL("true");
431 } else if (state->special_flags == JSONSL_SPECIALf_FALSE) {
432 VERIFY_SPECIAL("false");
433 } else if (state->special_flags == JSONSL_SPECIALf_NULL) {
434 VERIFY_SPECIAL("null");
435 }
436 INCR_METRIC(SPECIAL_FASTPATH);
437 CONTINUE_NEXT_CHAR();
438 }
439
/* The special has ended: validate that it is complete, pop it, then handle
 * the terminating character itself. */
440 GT_SPECIAL_POP:
441 if (IS_NORMAL_NUMBER) {
442 /* Nothing */
443 } else if (state->special_flags == JSONSL_SPECIALf_ZERO ||
444 state->special_flags == (JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED)) {
445 /* 0 is unsigned! */
446 state->special_flags = JSONSL_SPECIALf_UNSIGNED;
447 } else if (state->special_flags == JSONSL_SPECIALf_DASH) {
448 /* Still in dash! */
449 INVOKE_ERROR(INVALID_NUMBER);
450 } else if (state->special_flags & JSONSL_SPECIALf_NUMERIC) {
451 /* Check that we're not at the end of a token */
452 if (STATE_NUM_LAST != '1') {
453 INVOKE_ERROR(INVALID_NUMBER);
454 }
455 } else if (state->special_flags == JSONSL_SPECIALf_TRUE) {
456 if (STATE_SPECIAL_LENGTH != 4) {
457 INVOKE_ERROR(SPECIAL_INCOMPLETE);
458 }
459 state->nelem = 1;
460 } else if (state->special_flags == JSONSL_SPECIALf_FALSE) {
461 if (STATE_SPECIAL_LENGTH != 5) {
462 INVOKE_ERROR(SPECIAL_INCOMPLETE);
463 }
464 } else if (state->special_flags == JSONSL_SPECIALf_NULL) {
465 if (STATE_SPECIAL_LENGTH != 4) {
466 INVOKE_ERROR(SPECIAL_INCOMPLETE);
467 }
468 }
469 SPECIAL_POP;
470 jsn->expecting = ',';
471 if (is_allowed_whitespace(CUR_CHAR)) {
472 CONTINUE_NEXT_CHAR();
473 }
474 /**
475 * This works because we have a non-whitespace token
476 * which is not a special token. If this is a structural
477 * character then it will be gracefully handled by the
478 * switch statement. Otherwise it will default to the 'special'
479 * state again,
480 */
481 goto GT_STRUCTURAL_TOKEN;
482 } else if (is_allowed_whitespace(CUR_CHAR)) {
483 INCR_METRIC(ALLOWED_WHITESPACE);
484 /* So we're not special. Harmless insignificant whitespace
485 * passthrough
486 */
487 CONTINUE_NEXT_CHAR();
488 } else if (extract_special(CUR_CHAR)) {
489 /* not a string, whitespace, or structural token. must be special */
490 goto GT_SPECIAL_BEGIN;
491 }
492
493 INCR_GENERIC(CUR_CHAR);
494
/* A quote either closes the current string/key or opens a new one inside
 * the enclosing container; state_type is the type seen at GT_AGAIN. */
495 if (CUR_CHAR == '"') {
496 GT_QUOTE:
497 jsn->can_insert = 0;
498 switch (state_type) {
499
500 /* the end of a string or hash key */
501 case JSONSL_T_STRING:
502 CALLBACK_AND_POP(STRING);
503 CONTINUE_NEXT_CHAR();
504 case JSONSL_T_HKEY:
505 CALLBACK_AND_POP(HKEY);
506 CONTINUE_NEXT_CHAR();
507
/* Inside an object, nelem counts keys and values together: after the
 * increment an even (nelem-1) means this string is a key, odd a value. */
508 case JSONSL_T_OBJECT:
509 state->nelem++;
510 if ( (state->nelem-1) % 2 ) {
511 /* Odd, this must be a hash value */
512 if (jsn->tok_last != ':') {
513 INVOKE_ERROR(MISSING_TOKEN);
514 }
515 jsn->expecting = ','; /* Can't figure out what to expect next */
516 jsn->tok_last = 0;
517
518 STACK_PUSH;
519 state->type = JSONSL_T_STRING;
520 DO_CALLBACK(STRING, PUSH);
521
522 } else {
523 /* hash key */
524 if (jsn->expecting != '"') {
525 INVOKE_ERROR(STRAY_TOKEN);
526 }
527 jsn->tok_last = 0;
528 jsn->expecting = ':';
529
530 STACK_PUSH;
531 state->type = JSONSL_T_HKEY;
532 DO_CALLBACK(HKEY, PUSH);
533 }
534 CONTINUE_NEXT_CHAR();
535
536 case JSONSL_T_LIST:
537 state->nelem++;
538 STACK_PUSH;
539 state->type = JSONSL_T_STRING;
540 jsn->expecting = ',';
541 jsn->tok_last = 0;
542 DO_CALLBACK(STRING, PUSH);
543 CONTINUE_NEXT_CHAR();
544
545 case JSONSL_T_SPECIAL:
546 INVOKE_ERROR(STRAY_TOKEN);
547 break;
548
549 default:
550 INVOKE_ERROR(STRING_OUTSIDE_CONTAINER);
551 break;
552 } /* switch(state->type) */
553 } else if (CUR_CHAR == '\\') {
554 GT_ESCAPE:
555 INCR_METRIC(ESCAPES);
556 /* Escape */
557 if ( (state->type & JSONSL_Tf_STRINGY) == 0 ) {
558 INVOKE_ERROR(ESCAPE_OUTSIDE_STRING);
559 }
560 state->nescapes++;
561 jsn->in_escape = 1;
562 CONTINUE_NEXT_CHAR();
563 } /* " or \ */
564
/* Structural characters; anything else falls to `default`, which tries to
 * begin a special (bareword) value. */
565 GT_STRUCTURAL_TOKEN:
566 switch (CUR_CHAR) {
567 case ':':
568 INCR_METRIC(STRUCTURAL_TOKEN);
569 if (jsn->expecting != CUR_CHAR) {
570 INVOKE_ERROR(STRAY_TOKEN);
571 }
572 jsn->tok_last = ':';
573 jsn->can_insert = 1;
574 jsn->expecting = '"';
575 CONTINUE_NEXT_CHAR();
576
577 case ',':
578 INCR_METRIC(STRUCTURAL_TOKEN);
579 /**
580 * The comma is one of the more generic tokens.
581 * In the context of an OBJECT, the can_insert flag
582 * should never be set, and no other action is
583 * necessary.
584 */
585 if (jsn->expecting != CUR_CHAR) {
586 /* make this branch execute only when we haven't manually
587 * just placed the ',' in the expecting register.
588 */
589 INVOKE_ERROR(STRAY_TOKEN);
590 }
591
592 if (state->type == JSONSL_T_OBJECT) {
593 /* end of hash value, expect a string as a hash key */
594 jsn->expecting = '"';
595 } else {
596 jsn->can_insert = 1;
597 }
598
599 jsn->tok_last = ',';
/* NOTE(review): this assignment is unconditional, which makes the
 * OBJECT-only assignment a few lines above redundant. */
600 jsn->expecting = '"';
601 CONTINUE_NEXT_CHAR();
602
603 /* new list or object */
604 /* hashes are more common */
605 case '{':
606 case '[':
607 INCR_METRIC(STRUCTURAL_TOKEN);
608 if (!jsn->can_insert) {
609 INVOKE_ERROR(CANT_INSERT);
610 }
611
612 ENSURE_HVAL;
613 state->nelem++;
614
615 STACK_PUSH;
616 /* because the constants match the opening delimiters, we can do this: */
617 state->type = CUR_CHAR;
618 state->nelem = 0;
619 jsn->can_insert = 1;
620 if (CUR_CHAR == '{') {
621 /* If we're a hash, we expect a key first, which is quoted */
622 jsn->expecting = '"';
623 }
624 if (CUR_CHAR == JSONSL_T_OBJECT) {
625 DO_CALLBACK(OBJECT, PUSH);
626 } else {
627 DO_CALLBACK(LIST, PUSH);
628 }
629 jsn->tok_last = 0;
630 CONTINUE_NEXT_CHAR();
631
632 /* closing of list or object */
633 case '}':
634 case ']':
635 INCR_METRIC(STRUCTURAL_TOKEN);
636 if (jsn->tok_last == ',' && jsn->options.allow_trailing_comma == 0) {
637 INVOKE_ERROR(TRAILING_COMMA);
638 }
639
/* jsn->level is decremented before the bracket type is validated, while
 * `state` still points at the container being closed; `state` is re-synced
 * to the parent only after the POP callback. */
640 jsn->can_insert = 0;
641 jsn->level--;
642 jsn->expecting = ',';
643 jsn->tok_last = 0;
644 if (CUR_CHAR == ']') {
645 if (state->type != '[') {
646 INVOKE_ERROR(BRACKET_MISMATCH);
647 }
648 DO_CALLBACK(LIST, POP);
649 } else {
650 if (state->type != '{') {
651 INVOKE_ERROR(BRACKET_MISMATCH);
652 } else if (state->nelem && state->nelem % 2 != 0) {
653 INVOKE_ERROR(VALUE_EXPECTED);
654 }
655 DO_CALLBACK(OBJECT, POP);
656 }
657 state = jsn->stack + jsn->level;
658 state->pos_cur = jsn->pos;
659 CONTINUE_NEXT_CHAR();
660
661 default:
662 GT_SPECIAL_BEGIN:
663 /**
664 * Not a string, not a structural token, and not benign whitespace.
665 * Technically we should iterate over the character always, but since
666 * we are not doing full numerical/value decoding anyway (but only hinting),
667 * we only check upon entry.
668 */
669 if (state->type != JSONSL_T_SPECIAL) {
670 int special_flags = extract_special(CUR_CHAR);
671 if (!special_flags) {
672 /**
673 * Try to do some heuristics here anyway to figure out what kind of
674 * error this is. The 'special' case is a fallback scenario anyway.
675 */
676 if (CUR_CHAR == '\0') {
677 INVOKE_ERROR(FOUND_NULL_BYTE);
678 } else if (CUR_CHAR < 0x20) {
679 INVOKE_ERROR(WEIRD_WHITESPACE);
680 } else {
681 INVOKE_ERROR(SPECIAL_EXPECTED);
682 }
683 }
684 ENSURE_HVAL;
685 state->nelem++;
686 if (!jsn->can_insert) {
687 INVOKE_ERROR(CANT_INSERT);
688 }
689 STACK_PUSH;
690 state->type = JSONSL_T_SPECIAL;
691 state->special_flags = special_flags;
692 STATE_SPECIAL_LENGTH = 1;
693
/* For an unsigned number the first digit seeds nelem; every other special
 * starts with nelem = 0. */
694 if (special_flags == JSONSL_SPECIALf_UNSIGNED) {
695 state->nelem = CUR_CHAR - 0x30;
696 STATE_NUM_LAST = '1';
697 } else {
698 STATE_NUM_LAST = '-';
699 state->nelem = 0;
700 }
701 DO_CALLBACK(SPECIAL, PUSH);
702 }
703 CONTINUE_NEXT_CHAR();
704 }
705 }
706 }
707
708 JSONSL_API
jsonsl_strerror(jsonsl_error_t err)709 const char* jsonsl_strerror(jsonsl_error_t err)
710 {
711 if (err == JSONSL_ERROR_SUCCESS) {
712 return "SUCCESS";
713 }
714 #define X(t) \
715 if (err == JSONSL_ERROR_##t) \
716 return #t;
717 JSONSL_XERR;
718 #undef X
719 return "<UNKNOWN_ERROR>";
720 }
721
722 JSONSL_API
jsonsl_strtype(jsonsl_type_t type)723 const char *jsonsl_strtype(jsonsl_type_t type)
724 {
725 #define X(o,c) \
726 if (type == JSONSL_T_##o) \
727 return #o;
728 JSONSL_XTYPE
729 #undef X
730 return "UNKNOWN TYPE";
731
732 }
733
734 /*
735 *
736 * JPR/JSONPointer functions
737 *
738 *
739 */
740 #ifndef JSONSL_NO_JPR
741 static
742 jsonsl_jpr_type_t
populate_component(char * in,struct jsonsl_jpr_component_st * component,char ** next,jsonsl_error_t * errp)743 populate_component(char *in,
744 struct jsonsl_jpr_component_st *component,
745 char **next,
746 jsonsl_error_t *errp)
747 {
748 unsigned long pctval;
749 char *c = NULL, *outp = NULL, *end = NULL;
750 size_t input_len;
751 jsonsl_jpr_type_t ret = JSONSL_PATH_NONE;
752
753 if (*next == NULL || *(*next) == '\0') {
754 return JSONSL_PATH_NONE;
755 }
756
757 /* Replace the next / with a NULL */
758 *next = strstr(in, "/");
759 if (*next != NULL) {
760 *(*next) = '\0'; /* drop the forward slash */
761 input_len = *next - in;
762 end = *next;
763 *next += 1; /* next character after the '/' */
764 } else {
765 input_len = strlen(in);
766 end = in + input_len + 1;
767 }
768
769 component->pstr = in;
770
771 /* Check for special components of interest */
772 if (*in == JSONSL_PATH_WILDCARD_CHAR && input_len == 1) {
773 /* Lone wildcard */
774 ret = JSONSL_PATH_WILDCARD;
775 goto GT_RET;
776 } else if (isdigit(*in)) {
777 /* ASCII Numeric */
778 char *endptr;
779 component->idx = strtoul(in, &endptr, 10);
780 if (endptr && *endptr == '\0') {
781 ret = JSONSL_PATH_NUMERIC;
782 goto GT_RET;
783 }
784 }
785
786 /* Default, it's a string */
787 ret = JSONSL_PATH_STRING;
788 for (c = outp = in; c < end; c++, outp++) {
789 char origc;
790 if (*c != '%') {
791 goto GT_ASSIGN;
792 }
793 /*
794 * c = { [+0] = '%', [+1] = 'b', [+2] = 'e', [+3] = '\0' }
795 */
796
797 /* Need %XX */
798 if (c+2 >= end) {
799 *errp = JSONSL_ERROR_PERCENT_BADHEX;
800 return JSONSL_PATH_INVALID;
801 }
802 if (! (isxdigit(*(c+1)) && isxdigit(*(c+2))) ) {
803 *errp = JSONSL_ERROR_PERCENT_BADHEX;
804 return JSONSL_PATH_INVALID;
805 }
806
807 /* Temporarily null-terminate the characters */
808 origc = *(c+3);
809 *(c+3) = '\0';
810 pctval = strtoul(c+1, NULL, 16);
811 *(c+3) = origc;
812
813 *outp = (char) pctval;
814 c += 2;
815 continue;
816
817 GT_ASSIGN:
818 *outp = *c;
819 }
820 /* Null-terminate the string */
821 for (; outp < c; outp++) {
822 *outp = '\0';
823 }
824
825 GT_RET:
826 component->ptype = ret;
827 if (ret != JSONSL_PATH_WILDCARD) {
828 component->len = strlen(component->pstr);
829 }
830 return ret;
831 }
832
833 JSONSL_API
834 jsonsl_jpr_t
jsonsl_jpr_new(const char * path,jsonsl_error_t * errp)835 jsonsl_jpr_new(const char *path, jsonsl_error_t *errp)
836 {
837 char *my_copy = NULL;
838 int count, curidx;
839 struct jsonsl_jpr_st *ret = NULL;
840 struct jsonsl_jpr_component_st *components = NULL;
841 size_t origlen;
842 jsonsl_error_t errstacked;
843
844 #define JPR_BAIL(err) *errp = err; goto GT_ERROR;
845
846 if (errp == NULL) {
847 errp = &errstacked;
848 }
849
850 if (path == NULL || *path != '/') {
851 JPR_BAIL(JSONSL_ERROR_JPR_NOROOT);
852 return NULL;
853 }
854
855 count = 1;
856 path++;
857 {
858 const char *c = path;
859 for (; *c; c++) {
860 if (*c == '/') {
861 count++;
862 if (*(c+1) == '/') {
863 JPR_BAIL(JSONSL_ERROR_JPR_DUPSLASH);
864 }
865 }
866 }
867 }
868 if(*path) {
869 count++;
870 }
871
872 components = (struct jsonsl_jpr_component_st *)
873 malloc(sizeof(*components) * count);
874 if (!components) {
875 JPR_BAIL(JSONSL_ERROR_ENOMEM);
876 }
877
878 my_copy = (char *)malloc(strlen(path) + 1);
879 if (!my_copy) {
880 JPR_BAIL(JSONSL_ERROR_ENOMEM);
881 }
882
883 strcpy(my_copy, path);
884
885 components[0].ptype = JSONSL_PATH_ROOT;
886
887 if (*my_copy) {
888 char *cur = my_copy;
889 int pathret = JSONSL_PATH_STRING;
890 curidx = 1;
891 while (pathret > 0 && curidx < count) {
892 pathret = populate_component(cur, components + curidx, &cur, errp);
893 if (pathret > 0) {
894 curidx++;
895 } else {
896 break;
897 }
898 }
899
900 if (pathret == JSONSL_PATH_INVALID) {
901 JPR_BAIL(JSONSL_ERROR_JPR_BADPATH);
902 }
903 } else {
904 curidx = 1;
905 }
906
907 path--; /*revert path to leading '/' */
908 origlen = strlen(path) + 1;
909 ret = (struct jsonsl_jpr_st *)malloc(sizeof(*ret));
910 if (!ret) {
911 JPR_BAIL(JSONSL_ERROR_ENOMEM);
912 }
913 ret->orig = (char *)malloc(origlen);
914 if (!ret->orig) {
915 JPR_BAIL(JSONSL_ERROR_ENOMEM);
916 }
917 ret->components = components;
918 ret->ncomponents = curidx;
919 ret->basestr = my_copy;
920 ret->norig = origlen-1;
921 strcpy(ret->orig, path);
922
923 return ret;
924
925 GT_ERROR:
926 free(my_copy);
927 free(components);
928 if (ret) {
929 free(ret->orig);
930 }
931 free(ret);
932 return NULL;
933 #undef JPR_BAIL
934 }
935
/**
 * Release a JPR object created by jsonsl_jpr_new(), including its component
 * array and both path copies.
 * Passing NULL is allowed and does nothing, matching jsonsl_destroy().
 */
void jsonsl_jpr_destroy(jsonsl_jpr_t jpr)
{
    if (jpr == NULL) {
        return;
    }
    free(jpr->components);
    free(jpr->basestr);
    free(jpr->orig);
    free(jpr);
}
943
944 JSONSL_API
945 jsonsl_jpr_match_t
jsonsl_jpr_match(jsonsl_jpr_t jpr,unsigned int parent_type,unsigned int parent_level,const char * key,size_t nkey)946 jsonsl_jpr_match(jsonsl_jpr_t jpr,
947 unsigned int parent_type,
948 unsigned int parent_level,
949 const char *key,
950 size_t nkey)
951 {
952 /* find our current component. This is the child level */
953 int cmpret;
954 struct jsonsl_jpr_component_st *p_component;
955 p_component = jpr->components + parent_level;
956
957 if (parent_level >= jpr->ncomponents) {
958 return JSONSL_MATCH_NOMATCH;
959 }
960
961 /* Lone query for 'root' element. Always matches */
962 if (parent_level == 0) {
963 if (jpr->ncomponents == 1) {
964 return JSONSL_MATCH_COMPLETE;
965 } else {
966 return JSONSL_MATCH_POSSIBLE;
967 }
968 }
969
970 /* Wildcard, always matches */
971 if (p_component->ptype == JSONSL_PATH_WILDCARD) {
972 if (parent_level == jpr->ncomponents-1) {
973 return JSONSL_MATCH_COMPLETE;
974 } else {
975 return JSONSL_MATCH_POSSIBLE;
976 }
977 }
978
979 /* Check numeric array index. This gets its special block so we can avoid
980 * string comparisons */
981 if (p_component->ptype == JSONSL_PATH_NUMERIC) {
982 if (parent_type == JSONSL_T_LIST) {
983 if (p_component->idx != nkey) {
984 /* Wrong index */
985 return JSONSL_MATCH_NOMATCH;
986 } else {
987 if (parent_level == jpr->ncomponents-1) {
988 /* This is the last element of the path */
989 return JSONSL_MATCH_COMPLETE;
990 } else {
991 /* Intermediate element */
992 return JSONSL_MATCH_POSSIBLE;
993 }
994 }
995 } else if (p_component->is_arridx) {
996 /* Numeric and an array index (set explicitly by user). But not
997 * a list for a parent */
998 return JSONSL_MATCH_TYPE_MISMATCH;
999 }
1000 } else if (parent_type == JSONSL_T_LIST) {
1001 return JSONSL_MATCH_TYPE_MISMATCH;
1002 }
1003
1004 /* Check lengths */
1005 if (p_component->len != nkey) {
1006 return JSONSL_MATCH_NOMATCH;
1007 }
1008
1009 /* Check string comparison */
1010 cmpret = strncmp(p_component->pstr, key, nkey);
1011 if (cmpret == 0) {
1012 if (parent_level == jpr->ncomponents-1) {
1013 return JSONSL_MATCH_COMPLETE;
1014 } else {
1015 return JSONSL_MATCH_POSSIBLE;
1016 }
1017 }
1018
1019 return JSONSL_MATCH_NOMATCH;
1020 }
1021
1022 JSONSL_API
jsonsl_jpr_match_state_init(jsonsl_t jsn,jsonsl_jpr_t * jprs,size_t njprs)1023 void jsonsl_jpr_match_state_init(jsonsl_t jsn,
1024 jsonsl_jpr_t *jprs,
1025 size_t njprs)
1026 {
1027 size_t ii, *firstjmp;
1028 if (njprs == 0) {
1029 return;
1030 }
1031 jsn->jprs = (jsonsl_jpr_t *)malloc(sizeof(jsonsl_jpr_t) * njprs);
1032 jsn->jpr_count = njprs;
1033 jsn->jpr_root = (size_t*)calloc(1, sizeof(size_t) * njprs * jsn->levels_max);
1034 memcpy(jsn->jprs, jprs, sizeof(jsonsl_jpr_t) * njprs);
1035 /* Set the initial jump table values */
1036
1037 firstjmp = jsn->jpr_root;
1038 for (ii = 0; ii < njprs; ii++) {
1039 firstjmp[ii] = ii+1;
1040 }
1041 }
1042
1043 JSONSL_API
jsonsl_jpr_match_state_cleanup(jsonsl_t jsn)1044 void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn)
1045 {
1046 if (jsn->jpr_count == 0) {
1047 return;
1048 }
1049
1050 free(jsn->jpr_root);
1051 free(jsn->jprs);
1052 jsn->jprs = NULL;
1053 jsn->jpr_root = NULL;
1054 jsn->jpr_count = 0;
1055 }
1056
1057 /**
1058 * This function should be called exactly once on each element...
1059 * This should also be called in recursive order, since we rely
1060 * on the parent having been initalized for a match.
1061 *
1062 * Since the parent is checked for a match as well, we maintain a 'serial' counter.
1063 * Whenever we traverse an element, we expect the serial to be the same as a global
1064 * integer. If they do not match, we re-initialize the context, and set the serial.
1065 *
1066 * This ensures a type of consistency without having a proactive reset by the
1067 * main lexer itself.
1068 *
1069 */
1070 JSONSL_API
jsonsl_jpr_match_state(jsonsl_t jsn,struct jsonsl_state_st * state,const char * key,size_t nkey,jsonsl_jpr_match_t * out)1071 jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn,
1072 struct jsonsl_state_st *state,
1073 const char *key,
1074 size_t nkey,
1075 jsonsl_jpr_match_t *out)
1076 {
1077 struct jsonsl_state_st *parent_state;
1078 jsonsl_jpr_t ret = NULL;
1079
1080 /* Jump and JPR tables for our own state and the parent state */
1081 size_t *jmptable, *pjmptable;
1082 size_t jmp_cur, ii, ourjmpidx;
1083
1084 if (!jsn->jpr_root) {
1085 *out = JSONSL_MATCH_NOMATCH;
1086 return NULL;
1087 }
1088
1089 pjmptable = jsn->jpr_root + (jsn->jpr_count * (state->level-1));
1090 jmptable = pjmptable + jsn->jpr_count;
1091
1092 /* If the parent cannot match, then invalidate it */
1093 if (*pjmptable == 0) {
1094 *jmptable = 0;
1095 *out = JSONSL_MATCH_NOMATCH;
1096 return NULL;
1097 }
1098
1099 parent_state = jsn->stack + state->level - 1;
1100
1101 if (parent_state->type == JSONSL_T_LIST) {
1102 nkey = (size_t) parent_state->nelem;
1103 }
1104
1105 *jmptable = 0;
1106 ourjmpidx = 0;
1107 memset(jmptable, 0, sizeof(int) * jsn->jpr_count);
1108
1109 for (ii = 0; ii < jsn->jpr_count; ii++) {
1110 jmp_cur = pjmptable[ii];
1111 if (jmp_cur) {
1112 jsonsl_jpr_t jpr = jsn->jprs[jmp_cur-1];
1113 *out = jsonsl_jpr_match(jpr,
1114 parent_state->type,
1115 parent_state->level,
1116 key, nkey);
1117 if (*out == JSONSL_MATCH_COMPLETE) {
1118 ret = jpr;
1119 *jmptable = 0;
1120 return ret;
1121 } else if (*out == JSONSL_MATCH_POSSIBLE) {
1122 jmptable[ourjmpidx] = ii+1;
1123 ourjmpidx++;
1124 }
1125 } else {
1126 break;
1127 }
1128 }
1129 if (!*jmptable) {
1130 *out = JSONSL_MATCH_NOMATCH;
1131 }
1132 return NULL;
1133 }
1134
1135 JSONSL_API
jsonsl_strmatchtype(jsonsl_jpr_match_t match)1136 const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match)
1137 {
1138 #define X(T,v) \
1139 if ( match == JSONSL_MATCH_##T ) \
1140 return #T;
1141 JSONSL_XMATCH
1142 #undef X
1143 return "<UNKNOWN>";
1144 }
1145
1146 #endif /* !JSONSL_NO_JPR */
1147
1148 /**
1149 * Utility function to convert escape sequences
1150 */
1151 JSONSL_API
jsonsl_util_unescape_ex(const char * in,char * out,size_t len,const int toEscape[128],unsigned * oflags,jsonsl_error_t * err,const char ** errat)1152 size_t jsonsl_util_unescape_ex(const char *in,
1153 char *out,
1154 size_t len,
1155 const int toEscape[128],
1156 unsigned *oflags,
1157 jsonsl_error_t *err,
1158 const char **errat)
1159 {
1160 const unsigned char *c = (const unsigned char*)in;
1161 int in_escape = 0;
1162 size_t origlen = len;
1163 /* difference between the length of the input buffer and the output buffer */
1164 size_t ndiff = 0;
1165 if (oflags) {
1166 *oflags = 0;
1167 }
1168 #define UNESCAPE_BAIL(e,offset) \
1169 *err = JSONSL_ERROR_##e; \
1170 if (errat) { \
1171 *errat = (const char*)(c+ (ptrdiff_t)(offset)); \
1172 } \
1173 return 0;
1174
1175 for (; len; len--, c++, out++) {
1176 unsigned int uesc_val[2];
1177 if (in_escape) {
1178 /* inside a previously ignored escape. Ignore */
1179 in_escape = 0;
1180 goto GT_ASSIGN;
1181 }
1182
1183 if (*c != '\\') {
1184 /* Not an escape, so we don't care about this */
1185 goto GT_ASSIGN;
1186 }
1187
1188 if (len < 2) {
1189 UNESCAPE_BAIL(ESCAPE_INVALID, 0);
1190 }
1191 if (!is_allowed_escape(c[1])) {
1192 UNESCAPE_BAIL(ESCAPE_INVALID, 1)
1193 }
1194 if ((toEscape[(unsigned char)c[1] & 0x7f] == 0 &&
1195 c[1] != '\\' && c[1] != '"')) {
1196 /* if we don't want to unescape this string, just continue with
1197 * the escape flag set
1198 */
1199 in_escape = 1;
1200 goto GT_ASSIGN;
1201 }
1202
1203 if (c[1] != 'u') {
1204 /* simple skip-and-replace using pre-defined maps.
1205 * TODO: should the maps actually reflect the desired
1206 * replacement character in toEscape?
1207 */
1208 char esctmp = get_escape_equiv(c[1]);
1209 if (esctmp) {
1210 /* Check if there is a corresponding replacement */
1211 *out = esctmp;
1212 } else {
1213 /* Just gobble up the 'reverse-solidus' */
1214 *out = c[1];
1215 }
1216 len--;
1217 ndiff++;
1218 c++;
1219 /* do not assign, just continue */
1220 continue;
1221 }
1222
1223 /* next == 'u' */
1224 if (len < 6) {
1225 /* Need at least six characters:
1226 * { [0] = '\\', [1] = 'u', [2] = 'f', [3] = 'f', [4] = 'f', [5] = 'f' }
1227 */
1228 UNESCAPE_BAIL(UESCAPE_TOOSHORT, -1);
1229 }
1230
1231 if (sscanf((const char*)(c+2), "%02x%02x", uesc_val, uesc_val+1) != 2) {
1232 /* We treat the sequence as two octets */
1233 UNESCAPE_BAIL(UESCAPE_TOOSHORT, -1);
1234 }
1235
1236 /* By now, we gobble up all the six bytes (current implied + 5 next
1237 * characters), and have at least four missing bytes from the output
1238 * buffer.
1239 */
1240 len -= 5;
1241 c += 5;
1242
1243 ndiff += 4;
1244 if (uesc_val[0] == 0) {
1245 /* only one byte is extracted from the two
1246 * possible octets. Increment the diff counter by one.
1247 */
1248 *out = uesc_val[1];
1249 if (oflags && *(unsigned char*)out > 0x7f) {
1250 *oflags |= JSONSL_SPECIALf_NONASCII;
1251 }
1252 ndiff++;
1253 } else {
1254 *(out++) = uesc_val[0];
1255 *out = uesc_val[1];
1256 if (oflags && (uesc_val[0] > 0x7f || uesc_val[1] > 0x7f)) {
1257 *oflags |= JSONSL_SPECIALf_NONASCII;
1258 }
1259 }
1260 continue;
1261
1262 /* Only reached by previous branches */
1263 GT_ASSIGN:
1264 *out = *c;
1265 }
1266 *err = JSONSL_ERROR_SUCCESS;
1267 return origlen - ndiff;
1268 }
1269
1270 /**
1271 * Character Table definitions.
1272 * These were all generated via srcutil/genchartables.pl
1273 */
1274
1275 /**
1276 * This table contains the beginnings of non-string
1277 * allowable (bareword) values.
1278 */
1279 static unsigned short Special_Table[0x100] = {
1280 /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
1281 /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x2c */
1282 /* 0x2d */ JSONSL_SPECIALf_DASH /* <-> */, /* 0x2d */
1283 /* 0x2e */ 0,0, /* 0x2f */
1284 /* 0x30 */ JSONSL_SPECIALf_ZERO /* <0> */, /* 0x30 */
1285 /* 0x31 */ JSONSL_SPECIALf_UNSIGNED /* <1> */, /* 0x31 */
1286 /* 0x32 */ JSONSL_SPECIALf_UNSIGNED /* <2> */, /* 0x32 */
1287 /* 0x33 */ JSONSL_SPECIALf_UNSIGNED /* <3> */, /* 0x33 */
1288 /* 0x34 */ JSONSL_SPECIALf_UNSIGNED /* <4> */, /* 0x34 */
1289 /* 0x35 */ JSONSL_SPECIALf_UNSIGNED /* <5> */, /* 0x35 */
1290 /* 0x36 */ JSONSL_SPECIALf_UNSIGNED /* <6> */, /* 0x36 */
1291 /* 0x37 */ JSONSL_SPECIALf_UNSIGNED /* <7> */, /* 0x37 */
1292 /* 0x38 */ JSONSL_SPECIALf_UNSIGNED /* <8> */, /* 0x38 */
1293 /* 0x39 */ JSONSL_SPECIALf_UNSIGNED /* <9> */, /* 0x39 */
1294 /* 0x3a */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x59 */
1295 /* 0x5a */ 0,0,0,0,0,0,0,0,0,0,0,0, /* 0x65 */
1296 /* 0x66 */ JSONSL_SPECIALf_FALSE /* <f> */, /* 0x66 */
1297 /* 0x67 */ 0,0,0,0,0,0,0, /* 0x6d */
1298 /* 0x6e */ JSONSL_SPECIALf_NULL /* <n> */, /* 0x6e */
1299 /* 0x6f */ 0,0,0,0,0, /* 0x73 */
1300 /* 0x74 */ JSONSL_SPECIALf_TRUE /* <t> */, /* 0x74 */
1301 /* 0x75 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x94 */
1302 /* 0x95 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb4 */
1303 /* 0xb5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd4 */
1304 /* 0xd5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf4 */
1305 /* 0xf5 */ 0,0,0,0,0,0,0,0,0,0, /* 0xfe */
1306 };
1307
/**
 * Contains characters which signal the termination of any of the 'special' bareword
 * values.
 *
 * It is indexed by byte value. An entry is 1 for TAB, LF, CR, space, '"',
 * ',', ':', '[', '\', ']', '{' and '}', and 0 for every other byte. Indexes
 * without an explicit initializer are zero-initialized. The table is
 * read-only.
 */
static const int Special_Endings[0x100] = {
    /* 0x00 */ 0,0,0,0,0,0,0,0,0, /* 0x08 */
    /* 0x09 */ 1 /* <TAB> */, /* 0x09 */
    /* 0x0a */ 1 /* <LF> */, /* 0x0a */
    /* 0x0b */ 0,0, /* 0x0c */
    /* 0x0d */ 1 /* <CR> */, /* 0x0d */
    /* 0x0e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
    /* 0x20 */ 1 /* <SP> */, /* 0x20 */
    /* 0x21 */ 0, /* 0x21 */
    /* 0x22 */ 1 /* " */, /* 0x22 */
    /* 0x23 */ 0,0,0,0,0,0,0,0,0, /* 0x2b */
    /* 0x2c */ 1 /* , */, /* 0x2c */
    /* 0x2d */ 0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x39 */
    /* 0x3a */ 1 /* : */, /* 0x3a */
    /* 0x3b */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5a */
    /* 0x5b */ 1 /* [ */, /* 0x5b */
    /* 0x5c */ 1 /* \ */, /* 0x5c */
    /* 0x5d */ 1 /* ] */, /* 0x5d */
    /* 0x5e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x7a */
    /* 0x7b */ 1 /* { */, /* 0x7b */
    /* 0x7c */ 0, /* 0x7c */
    /* 0x7d */ 1 /* } */, /* 0x7d */
    /* 0x7e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x9d */
    /* 0x9e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xbd */
    /* 0xbe */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xdd */
    /* 0xde */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xfd */
    /* 0xfe */ 0 /* 0xfe */
};
1340
/**
 * This table contains entries for the allowed whitespace as per RFC 4627
 *
 * It is indexed by byte value. An entry is 1 for TAB, LF, CR and space, and
 * 0 for every other byte. Indexes without an explicit initializer are
 * zero-initialized. The table is read-only.
 */
static const int Allowed_Whitespace[0x100] = {
    /* 0x00 */ 0,0,0,0,0,0,0,0,0, /* 0x08 */
    /* 0x09 */ 1 /* <TAB> */, /* 0x09 */
    /* 0x0a */ 1 /* <LF> */, /* 0x0a */
    /* 0x0b */ 0,0, /* 0x0c */
    /* 0x0d */ 1 /* <CR> */, /* 0x0d */
    /* 0x0e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
    /* 0x20 */ 1 /* <SP> */, /* 0x20 */
    /* 0x21 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x40 */
    /* 0x41 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x60 */
    /* 0x61 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80 */
    /* 0x81 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0 */
    /* 0xa1 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xc0 */
    /* 0xc1 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xe0 */
    /* 0xe1 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 /* 0xfe */
};
1360
/**
 * Allowable two-character 'common' escapes:
 *
 * It is indexed by the byte which follows the backslash. An entry is 1 for
 * '"', '/', '\', 'b', 'f', 'n', 'r', 't' and 'u', and 0 for every other
 * byte. Indexes without an explicit initializer are zero-initialized.
 * The table is read-only.
 */
static const int Allowed_Escapes[0x100] = {
    /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
    /* 0x20 */ 0,0, /* 0x21 */
    /* 0x22 */ 1 /* <"> */, /* 0x22 */
    /* 0x23 */ 0,0,0,0,0,0,0,0,0,0,0,0, /* 0x2e */
    /* 0x2f */ 1 /* </> */, /* 0x2f */
    /* 0x30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x4f */
    /* 0x50 */ 0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5b */
    /* 0x5c */ 1 /* <\> */, /* 0x5c */
    /* 0x5d */ 0,0,0,0,0, /* 0x61 */
    /* 0x62 */ 1 /* <b> */, /* 0x62 */
    /* 0x63 */ 0,0,0, /* 0x65 */
    /* 0x66 */ 1 /* <f> */, /* 0x66 */
    /* 0x67 */ 0,0,0,0,0,0,0, /* 0x6d */
    /* 0x6e */ 1 /* <n> */, /* 0x6e */
    /* 0x6f */ 0,0,0, /* 0x71 */
    /* 0x72 */ 1 /* <r> */, /* 0x72 */
    /* 0x73 */ 0, /* 0x73 */
    /* 0x74 */ 1 /* <t> */, /* 0x74 */
    /* 0x75 */ 1 /* <u> */, /* 0x75 */
    /* 0x76 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x95 */
    /* 0x96 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb5 */
    /* 0xb6 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd5 */
    /* 0xd6 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf5 */
    /* 0xf6 */ 0,0,0,0,0,0,0,0,0, /* 0xfe */
};
1390
/**
 * This table contains the _values_ for a given (single) escaped character.
 *
 * It is indexed by the byte which follows the backslash. The entries are
 * 'b' -> 8, 'f' -> 12, 'n' -> 10, 'r' -> 13 and 't' -> 9; every other entry
 * is 0, meaning the table holds no replacement for that byte. Indexes
 * without an explicit initializer are zero-initialized. The table is
 * read-only.
 */
static const unsigned char Escape_Equivs[0x100] = {
    /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
    /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x3f */
    /* 0x40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5f */
    /* 0x60 */ 0,0, /* 0x61 */
    /* 0x62 */ 8 /* <b> */, /* 0x62 */
    /* 0x63 */ 0,0,0, /* 0x65 */
    /* 0x66 */ 12 /* <f> */, /* 0x66 */
    /* 0x67 */ 0,0,0,0,0,0,0, /* 0x6d */
    /* 0x6e */ 10 /* <n> */, /* 0x6e */
    /* 0x6f */ 0,0,0, /* 0x71 */
    /* 0x72 */ 13 /* <r> */, /* 0x72 */
    /* 0x73 */ 0, /* 0x73 */
    /* 0x74 */ 9 /* <t> */, /* 0x74 */
    /* 0x75 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x94 */
    /* 0x95 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb4 */
    /* 0xb5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd4 */
    /* 0xd5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf4 */
    /* 0xf5 */ 0,0,0,0,0,0,0,0,0,0 /* 0xfe */
};
1414
1415 /* Definitions of above-declared static functions */
get_escape_equiv(unsigned c)1416 static char get_escape_equiv(unsigned c) {
1417 return Escape_Equivs[c & 0xff];
1418 }
extract_special(unsigned c)1419 static unsigned extract_special(unsigned c) {
1420 return Special_Table[c & 0xff];
1421 }
is_special_end(unsigned c)1422 static int is_special_end(unsigned c) {
1423 return Special_Endings[c & 0xff];
1424 }
is_allowed_whitespace(unsigned c)1425 static int is_allowed_whitespace(unsigned c) {
1426 return c == ' ' || Allowed_Whitespace[c & 0xff];
1427 }
is_allowed_escape(unsigned c)1428 static int is_allowed_escape(unsigned c) {
1429 return Allowed_Escapes[c & 0xff];
1430 }
1431
1432 /* Clean up all our macros! */
1433 #undef INCR_METRIC
1434 #undef INCR_GENERIC
1435 #undef INCR_STRINGY_CATCH
1436 #undef CASE_DIGITS
1437 #undef INVOKE_ERROR
1438 #undef STACK_PUSH
1439 #undef STACK_POP_NOPOS
1440 #undef STACK_POP
1441 #undef CALLBACK_AND_POP_NOPOS
1442 #undef CALLBACK_AND_POP
1443 #undef SPECIAL_POP
1444 #undef CUR_CHAR
1445 #undef DO_CALLBACK
1446 #undef ENSURE_HVAL
1447 #undef VERIFY_SPECIAL
1448 #undef STATE_SPECIAL_LENGTH
1449 #undef IS_NORMAL_NUMBER
1450 #undef STATE_NUM_LAST
1451 #undef FASTPARSE_EXHAUSTED
1452 #undef FASTPARSE_BREAK
1453