1 /*
2 * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
3 *
4 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
5 * Michael Clark <michael@metaparadigm.com>
6 *
7 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the MIT license. See COPYING for details.
9 *
10 *
11 * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
12 * The copyrights to the contents of this file are licensed under the MIT License
13 * (http://www.opensource.org/licenses/mit-license.php)
14 */
15
16 #include "config.h"
17
18 #include "math_compat.h"
19 #include <assert.h>
20 #include <ctype.h>
21 #include <limits.h>
22 #include <math.h>
23 #include <stddef.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "debug.h"
29 #include "json_inttypes.h"
30 #include "json_object.h"
31 #include "json_object_private.h"
32 #include "json_tokener.h"
33 #include "json_util.h"
34 #include "printbuf.h"
35 #include "strdup_compat.h"
36
37 #ifdef HAVE_LOCALE_H
38 #include <locale.h>
39 #endif /* HAVE_LOCALE_H */
40 #ifdef HAVE_XLOCALE_H
41 #include <xlocale.h>
42 #endif
43 #ifdef HAVE_STRINGS_H
44 #include <strings.h>
45 #endif /* HAVE_STRINGS_H */
46
/*
 * jt_hexdigit(x): numeric value of the hexadecimal digit character x.
 * Anything up to '9' is treated as a decimal digit; anything above it is
 * treated as a letter whose low three bits give its position
 * (a/A -> 1 ... f/F -> 6), hence the "+ 9".
 * No validation happens here, and x is evaluated more than once, so the
 * argument must be free of side effects.
 */
#define jt_hexdigit(x) (((x) > '9') ? (((x) & 7) + 9) : ((x) - '0'))
48
49 #if !HAVE_STRNCASECMP && defined(_MSC_VER)
50 /* MSC has the version as _strnicmp */
51 #define strncasecmp _strnicmp
52 #elif !HAVE_STRNCASECMP
53 #error You do not have strncasecmp on your system.
54 #endif /* HAVE_STRNCASECMP */
55
/* Prefer the C99 NAN macro; where it is missing, nan("") serves as a fallback. */
#if !defined(NAN)
#define NAN nan("")
#endif /* !defined(NAN) */
60
/*
 * Literal keywords recognised by the tokener.
 * json_inf_str_lower is the lower-case spelling of json_inf_str; both have
 * the length recorded in json_inf_str_len.
 */
static const char json_null_str[] = "null";
static const char json_inf_str[] = "Infinity";
static const char json_inf_str_lower[] = "infinity";
static const char json_nan_str[] = "NaN";
static const char json_true_str[] = "true";
static const char json_false_str[] = "false";

/* Keyword lengths; the "- 1" drops the terminating NUL counted by sizeof. */
static const int json_null_str_len = sizeof json_null_str - 1;
static const unsigned int json_inf_str_len = sizeof json_inf_str - 1;
static const int json_nan_str_len = sizeof json_nan_str - 1;
static const int json_true_str_len = sizeof json_true_str - 1;
static const int json_false_str_len = sizeof json_false_str - 1;
72
73 /* clang-format off */
/*
 * Human-readable messages for tokener error codes; json_tokener_error_desc()
 * indexes this table with the numeric value of the error code.
 * Both the strings and the pointer slots are const: this is a pure lookup
 * table and must never be modified at run time.
 */
static const char *const json_tokener_errors[] = {
	"success",
	"continue",
	"nesting too deep",
	"unexpected end of data",
	"unexpected character",
	"null expected",
	"boolean expected",
	"number expected",
	"array value separator ',' expected",
	"quoted object property name expected",
	"object property name separator ':' expected",
	"object value separator ',' expected",
	"invalid string sequence",
	"expected comment",
	"invalid utf-8 string",
	"buffer size overflow"
};
92 /* clang-format on */
93
/**
 * Validate the UTF-8 string in strict mode.
 * If the input is not valid UTF-8, an error is returned.
 */
98 static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes);
99
100 static int json_tokener_parse_double(const char *buf, int len, double *retval);
101
json_tokener_error_desc(enum json_tokener_error jerr)102 const char *json_tokener_error_desc(enum json_tokener_error jerr)
103 {
104 int jerr_int = (int)jerr;
105 if (jerr_int < 0 ||
106 jerr_int >= (int)(sizeof(json_tokener_errors) / sizeof(json_tokener_errors[0])))
107 return "Unknown error, "
108 "invalid json_tokener_error value passed to json_tokener_error_desc()";
109 return json_tokener_errors[jerr];
110 }
111
json_tokener_get_error(struct json_tokener * tok)112 enum json_tokener_error json_tokener_get_error(struct json_tokener *tok)
113 {
114 return tok->err;
115 }
116
/*
 * Helpers for decoding \uNNNN escape sequences.
 *
 * IS_HIGH_SURROGATE / IS_LOW_SURROGATE look only at bits 10-15 of uc and
 * compare them with the 0xD800 / 0xDC00 prefixes.
 * DECODE_SURROGATE_PAIR combines the low ten bits of each half and adds the
 * 0x10000 offset to obtain the code point.
 */
#define IS_HIGH_SURROGATE(uc) (0xD800 == ((uc) & 0xFC00))
#define IS_LOW_SURROGATE(uc) (0xDC00 == ((uc) & 0xFC00))
#define DECODE_SURROGATE_PAIR(hi, lo) (0x10000 + (((hi) & 0x3FF) << 10) + ((lo) & 0x3FF))

/* The three bytes EF BF BD: the UTF-8 encoding of U+FFFD, emitted in place
 * of escape sequences that cannot be decoded. */
static unsigned char utf8_replacement_char[3] = {0xEF, 0xBF, 0xBD};
122
json_tokener_new_ex(int depth)123 struct json_tokener *json_tokener_new_ex(int depth)
124 {
125 struct json_tokener *tok;
126
127 tok = (struct json_tokener *)calloc(1, sizeof(struct json_tokener));
128 if (!tok)
129 return NULL;
130 tok->stack = (struct json_tokener_srec *)calloc(depth, sizeof(struct json_tokener_srec));
131 if (!tok->stack)
132 {
133 free(tok);
134 return NULL;
135 }
136 tok->pb = printbuf_new();
137 tok->max_depth = depth;
138 json_tokener_reset(tok);
139 return tok;
140 }
141
json_tokener_new(void)142 struct json_tokener *json_tokener_new(void)
143 {
144 return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH);
145 }
146
json_tokener_free(struct json_tokener * tok)147 void json_tokener_free(struct json_tokener *tok)
148 {
149 json_tokener_reset(tok);
150 if (tok->pb)
151 printbuf_free(tok->pb);
152 free(tok->stack);
153 free(tok);
154 }
155
json_tokener_reset_level(struct json_tokener * tok,int depth)156 static void json_tokener_reset_level(struct json_tokener *tok, int depth)
157 {
158 tok->stack[depth].state = json_tokener_state_eatws;
159 tok->stack[depth].saved_state = json_tokener_state_start;
160 json_object_put(tok->stack[depth].current);
161 tok->stack[depth].current = NULL;
162 free(tok->stack[depth].obj_field_name);
163 tok->stack[depth].obj_field_name = NULL;
164 }
165
json_tokener_reset(struct json_tokener * tok)166 void json_tokener_reset(struct json_tokener *tok)
167 {
168 int i;
169 if (!tok)
170 return;
171
172 for (i = tok->depth; i >= 0; i--)
173 json_tokener_reset_level(tok, i);
174 tok->depth = 0;
175 tok->err = json_tokener_success;
176 }
177
json_tokener_parse(const char * str)178 struct json_object *json_tokener_parse(const char *str)
179 {
180 enum json_tokener_error jerr_ignored;
181 struct json_object *obj;
182 obj = json_tokener_parse_verbose(str, &jerr_ignored);
183 return obj;
184 }
185
json_tokener_parse_verbose(const char * str,enum json_tokener_error * error)186 struct json_object *json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)
187 {
188 struct json_tokener *tok;
189 struct json_object *obj;
190
191 tok = json_tokener_new();
192 if (!tok)
193 return NULL;
194 obj = json_tokener_parse_ex(tok, str, -1);
195 *error = tok->err;
196 if (tok->err != json_tokener_success
197 #if 0
198 /* This would be a more sensible default, and cause parsing
199 * things like "null123" to fail when the caller can't know
200 * where the parsing left off, but starting to fail would
201 * be a notable behaviour change. Save for a 1.0 release.
202 */
203 || json_tokener_get_parse_end(tok) != strlen(str)
204 #endif
205 )
206
207 {
208 if (obj != NULL)
209 json_object_put(obj);
210 obj = NULL;
211 }
212
213 json_tokener_free(tok);
214 return obj;
215 }
216
/*
 * Shorthand for the fields of the stack record at the tokener's current
 * depth. Each one expands to an lvalue and relies on a variable named `tok`
 * being in scope at the point of use.
 */
#define state (tok->stack[tok->depth].state)
#define saved_state (tok->stack[tok->depth].saved_state)
#define current (tok->stack[tok->depth].current)
#define obj_field_name (tok->stack[tok->depth].obj_field_name)
221
/* Optimization:
 * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
 * iterating character by character.  A large performance boost is
 * achieved by using tighter loops to locally handle units such as
 * comments and strings.  Loops that handle an entire token within
 * their scope also gather entire strings and pass them to
 * printbuf_memappend() in a single call, rather than calling
 * printbuf_memappend() one char at a time.
 *
 * The PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is
 * common to both the main loop and the tighter loops.
 */
234
/* PEEK_CHAR(dest, tok) macro:
 *   Looks at the current input char without consuming it.
 *   - Input available: stores *str in dest and evaluates to 1. When the
 *     JSON_TOKENER_VALIDATE_UTF8 flag is set, the byte is first passed to
 *     json_tokener_validate_utf8(); on rejection tok->err becomes
 *     json_tokener_error_parse_utf8_string and the macro evaluates to 0.
 *   - tok->char_offset == len (no more chars): evaluates to 0 and sets
 *     tok->err to json_tokener_success if the tokener is at depth 0 in
 *     state eatws with saved state finish, otherwise to
 *     json_tokener_continue.
 *   Implicit inputs: str, len, nBytesp vars (plus the state/saved_state
 *   shorthands, which use the variable named tok).
 */
#define PEEK_CHAR(dest, tok) \
	(((tok)->char_offset != len) \
	     ? ((!((tok)->flags & JSON_TOKENER_VALIDATE_UTF8) || \
	         json_tokener_validate_utf8(*str, nBytesp)) \
	            ? (((dest) = *str), 1) \
	            : (((tok)->err = json_tokener_error_parse_utf8_string), 0)) \
	     : (((tok)->depth != 0 || state != json_tokener_state_eatws || \
	         saved_state != json_tokener_state_finish) \
	            ? (((tok)->err = json_tokener_continue), 0) \
	            : (((tok)->err = json_tokener_success), 0)))
250
/* ADVANCE_CHAR(str, tok) macro:
 *   Moves str forward by one char and bumps tok->char_offset by one.
 *   As a convenience for existing conditionals, the expression evaluates to
 *   c, i.e. the char that was current before advancing (0 on eof).
 *   Implicit inputs: c var
 */
#define ADVANCE_CHAR(str, tok) (((str) += 1), ((tok)->char_offset += 1), c)
257
258 /* End optimization macro defs */
259
json_tokener_parse_ex(struct json_tokener * tok,const char * str,int len)260 struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *str, int len)
261 {
262 struct json_object *obj = NULL;
263 char c = '\1';
264 unsigned int nBytes = 0;
265 unsigned int *nBytesp = &nBytes;
266
267 #ifdef HAVE_USELOCALE
268 locale_t oldlocale = uselocale(NULL);
269 locale_t newloc;
270 #elif defined(HAVE_SETLOCALE)
271 char *oldlocale = NULL;
272 #endif
273
274 tok->char_offset = 0;
275 tok->err = json_tokener_success;
276
277 /* this interface is presently not 64-bit clean due to the int len argument
278 * and the internal printbuf interface that takes 32-bit int len arguments
279 * so the function limits the maximum string size to INT32_MAX (2GB).
280 * If the function is called with len == -1 then strlen is called to check
281 * the string length is less than INT32_MAX (2GB)
282 */
283 if ((len < -1) || (len == -1 && strlen(str) > INT32_MAX))
284 {
285 tok->err = json_tokener_error_size;
286 return NULL;
287 }
288
289 #ifdef HAVE_USELOCALE
290 {
291 locale_t duploc = duplocale(oldlocale);
292 newloc = newlocale(LC_NUMERIC_MASK, "C", duploc);
293 if (newloc == NULL)
294 {
295 freelocale(duploc);
296 return NULL;
297 }
298 uselocale(newloc);
299 }
300 #elif defined(HAVE_SETLOCALE)
301 {
302 char *tmplocale;
303 tmplocale = setlocale(LC_NUMERIC, NULL);
304 if (tmplocale)
305 oldlocale = strdup(tmplocale);
306 setlocale(LC_NUMERIC, "C");
307 }
308 #endif
309
310 while (PEEK_CHAR(c, tok)) // Note: c might be '\0' !
311 {
312
313 redo_char:
314 switch (state)
315 {
316
317 case json_tokener_state_eatws:
318 /* Advance until we change state */
319 while (isspace((unsigned char)c))
320 {
321 if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok)))
322 goto out;
323 }
324 if (c == '/' && !(tok->flags & JSON_TOKENER_STRICT))
325 {
326 printbuf_reset(tok->pb);
327 printbuf_memappend_fast(tok->pb, &c, 1);
328 state = json_tokener_state_comment_start;
329 }
330 else
331 {
332 state = saved_state;
333 goto redo_char;
334 }
335 break;
336
337 case json_tokener_state_start:
338 switch (c)
339 {
340 case '{':
341 state = json_tokener_state_eatws;
342 saved_state = json_tokener_state_object_field_start;
343 current = json_object_new_object();
344 if (current == NULL)
345 goto out;
346 break;
347 case '[':
348 state = json_tokener_state_eatws;
349 saved_state = json_tokener_state_array;
350 current = json_object_new_array();
351 if (current == NULL)
352 goto out;
353 break;
354 case 'I':
355 case 'i':
356 state = json_tokener_state_inf;
357 printbuf_reset(tok->pb);
358 tok->st_pos = 0;
359 goto redo_char;
360 case 'N':
361 case 'n':
362 state = json_tokener_state_null; // or NaN
363 printbuf_reset(tok->pb);
364 tok->st_pos = 0;
365 goto redo_char;
366 case '\'':
367 if (tok->flags & JSON_TOKENER_STRICT)
368 {
369 /* in STRICT mode only double-quote are allowed */
370 tok->err = json_tokener_error_parse_unexpected;
371 goto out;
372 }
373 /* FALLTHRU */
374 case '"':
375 state = json_tokener_state_string;
376 printbuf_reset(tok->pb);
377 tok->quote_char = c;
378 break;
379 case 'T':
380 case 't':
381 case 'F':
382 case 'f':
383 state = json_tokener_state_boolean;
384 printbuf_reset(tok->pb);
385 tok->st_pos = 0;
386 goto redo_char;
387 case '0':
388 case '1':
389 case '2':
390 case '3':
391 case '4':
392 case '5':
393 case '6':
394 case '7':
395 case '8':
396 case '9':
397 case '-':
398 state = json_tokener_state_number;
399 printbuf_reset(tok->pb);
400 tok->is_double = 0;
401 goto redo_char;
402 default: tok->err = json_tokener_error_parse_unexpected; goto out;
403 }
404 break;
405
406 case json_tokener_state_finish:
407 if (tok->depth == 0)
408 goto out;
409 obj = json_object_get(current);
410 json_tokener_reset_level(tok, tok->depth);
411 tok->depth--;
412 goto redo_char;
413
414 case json_tokener_state_inf: /* aka starts with 'i' (or 'I', or "-i", or "-I") */
415 {
416 /* If we were guaranteed to have len set, then we could (usually) handle
417 * the entire "Infinity" check in a single strncmp (strncasecmp), but
418 * since len might be -1 (i.e. "read until \0"), we need to check it
419 * a character at a time.
420 * Trying to handle it both ways would make this code considerably more
421 * complicated with likely little performance benefit.
422 */
423 int is_negative = 0;
424 const char *_json_inf_str = json_inf_str;
425 if (!(tok->flags & JSON_TOKENER_STRICT))
426 _json_inf_str = json_inf_str_lower;
427
428 /* Note: tok->st_pos must be 0 when state is set to json_tokener_state_inf */
429 while (tok->st_pos < (int)json_inf_str_len)
430 {
431 char inf_char = *str;
432 if (!(tok->flags & JSON_TOKENER_STRICT))
433 inf_char = tolower((unsigned char)*str);
434 if (inf_char != _json_inf_str[tok->st_pos])
435 {
436 tok->err = json_tokener_error_parse_unexpected;
437 goto out;
438 }
439 tok->st_pos++;
440 (void)ADVANCE_CHAR(str, tok);
441 if (!PEEK_CHAR(c, tok))
442 {
443 /* out of input chars, for now at least */
444 goto out;
445 }
446 }
447 /* We checked the full length of "Infinity", so create the object.
448 * When handling -Infinity, the number parsing code will have dropped
449 * the "-" into tok->pb for us, so check it now.
450 */
451 if (printbuf_length(tok->pb) > 0 && *(tok->pb->buf) == '-')
452 {
453 is_negative = 1;
454 }
455 current = json_object_new_double(is_negative ? -INFINITY : INFINITY);
456 if (current == NULL)
457 goto out;
458 saved_state = json_tokener_state_finish;
459 state = json_tokener_state_eatws;
460 goto redo_char;
461 }
462 break;
463 case json_tokener_state_null: /* aka starts with 'n' */
464 {
465 int size;
466 int size_nan;
467 printbuf_memappend_fast(tok->pb, &c, 1);
468 size = json_min(tok->st_pos + 1, json_null_str_len);
469 size_nan = json_min(tok->st_pos + 1, json_nan_str_len);
470 if ((!(tok->flags & JSON_TOKENER_STRICT) &&
471 strncasecmp(json_null_str, tok->pb->buf, size) == 0) ||
472 (strncmp(json_null_str, tok->pb->buf, size) == 0))
473 {
474 if (tok->st_pos == json_null_str_len)
475 {
476 current = NULL;
477 saved_state = json_tokener_state_finish;
478 state = json_tokener_state_eatws;
479 goto redo_char;
480 }
481 }
482 else if ((!(tok->flags & JSON_TOKENER_STRICT) &&
483 strncasecmp(json_nan_str, tok->pb->buf, size_nan) == 0) ||
484 (strncmp(json_nan_str, tok->pb->buf, size_nan) == 0))
485 {
486 if (tok->st_pos == json_nan_str_len)
487 {
488 current = json_object_new_double(NAN);
489 if (current == NULL)
490 goto out;
491 saved_state = json_tokener_state_finish;
492 state = json_tokener_state_eatws;
493 goto redo_char;
494 }
495 }
496 else
497 {
498 tok->err = json_tokener_error_parse_null;
499 goto out;
500 }
501 tok->st_pos++;
502 }
503 break;
504
505 case json_tokener_state_comment_start:
506 if (c == '*')
507 {
508 state = json_tokener_state_comment;
509 }
510 else if (c == '/')
511 {
512 state = json_tokener_state_comment_eol;
513 }
514 else
515 {
516 tok->err = json_tokener_error_parse_comment;
517 goto out;
518 }
519 printbuf_memappend_fast(tok->pb, &c, 1);
520 break;
521
522 case json_tokener_state_comment:
523 {
524 /* Advance until we change state */
525 const char *case_start = str;
526 while (c != '*')
527 {
528 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
529 {
530 printbuf_memappend_fast(tok->pb, case_start,
531 str - case_start);
532 goto out;
533 }
534 }
535 printbuf_memappend_fast(tok->pb, case_start, 1 + str - case_start);
536 state = json_tokener_state_comment_end;
537 }
538 break;
539
540 case json_tokener_state_comment_eol:
541 {
542 /* Advance until we change state */
543 const char *case_start = str;
544 while (c != '\n')
545 {
546 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
547 {
548 printbuf_memappend_fast(tok->pb, case_start,
549 str - case_start);
550 goto out;
551 }
552 }
553 printbuf_memappend_fast(tok->pb, case_start, str - case_start);
554 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
555 state = json_tokener_state_eatws;
556 }
557 break;
558
559 case json_tokener_state_comment_end:
560 printbuf_memappend_fast(tok->pb, &c, 1);
561 if (c == '/')
562 {
563 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
564 state = json_tokener_state_eatws;
565 }
566 else
567 {
568 state = json_tokener_state_comment;
569 }
570 break;
571
572 case json_tokener_state_string:
573 {
574 /* Advance until we change state */
575 const char *case_start = str;
576 while (1)
577 {
578 if (c == tok->quote_char)
579 {
580 printbuf_memappend_fast(tok->pb, case_start,
581 str - case_start);
582 current =
583 json_object_new_string_len(tok->pb->buf, tok->pb->bpos);
584 if (current == NULL)
585 goto out;
586 saved_state = json_tokener_state_finish;
587 state = json_tokener_state_eatws;
588 break;
589 }
590 else if (c == '\\')
591 {
592 printbuf_memappend_fast(tok->pb, case_start,
593 str - case_start);
594 saved_state = json_tokener_state_string;
595 state = json_tokener_state_string_escape;
596 break;
597 }
598 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
599 {
600 printbuf_memappend_fast(tok->pb, case_start,
601 str - case_start);
602 goto out;
603 }
604 }
605 }
606 break;
607
608 case json_tokener_state_string_escape:
609 switch (c)
610 {
611 case '"':
612 case '\\':
613 case '/':
614 printbuf_memappend_fast(tok->pb, &c, 1);
615 state = saved_state;
616 break;
617 case 'b':
618 case 'n':
619 case 'r':
620 case 't':
621 case 'f':
622 if (c == 'b')
623 printbuf_memappend_fast(tok->pb, "\b", 1);
624 else if (c == 'n')
625 printbuf_memappend_fast(tok->pb, "\n", 1);
626 else if (c == 'r')
627 printbuf_memappend_fast(tok->pb, "\r", 1);
628 else if (c == 't')
629 printbuf_memappend_fast(tok->pb, "\t", 1);
630 else if (c == 'f')
631 printbuf_memappend_fast(tok->pb, "\f", 1);
632 state = saved_state;
633 break;
634 case 'u':
635 tok->ucs_char = 0;
636 tok->st_pos = 0;
637 state = json_tokener_state_escape_unicode;
638 break;
639 default: tok->err = json_tokener_error_parse_string; goto out;
640 }
641 break;
642
643 // ===================================================
644
645 case json_tokener_state_escape_unicode:
646 {
647 /* Handle a 4-byte \uNNNN sequence, or two sequences if a surrogate pair */
648 while (1)
649 {
650 if (!c || !strchr(json_hex_chars, c))
651 {
652 tok->err = json_tokener_error_parse_string;
653 goto out;
654 }
655 tok->ucs_char |=
656 ((unsigned int)jt_hexdigit(c) << ((3 - tok->st_pos) * 4));
657 tok->st_pos++;
658 if (tok->st_pos >= 4)
659 break;
660
661 (void)ADVANCE_CHAR(str, tok);
662 if (!PEEK_CHAR(c, tok))
663 {
664 /*
665 * We're out of characters in the current call to
666 * json_tokener_parse(), but a subsequent call might
667 * provide us with more, so leave our current state
668 * as-is (including tok->high_surrogate) and return.
669 */
670 goto out;
671 }
672 }
673 tok->st_pos = 0;
674
675 /* Now, we have a full \uNNNN sequence in tok->ucs_char */
676
677 /* If the *previous* sequence was a high surrogate ... */
678 if (tok->high_surrogate)
679 {
680 if (IS_LOW_SURROGATE(tok->ucs_char))
681 {
682 /* Recalculate the ucs_char, then fall thru to process normally */
683 tok->ucs_char = DECODE_SURROGATE_PAIR(tok->high_surrogate,
684 tok->ucs_char);
685 }
686 else
687 {
688 /* High surrogate was not followed by a low surrogate
689 * Replace the high and process the rest normally
690 */
691 printbuf_memappend_fast(tok->pb,
692 (char *)utf8_replacement_char, 3);
693 }
694 tok->high_surrogate = 0;
695 }
696
697 if (tok->ucs_char < 0x80)
698 {
699 unsigned char unescaped_utf[1];
700 unescaped_utf[0] = tok->ucs_char;
701 printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 1);
702 }
703 else if (tok->ucs_char < 0x800)
704 {
705 unsigned char unescaped_utf[2];
706 unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
707 unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
708 printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 2);
709 }
710 else if (IS_HIGH_SURROGATE(tok->ucs_char))
711 {
712 /*
713 * The next two characters should be \u, HOWEVER,
714 * we can't simply peek ahead here, because the
715 * characters we need might not be passed to us
716 * until a subsequent call to json_tokener_parse.
717 * Instead, transition throug a couple of states.
718 * (now):
719 * _escape_unicode => _unicode_need_escape
720 * (see a '\\' char):
721 * _unicode_need_escape => _unicode_need_u
722 * (see a 'u' char):
723 * _unicode_need_u => _escape_unicode
724 * ...and we'll end up back around here.
725 */
726 tok->high_surrogate = tok->ucs_char;
727 tok->ucs_char = 0;
728 state = json_tokener_state_escape_unicode_need_escape;
729 break;
730 }
731 else if (IS_LOW_SURROGATE(tok->ucs_char))
732 {
733 /* Got a low surrogate not preceded by a high */
734 printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
735 }
736 else if (tok->ucs_char < 0x10000)
737 {
738 unsigned char unescaped_utf[3];
739 unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
740 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
741 unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
742 printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 3);
743 }
744 else if (tok->ucs_char < 0x110000)
745 {
746 unsigned char unescaped_utf[4];
747 unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
748 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
749 unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
750 unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
751 printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 4);
752 }
753 else
754 {
755 /* Don't know what we got--insert the replacement char */
756 printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
757 }
758 state = saved_state; // i.e. _state_string or _state_object_field
759 }
760 break;
761
762 case json_tokener_state_escape_unicode_need_escape:
763 // We get here after processing a high_surrogate
764 // require a '\\' char
765 if (!c || c != '\\')
766 {
767 /* Got a high surrogate without another sequence following
768 * it. Put a replacement char in for the high surrogate
769 * and pop back up to _state_string or _state_object_field.
770 */
771 printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
772 tok->high_surrogate = 0;
773 tok->ucs_char = 0;
774 tok->st_pos = 0;
775 state = saved_state;
776 goto redo_char;
777 }
778 state = json_tokener_state_escape_unicode_need_u;
779 break;
780
781 case json_tokener_state_escape_unicode_need_u:
782 /* We already had a \ char, check that it's \u */
783 if (!c || c != 'u')
784 {
785 /* Got a high surrogate with some non-unicode escape
786 * sequence following it.
787 * Put a replacement char in for the high surrogate
788 * and handle the escape sequence normally.
789 */
790 printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
791 tok->high_surrogate = 0;
792 tok->ucs_char = 0;
793 tok->st_pos = 0;
794 state = json_tokener_state_string_escape;
795 goto redo_char;
796 }
797 state = json_tokener_state_escape_unicode;
798 break;
799
800 // ===================================================
801
802 case json_tokener_state_boolean:
803 {
804 int size1, size2;
805 printbuf_memappend_fast(tok->pb, &c, 1);
806 size1 = json_min(tok->st_pos + 1, json_true_str_len);
807 size2 = json_min(tok->st_pos + 1, json_false_str_len);
808 if ((!(tok->flags & JSON_TOKENER_STRICT) &&
809 strncasecmp(json_true_str, tok->pb->buf, size1) == 0) ||
810 (strncmp(json_true_str, tok->pb->buf, size1) == 0))
811 {
812 if (tok->st_pos == json_true_str_len)
813 {
814 current = json_object_new_boolean(1);
815 if (current == NULL)
816 goto out;
817 saved_state = json_tokener_state_finish;
818 state = json_tokener_state_eatws;
819 goto redo_char;
820 }
821 }
822 else if ((!(tok->flags & JSON_TOKENER_STRICT) &&
823 strncasecmp(json_false_str, tok->pb->buf, size2) == 0) ||
824 (strncmp(json_false_str, tok->pb->buf, size2) == 0))
825 {
826 if (tok->st_pos == json_false_str_len)
827 {
828 current = json_object_new_boolean(0);
829 if (current == NULL)
830 goto out;
831 saved_state = json_tokener_state_finish;
832 state = json_tokener_state_eatws;
833 goto redo_char;
834 }
835 }
836 else
837 {
838 tok->err = json_tokener_error_parse_boolean;
839 goto out;
840 }
841 tok->st_pos++;
842 }
843 break;
844
845 case json_tokener_state_number:
846 {
847 /* Advance until we change state */
848 const char *case_start = str;
849 int case_len = 0;
850 int is_exponent = 0;
851 int neg_sign_ok = 1;
852 int pos_sign_ok = 0;
853 if (printbuf_length(tok->pb) > 0)
854 {
855 /* We don't save all state from the previous incremental parse
856 so we need to re-generate it based on the saved string so far.
857 */
858 char *e_loc = strchr(tok->pb->buf, 'e');
859 if (!e_loc)
860 e_loc = strchr(tok->pb->buf, 'E');
861 if (e_loc)
862 {
863 char *last_saved_char =
864 &tok->pb->buf[printbuf_length(tok->pb) - 1];
865 is_exponent = 1;
866 pos_sign_ok = neg_sign_ok = 1;
867 /* If the "e" isn't at the end, we can't start with a '-' */
868 if (e_loc != last_saved_char)
869 {
870 neg_sign_ok = 0;
871 pos_sign_ok = 0;
872 }
873 // else leave it set to 1, i.e. start of the new input
874 }
875 }
876
877 while (c && ((c >= '0' && c <= '9') ||
878 (!is_exponent && (c == 'e' || c == 'E')) ||
879 (neg_sign_ok && c == '-') || (pos_sign_ok && c == '+') ||
880 (!tok->is_double && c == '.')))
881 {
882 pos_sign_ok = neg_sign_ok = 0;
883 ++case_len;
884
885 /* non-digit characters checks */
886 /* note: since the main loop condition to get here was
887 * an input starting with 0-9 or '-', we are
888 * protected from input starting with '.' or
889 * e/E.
890 */
891 switch (c)
892 {
893 case '.':
894 tok->is_double = 1;
895 pos_sign_ok = 1;
896 neg_sign_ok = 1;
897 break;
898 case 'e': /* FALLTHRU */
899 case 'E':
900 is_exponent = 1;
901 tok->is_double = 1;
902 /* the exponent part can begin with a negative sign */
903 pos_sign_ok = neg_sign_ok = 1;
904 break;
905 default: break;
906 }
907
908 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
909 {
910 printbuf_memappend_fast(tok->pb, case_start, case_len);
911 goto out;
912 }
913 }
914 /*
915 Now we know c isn't a valid number char, but check whether
916 it might have been intended to be, and return a potentially
917 more understandable error right away.
918 However, if we're at the top-level, use the number as-is
919 because c can be part of a new object to parse on the
920 next call to json_tokener_parse().
921 */
922 if (tok->depth > 0 && c != ',' && c != ']' && c != '}' && c != '/' &&
923 c != 'I' && c != 'i' && !isspace((unsigned char)c))
924 {
925 tok->err = json_tokener_error_parse_number;
926 goto out;
927 }
928 if (case_len > 0)
929 printbuf_memappend_fast(tok->pb, case_start, case_len);
930
931 // Check for -Infinity
932 if (tok->pb->buf[0] == '-' && case_len <= 1 && (c == 'i' || c == 'I'))
933 {
934 state = json_tokener_state_inf;
935 tok->st_pos = 0;
936 goto redo_char;
937 }
938 if (tok->is_double && !(tok->flags & JSON_TOKENER_STRICT))
939 {
940 /* Trim some chars off the end, to allow things
941 like "123e+" to parse ok. */
942 while (printbuf_length(tok->pb) > 1)
943 {
944 char last_char = tok->pb->buf[printbuf_length(tok->pb) - 1];
945 if (last_char != 'e' && last_char != 'E' &&
946 last_char != '-' && last_char != '+')
947 {
948 break;
949 }
950 tok->pb->buf[printbuf_length(tok->pb) - 1] = '\0';
951 printbuf_length(tok->pb)--;
952 }
953 }
954 }
955 {
956 int64_t num64;
957 uint64_t numuint64;
958 double numd;
959 if (!tok->is_double && tok->pb->buf[0] == '-' &&
960 json_parse_int64(tok->pb->buf, &num64) == 0)
961 {
962 current = json_object_new_int64(num64);
963 if (current == NULL)
964 goto out;
965 }
966 else if (!tok->is_double && tok->pb->buf[0] != '-' &&
967 json_parse_uint64(tok->pb->buf, &numuint64) == 0)
968 {
969 if (numuint64 && tok->pb->buf[0] == '0' &&
970 (tok->flags & JSON_TOKENER_STRICT))
971 {
972 tok->err = json_tokener_error_parse_number;
973 goto out;
974 }
975 if (numuint64 <= INT64_MAX)
976 {
977 num64 = (uint64_t)numuint64;
978 current = json_object_new_int64(num64);
979 if (current == NULL)
980 goto out;
981 }
982 else
983 {
984 current = json_object_new_uint64(numuint64);
985 if (current == NULL)
986 goto out;
987 }
988 }
989 else if (tok->is_double &&
990 json_tokener_parse_double(
991 tok->pb->buf, printbuf_length(tok->pb), &numd) == 0)
992 {
993 current = json_object_new_double_s(numd, tok->pb->buf);
994 if (current == NULL)
995 goto out;
996 }
997 else
998 {
999 tok->err = json_tokener_error_parse_number;
1000 goto out;
1001 }
1002 saved_state = json_tokener_state_finish;
1003 state = json_tokener_state_eatws;
1004 goto redo_char;
1005 }
1006 break;
1007
1008 case json_tokener_state_array_after_sep:
1009 case json_tokener_state_array:
1010 if (c == ']')
1011 {
1012 // Minimize memory usage; assume parsed objs are unlikely to be changed
1013 json_object_array_shrink(current, 0);
1014
1015 if (state == json_tokener_state_array_after_sep &&
1016 (tok->flags & JSON_TOKENER_STRICT))
1017 {
1018 tok->err = json_tokener_error_parse_unexpected;
1019 goto out;
1020 }
1021 saved_state = json_tokener_state_finish;
1022 state = json_tokener_state_eatws;
1023 }
1024 else
1025 {
1026 if (tok->depth >= tok->max_depth - 1)
1027 {
1028 tok->err = json_tokener_error_depth;
1029 goto out;
1030 }
1031 state = json_tokener_state_array_add;
1032 tok->depth++;
1033 json_tokener_reset_level(tok, tok->depth);
1034 goto redo_char;
1035 }
1036 break;
1037
1038 case json_tokener_state_array_add:
1039 if (json_object_array_add(current, obj) != 0)
1040 goto out;
1041 saved_state = json_tokener_state_array_sep;
1042 state = json_tokener_state_eatws;
1043 goto redo_char;
1044
1045 case json_tokener_state_array_sep:
1046 if (c == ']')
1047 {
1048 // Minimize memory usage; assume parsed objs are unlikely to be changed
1049 json_object_array_shrink(current, 0);
1050
1051 saved_state = json_tokener_state_finish;
1052 state = json_tokener_state_eatws;
1053 }
1054 else if (c == ',')
1055 {
1056 saved_state = json_tokener_state_array_after_sep;
1057 state = json_tokener_state_eatws;
1058 }
1059 else
1060 {
1061 tok->err = json_tokener_error_parse_array;
1062 goto out;
1063 }
1064 break;
1065
1066 case json_tokener_state_object_field_start:
1067 case json_tokener_state_object_field_start_after_sep:
1068 if (c == '}')
1069 {
1070 if (state == json_tokener_state_object_field_start_after_sep &&
1071 (tok->flags & JSON_TOKENER_STRICT))
1072 {
1073 tok->err = json_tokener_error_parse_unexpected;
1074 goto out;
1075 }
1076 saved_state = json_tokener_state_finish;
1077 state = json_tokener_state_eatws;
1078 }
1079 else if (c == '"' || c == '\'')
1080 {
1081 tok->quote_char = c;
1082 printbuf_reset(tok->pb);
1083 state = json_tokener_state_object_field;
1084 }
1085 else
1086 {
1087 tok->err = json_tokener_error_parse_object_key_name;
1088 goto out;
1089 }
1090 break;
1091
1092 case json_tokener_state_object_field:
1093 {
1094 /* Advance until we change state */
1095 const char *case_start = str;
1096 while (1)
1097 {
1098 if (c == tok->quote_char)
1099 {
1100 printbuf_memappend_fast(tok->pb, case_start,
1101 str - case_start);
1102 obj_field_name = strdup(tok->pb->buf);
1103 saved_state = json_tokener_state_object_field_end;
1104 state = json_tokener_state_eatws;
1105 break;
1106 }
1107 else if (c == '\\')
1108 {
1109 printbuf_memappend_fast(tok->pb, case_start,
1110 str - case_start);
1111 saved_state = json_tokener_state_object_field;
1112 state = json_tokener_state_string_escape;
1113 break;
1114 }
1115 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
1116 {
1117 printbuf_memappend_fast(tok->pb, case_start,
1118 str - case_start);
1119 goto out;
1120 }
1121 }
1122 }
1123 break;
1124
1125 case json_tokener_state_object_field_end:
1126 if (c == ':')
1127 {
1128 saved_state = json_tokener_state_object_value;
1129 state = json_tokener_state_eatws;
1130 }
1131 else
1132 {
1133 tok->err = json_tokener_error_parse_object_key_sep;
1134 goto out;
1135 }
1136 break;
1137
1138 case json_tokener_state_object_value:
1139 if (tok->depth >= tok->max_depth - 1)
1140 {
1141 tok->err = json_tokener_error_depth;
1142 goto out;
1143 }
1144 state = json_tokener_state_object_value_add;
1145 tok->depth++;
1146 json_tokener_reset_level(tok, tok->depth);
1147 goto redo_char;
1148
1149 case json_tokener_state_object_value_add:
1150 json_object_object_add(current, obj_field_name, obj);
1151 free(obj_field_name);
1152 obj_field_name = NULL;
1153 saved_state = json_tokener_state_object_sep;
1154 state = json_tokener_state_eatws;
1155 goto redo_char;
1156
1157 case json_tokener_state_object_sep:
1158 /* { */
1159 if (c == '}')
1160 {
1161 saved_state = json_tokener_state_finish;
1162 state = json_tokener_state_eatws;
1163 }
1164 else if (c == ',')
1165 {
1166 saved_state = json_tokener_state_object_field_start_after_sep;
1167 state = json_tokener_state_eatws;
1168 }
1169 else
1170 {
1171 tok->err = json_tokener_error_parse_object_value_sep;
1172 goto out;
1173 }
1174 break;
1175 }
1176 (void)ADVANCE_CHAR(str, tok);
1177 if (!c) // This is the char *before* advancing
1178 break;
1179 } /* while(PEEK_CHAR) */
1180
1181 out:
1182 if ((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && (nBytes != 0))
1183 {
1184 tok->err = json_tokener_error_parse_utf8_string;
1185 }
1186 if (c && (state == json_tokener_state_finish) && (tok->depth == 0) &&
1187 (tok->flags & (JSON_TOKENER_STRICT | JSON_TOKENER_ALLOW_TRAILING_CHARS)) ==
1188 JSON_TOKENER_STRICT)
1189 {
1190 /* unexpected char after JSON data */
1191 tok->err = json_tokener_error_parse_unexpected;
1192 }
1193 if (!c)
1194 {
1195 /* We hit an eof char (0) */
1196 if (state != json_tokener_state_finish && saved_state != json_tokener_state_finish)
1197 tok->err = json_tokener_error_parse_eof;
1198 }
1199
1200 #ifdef HAVE_USELOCALE
1201 uselocale(oldlocale);
1202 freelocale(newloc);
1203 #elif defined(HAVE_SETLOCALE)
1204 setlocale(LC_NUMERIC, oldlocale);
1205 free(oldlocale);
1206 #endif
1207
1208 if (tok->err == json_tokener_success)
1209 {
1210 json_object *ret = json_object_get(current);
1211 int ii;
1212
1213 /* Partially reset, so we parse additional objects on subsequent calls. */
1214 for (ii = tok->depth; ii >= 0; ii--)
1215 json_tokener_reset_level(tok, ii);
1216 return ret;
1217 }
1218
1219 MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n", json_tokener_errors[tok->err],
1220 tok->char_offset);
1221 return NULL;
1222 }
1223
/**
 * Incrementally validate one byte of a UTF-8 encoded stream.
 *
 * @param c      the next input byte
 * @param nBytes in/out: number of continuation bytes still expected for the
 *               multi-byte sequence in progress; 0 means c must begin a new
 *               character.  Updated on every successful call.
 * @return 1 if c is acceptable at this position, 0 if it is not.
 *
 * Lead bytes that can never occur in well-formed UTF-8 are rejected:
 * 0xC0 and 0xC1 (they could only start overlong encodings of ASCII) and
 * 0xF5..0xFF (they would encode code points above U+10FFFF).
 *
 * Only the pending-byte count is carried between calls, so defects that
 * depend on the value of the second byte (overlong three- and four-byte
 * forms, UTF-16 surrogates) are not detected here.
 */
static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes)
{
	const unsigned char chr = (unsigned char)c;

	if (*nBytes != 0)
	{
		/* Inside a multi-byte sequence: only 10xxxxxx may follow. */
		if ((chr & 0xC0) != 0x80)
			return 0;
		(*nBytes)--;
		return 1;
	}

	/* Plain ASCII needs no continuation bytes. */
	if (chr < 0x80)
		return 1;

	/* These octets never appear in valid UTF-8. */
	if (chr == 0xC0 || chr == 0xC1 || chr > 0xF4)
		return 0;

	if ((chr & 0xe0) == 0xc0)
		*nBytes = 1;
	else if ((chr & 0xf0) == 0xe0)
		*nBytes = 2;
	else if ((chr & 0xf8) == 0xf0)
		*nBytes = 3;
	else
		return 0; /* stray continuation byte (0x80..0xBF) */

	return 1;
}
1249
/**
 * Replace the option flags stored in a tokener.
 *
 * The value is stored as-is and overwrites whatever was set before; it is
 * not OR-ed into the existing flags.  The parser tests tok->flags against
 * bit masks such as JSON_TOKENER_STRICT, JSON_TOKENER_ALLOW_TRAILING_CHARS
 * and JSON_TOKENER_VALIDATE_UTF8.
 *
 * @param tok   the tokener to configure; dereferenced without a NULL check
 * @param flags the new flag bits
 */
void json_tokener_set_flags(struct json_tokener *tok, int flags)
{
	tok->flags = flags;
}
1254
json_tokener_get_parse_end(struct json_tokener * tok)1255 size_t json_tokener_get_parse_end(struct json_tokener *tok)
1256 {
1257 assert(tok->char_offset >= 0); /* Drop this line when char_offset becomes a size_t */
1258 return (size_t)tok->char_offset;
1259 }
1260
/**
 * Convert the text in buf to a double, requiring that the conversion
 * consumes exactly len characters.
 *
 * @param buf    NUL-terminated text to convert (strtod() reads up to the
 *               terminator, not just len bytes)
 * @param len    number of characters of buf that must make up the number
 * @param retval receives whatever strtod() produced; it is written even
 *               when the function reports failure
 * @return 0 if a number was converted and it spans exactly len characters,
 *         1 otherwise (nothing converted, trailing garbage, or len <= 0)
 */
static int json_tokener_parse_double(const char *buf, int len, double *retval)
{
	char *end;

	*retval = strtod(buf, &end);

	/*
	 * An empty or non-positive length can never describe a number, and
	 * end == buf means strtod() performed no conversion at all.  Checking
	 * this first also avoids forming buf + len for a negative len.
	 */
	if (len <= 0 || end == buf)
		return 1;

	if (buf + len == end)
		return 0; // It worked
	return 1;
}
1269