1 /*
2  * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
3  *
4  * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
5  * Michael Clark <michael@metaparadigm.com>
6  *
7  * This library is free software; you can redistribute it and/or modify
8  * it under the terms of the MIT license. See COPYING for details.
9  *
10  *
11  * Copyright (c) 2008-2009 Yahoo! Inc.  All rights reserved.
12  * The copyrights to the contents of this file are licensed under the MIT License
13  * (http://www.opensource.org/licenses/mit-license.php)
14  */
15 
16 #include "config.h"
17 
18 #include "math_compat.h"
19 #include <assert.h>
20 #include <ctype.h>
21 #include <limits.h>
22 #include <math.h>
23 #include <stddef.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 
28 #include "debug.h"
29 #include "json_inttypes.h"
30 #include "json_object.h"
31 #include "json_object_private.h"
32 #include "json_tokener.h"
33 #include "json_util.h"
34 #include "printbuf.h"
35 #include "strdup_compat.h"
36 
37 #ifdef HAVE_LOCALE_H
38 #include <locale.h>
39 #endif /* HAVE_LOCALE_H */
40 #ifdef HAVE_XLOCALE_H
41 #include <xlocale.h>
42 #endif
43 #ifdef HAVE_STRINGS_H
44 #include <strings.h>
45 #endif /* HAVE_STRINGS_H */
46 
/*
 * Map one ASCII hexadecimal digit character to its numeric value.
 * Anything above '9' is taken to be a letter: its low three bits (1..6 for
 * a-f and A-F) plus 9 give 10..15.  Everything else is treated as a decimal
 * digit.  The argument is expanded more than once and is not validated.
 */
#define jt_hexdigit(x) (((x) > '9') ? (((x) & 7) + 9) : ((x) - '0'))
48 
49 #if !HAVE_STRNCASECMP && defined(_MSC_VER)
50 /* MSC has the version as _strnicmp */
51 #define strncasecmp _strnicmp
52 #elif !HAVE_STRNCASECMP
53 #error You do not have strncasecmp on your system.
54 #endif /* HAVE_STRNCASECMP */
55 
56 /* Use C99 NAN by default; if not available, nan("") should work too. */
57 #ifndef NAN
58 #define NAN nan("")
59 #endif /* !NAN */
60 
/*
 * Spellings of the bare-word literals, each paired with its length excluding
 * the terminating NUL.
 */

/* null */
static const char json_null_str[] = "null";
static const int json_null_str_len = (int)(sizeof(json_null_str) - 1);

/* true / false */
static const char json_true_str[] = "true";
static const int json_true_str_len = (int)(sizeof(json_true_str) - 1);
static const char json_false_str[] = "false";
static const int json_false_str_len = (int)(sizeof(json_false_str) - 1);

/* NaN */
static const char json_nan_str[] = "NaN";
static const int json_nan_str_len = (int)(sizeof(json_nan_str) - 1);

/* Infinity: one length serves both spellings, which differ only in case. */
static const char json_inf_str[] = "Infinity";
static const char json_inf_str_lower[] = "infinity";
static const unsigned int json_inf_str_len = (unsigned int)(sizeof(json_inf_str) - 1);
72 
/*
 * Human-readable error descriptions, looked up by numeric error code.
 * The position of each string is significant: the numeric value of an
 * enum json_tokener_error is used directly as the index.
 */
/* clang-format off */
static const char *json_tokener_errors[] = {
	/*  0 */ "success",
	/*  1 */ "continue",
	/*  2 */ "nesting too deep",
	/*  3 */ "unexpected end of data",
	/*  4 */ "unexpected character",
	/*  5 */ "null expected",
	/*  6 */ "boolean expected",
	/*  7 */ "number expected",
	/*  8 */ "array value separator ',' expected",
	/*  9 */ "quoted object property name expected",
	/* 10 */ "object property name separator ':' expected",
	/* 11 */ "object value separator ',' expected",
	/* 12 */ "invalid string sequence",
	/* 13 */ "expected comment",
	/* 14 */ "invalid utf-8 string",
	/* 15 */ "buffer size overflow"
};
/* clang-format on */
93 
/**
 * Validate the UTF-8 string in strict mode.
 * If it is not valid UTF-8, return an error.
 */
98 static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes);
99 
100 static int json_tokener_parse_double(const char *buf, int len, double *retval);
101 
json_tokener_error_desc(enum json_tokener_error jerr)102 const char *json_tokener_error_desc(enum json_tokener_error jerr)
103 {
104 	int jerr_int = (int)jerr;
105 	if (jerr_int < 0 ||
106 	    jerr_int >= (int)(sizeof(json_tokener_errors) / sizeof(json_tokener_errors[0])))
107 		return "Unknown error, "
108 		       "invalid json_tokener_error value passed to json_tokener_error_desc()";
109 	return json_tokener_errors[jerr];
110 }
111 
json_tokener_get_error(struct json_tokener * tok)112 enum json_tokener_error json_tokener_get_error(struct json_tokener *tok)
113 {
114 	return tok->err;
115 }
116 
/* Stuff for decoding unicode sequences.
 *
 * IS_HIGH_SURROGATE / IS_LOW_SURROGATE test whether a code unit lies in
 * 0xD800..0xDBFF / 0xDC00..0xDFFF.  Every bit above the low ten takes part in
 * the comparison, not only bits 10..15: the tokener applies these tests again
 * to the code point produced by DECODE_SURROGATE_PAIR, and a value such as
 * 0x1D800 must not be mistaken for a surrogate just because its low 16 bits
 * look like one.
 *
 * DECODE_SURROGATE_PAIR combines a high and a low surrogate into the code
 * point (0x10000 and above) that the pair encodes.
 */
#define IS_HIGH_SURROGATE(uc) (((uc)&0xFFFFFC00u) == 0xD800u)
#define IS_LOW_SURROGATE(uc) (((uc)&0xFFFFFC00u) == 0xDC00u)
#define DECODE_SURROGATE_PAIR(hi, lo) ((((hi)&0x3FF) << 10) + ((lo)&0x3FF) + 0x10000)
/* UTF-8 encoding of U+FFFD, emitted in place of unpaired surrogates. */
static unsigned char utf8_replacement_char[3] = {0xEF, 0xBF, 0xBD};
122 
json_tokener_new_ex(int depth)123 struct json_tokener *json_tokener_new_ex(int depth)
124 {
125 	struct json_tokener *tok;
126 
127 	tok = (struct json_tokener *)calloc(1, sizeof(struct json_tokener));
128 	if (!tok)
129 		return NULL;
130 	tok->stack = (struct json_tokener_srec *)calloc(depth, sizeof(struct json_tokener_srec));
131 	if (!tok->stack)
132 	{
133 		free(tok);
134 		return NULL;
135 	}
136 	tok->pb = printbuf_new();
137 	tok->max_depth = depth;
138 	json_tokener_reset(tok);
139 	return tok;
140 }
141 
json_tokener_new(void)142 struct json_tokener *json_tokener_new(void)
143 {
144 	return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH);
145 }
146 
json_tokener_free(struct json_tokener * tok)147 void json_tokener_free(struct json_tokener *tok)
148 {
149 	json_tokener_reset(tok);
150 	if (tok->pb)
151 		printbuf_free(tok->pb);
152 	free(tok->stack);
153 	free(tok);
154 }
155 
json_tokener_reset_level(struct json_tokener * tok,int depth)156 static void json_tokener_reset_level(struct json_tokener *tok, int depth)
157 {
158 	tok->stack[depth].state = json_tokener_state_eatws;
159 	tok->stack[depth].saved_state = json_tokener_state_start;
160 	json_object_put(tok->stack[depth].current);
161 	tok->stack[depth].current = NULL;
162 	free(tok->stack[depth].obj_field_name);
163 	tok->stack[depth].obj_field_name = NULL;
164 }
165 
json_tokener_reset(struct json_tokener * tok)166 void json_tokener_reset(struct json_tokener *tok)
167 {
168 	int i;
169 	if (!tok)
170 		return;
171 
172 	for (i = tok->depth; i >= 0; i--)
173 		json_tokener_reset_level(tok, i);
174 	tok->depth = 0;
175 	tok->err = json_tokener_success;
176 }
177 
json_tokener_parse(const char * str)178 struct json_object *json_tokener_parse(const char *str)
179 {
180 	enum json_tokener_error jerr_ignored;
181 	struct json_object *obj;
182 	obj = json_tokener_parse_verbose(str, &jerr_ignored);
183 	return obj;
184 }
185 
json_tokener_parse_verbose(const char * str,enum json_tokener_error * error)186 struct json_object *json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)
187 {
188 	struct json_tokener *tok;
189 	struct json_object *obj;
190 
191 	tok = json_tokener_new();
192 	if (!tok)
193 		return NULL;
194 	obj = json_tokener_parse_ex(tok, str, -1);
195 	*error = tok->err;
196 	if (tok->err != json_tokener_success
197 #if 0
198 		/* This would be a more sensible default, and cause parsing
199 		 * things like "null123" to fail when the caller can't know
200 		 * where the parsing left off, but starting to fail would
201 		 * be a notable behaviour change.  Save for a 1.0 release.
202 		 */
203 	    || json_tokener_get_parse_end(tok) != strlen(str)
204 #endif
205 	)
206 
207 	{
208 		if (obj != NULL)
209 			json_object_put(obj);
210 		obj = NULL;
211 	}
212 
213 	json_tokener_free(tok);
214 	return obj;
215 }
216 
/*
 * Shorthand for the fields of the stack record at the current depth.
 * Each name expands to an lvalue and relies on a variable called `tok`
 * being in scope where it is used; the record it refers to changes
 * whenever tok->depth changes.
 */
#define state (tok->stack[tok->depth].state)
#define saved_state (tok->stack[tok->depth].saved_state)
#define current (tok->stack[tok->depth].current)
#define obj_field_name (tok->stack[tok->depth].obj_field_name)
221 
222 /* Optimization:
223  * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
224  * iterating character-by character.  A large performance boost is
225  * achieved by using tighter loops to locally handle units such as
226  * comments and strings.  Loops that handle an entire token within
227  * their scope also gather entire strings and pass them to
228  * printbuf_memappend() in a single call, rather than calling
229  * printbuf_memappend() one char at a time.
230  *
231  * PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is
232  * common to both the main loop and the tighter loops.
233  */
234 
/* PEEK_CHAR(dest, tok) macro:
 *   Peeks at the current char and stores it in dest.
 *   Returns 1 on success, sets tok->err and returns 0 if no more chars.
 *   At end of input the error is json_tokener_success when a complete
 *   top-level value has been read (depth 0, eating whitespace before the
 *   finish state) and json_tokener_continue otherwise.
 *   When JSON_TOKENER_VALIDATE_UTF8 is set and the current byte fails
 *   validation, sets json_tokener_error_parse_utf8_string and returns 0.
 *   Implicit inputs:  str, len, nBytesp vars (plus state and saved_state,
 *   which read through a variable named tok)
 */
#define PEEK_CHAR(dest, tok)                                                   \
	(((tok)->char_offset == len)                                           \
	     ? (((tok)->depth == 0 && state == json_tokener_state_eatws &&     \
	         saved_state == json_tokener_state_finish)                     \
	            ? (((tok)->err = json_tokener_success), 0)                 \
	            : (((tok)->err = json_tokener_continue), 0))               \
	     : ((((tok)->flags & JSON_TOKENER_VALIDATE_UTF8) &&                \
	         (!json_tokener_validate_utf8(*str, nBytesp)))                 \
	            ? (((tok)->err = json_tokener_error_parse_utf8_string), 0) \
	            : (((dest) = *str), 1)))
250 
/* ADVANCE_CHAR(str, tok) macro:
 *   Steps str forward by one char and bumps tok->char_offset to match.
 *   The expression evaluates to c, the char most recently peeked, so it can
 *   be used directly in a condition: the result is 0 once that char was the
 *   terminating NUL.
 *   Implicit inputs:  c var
 */
#define ADVANCE_CHAR(str, tok) ((str)++, (tok)->char_offset++, c)
257 
258 /* End optimization macro defs */
259 
json_tokener_parse_ex(struct json_tokener * tok,const char * str,int len)260 struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *str, int len)
261 {
262 	struct json_object *obj = NULL;
263 	char c = '\1';
264 	unsigned int nBytes = 0;
265 	unsigned int *nBytesp = &nBytes;
266 
267 #ifdef HAVE_USELOCALE
268 	locale_t oldlocale = uselocale(NULL);
269 	locale_t newloc;
270 #elif defined(HAVE_SETLOCALE)
271 	char *oldlocale = NULL;
272 #endif
273 
274 	tok->char_offset = 0;
275 	tok->err = json_tokener_success;
276 
277 	/* this interface is presently not 64-bit clean due to the int len argument
278 	 * and the internal printbuf interface that takes 32-bit int len arguments
279 	 * so the function limits the maximum string size to INT32_MAX (2GB).
280 	 * If the function is called with len == -1 then strlen is called to check
281 	 * the string length is less than INT32_MAX (2GB)
282 	 */
283 	if ((len < -1) || (len == -1 && strlen(str) > INT32_MAX))
284 	{
285 		tok->err = json_tokener_error_size;
286 		return NULL;
287 	}
288 
289 #ifdef HAVE_USELOCALE
290 	{
291 		locale_t duploc = duplocale(oldlocale);
292 		newloc = newlocale(LC_NUMERIC_MASK, "C", duploc);
293 		if (newloc == NULL)
294 		{
295 			freelocale(duploc);
296 			return NULL;
297 		}
298 		uselocale(newloc);
299 	}
300 #elif defined(HAVE_SETLOCALE)
301 	{
302 		char *tmplocale;
303 		tmplocale = setlocale(LC_NUMERIC, NULL);
304 		if (tmplocale)
305 			oldlocale = strdup(tmplocale);
306 		setlocale(LC_NUMERIC, "C");
307 	}
308 #endif
309 
310 	while (PEEK_CHAR(c, tok)) // Note: c might be '\0' !
311 	{
312 
313 	redo_char:
314 		switch (state)
315 		{
316 
317 		case json_tokener_state_eatws:
318 			/* Advance until we change state */
319 			while (isspace((unsigned char)c))
320 			{
321 				if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok)))
322 					goto out;
323 			}
324 			if (c == '/' && !(tok->flags & JSON_TOKENER_STRICT))
325 			{
326 				printbuf_reset(tok->pb);
327 				printbuf_memappend_fast(tok->pb, &c, 1);
328 				state = json_tokener_state_comment_start;
329 			}
330 			else
331 			{
332 				state = saved_state;
333 				goto redo_char;
334 			}
335 			break;
336 
337 		case json_tokener_state_start:
338 			switch (c)
339 			{
340 			case '{':
341 				state = json_tokener_state_eatws;
342 				saved_state = json_tokener_state_object_field_start;
343 				current = json_object_new_object();
344 				if (current == NULL)
345 					goto out;
346 				break;
347 			case '[':
348 				state = json_tokener_state_eatws;
349 				saved_state = json_tokener_state_array;
350 				current = json_object_new_array();
351 				if (current == NULL)
352 					goto out;
353 				break;
354 			case 'I':
355 			case 'i':
356 				state = json_tokener_state_inf;
357 				printbuf_reset(tok->pb);
358 				tok->st_pos = 0;
359 				goto redo_char;
360 			case 'N':
361 			case 'n':
362 				state = json_tokener_state_null; // or NaN
363 				printbuf_reset(tok->pb);
364 				tok->st_pos = 0;
365 				goto redo_char;
366 			case '\'':
367 				if (tok->flags & JSON_TOKENER_STRICT)
368 				{
369 					/* in STRICT mode only double-quote are allowed */
370 					tok->err = json_tokener_error_parse_unexpected;
371 					goto out;
372 				}
373 				/* FALLTHRU */
374 			case '"':
375 				state = json_tokener_state_string;
376 				printbuf_reset(tok->pb);
377 				tok->quote_char = c;
378 				break;
379 			case 'T':
380 			case 't':
381 			case 'F':
382 			case 'f':
383 				state = json_tokener_state_boolean;
384 				printbuf_reset(tok->pb);
385 				tok->st_pos = 0;
386 				goto redo_char;
387 			case '0':
388 			case '1':
389 			case '2':
390 			case '3':
391 			case '4':
392 			case '5':
393 			case '6':
394 			case '7':
395 			case '8':
396 			case '9':
397 			case '-':
398 				state = json_tokener_state_number;
399 				printbuf_reset(tok->pb);
400 				tok->is_double = 0;
401 				goto redo_char;
402 			default: tok->err = json_tokener_error_parse_unexpected; goto out;
403 			}
404 			break;
405 
406 		case json_tokener_state_finish:
407 			if (tok->depth == 0)
408 				goto out;
409 			obj = json_object_get(current);
410 			json_tokener_reset_level(tok, tok->depth);
411 			tok->depth--;
412 			goto redo_char;
413 
414 		case json_tokener_state_inf: /* aka starts with 'i' (or 'I', or "-i", or "-I") */
415 		{
416 			/* If we were guaranteed to have len set, then we could (usually) handle
417 			 * the entire "Infinity" check in a single strncmp (strncasecmp), but
418 			 * since len might be -1 (i.e. "read until \0"), we need to check it
419 			 * a character at a time.
420 			 * Trying to handle it both ways would make this code considerably more
421 			 * complicated with likely little performance benefit.
422 			 */
423 			int is_negative = 0;
424 			const char *_json_inf_str = json_inf_str;
425 			if (!(tok->flags & JSON_TOKENER_STRICT))
426 				_json_inf_str = json_inf_str_lower;
427 
428 			/* Note: tok->st_pos must be 0 when state is set to json_tokener_state_inf */
429 			while (tok->st_pos < (int)json_inf_str_len)
430 			{
431 				char inf_char = *str;
432 				if (!(tok->flags & JSON_TOKENER_STRICT))
433 					inf_char = tolower((unsigned char)*str);
434 				if (inf_char != _json_inf_str[tok->st_pos])
435 				{
436 					tok->err = json_tokener_error_parse_unexpected;
437 					goto out;
438 				}
439 				tok->st_pos++;
440 				(void)ADVANCE_CHAR(str, tok);
441 				if (!PEEK_CHAR(c, tok))
442 				{
443 					/* out of input chars, for now at least */
444 					goto out;
445 				}
446 			}
447 			/* We checked the full length of "Infinity", so create the object.
448 			 * When handling -Infinity, the number parsing code will have dropped
449 			 * the "-" into tok->pb for us, so check it now.
450 			 */
451 			if (printbuf_length(tok->pb) > 0 && *(tok->pb->buf) == '-')
452 			{
453 				is_negative = 1;
454 			}
455 			current = json_object_new_double(is_negative ? -INFINITY : INFINITY);
456 			if (current == NULL)
457 				goto out;
458 			saved_state = json_tokener_state_finish;
459 			state = json_tokener_state_eatws;
460 			goto redo_char;
461 		}
462 		break;
463 		case json_tokener_state_null: /* aka starts with 'n' */
464 		{
465 			int size;
466 			int size_nan;
467 			printbuf_memappend_fast(tok->pb, &c, 1);
468 			size = json_min(tok->st_pos + 1, json_null_str_len);
469 			size_nan = json_min(tok->st_pos + 1, json_nan_str_len);
470 			if ((!(tok->flags & JSON_TOKENER_STRICT) &&
471 			     strncasecmp(json_null_str, tok->pb->buf, size) == 0) ||
472 			    (strncmp(json_null_str, tok->pb->buf, size) == 0))
473 			{
474 				if (tok->st_pos == json_null_str_len)
475 				{
476 					current = NULL;
477 					saved_state = json_tokener_state_finish;
478 					state = json_tokener_state_eatws;
479 					goto redo_char;
480 				}
481 			}
482 			else if ((!(tok->flags & JSON_TOKENER_STRICT) &&
483 			          strncasecmp(json_nan_str, tok->pb->buf, size_nan) == 0) ||
484 			         (strncmp(json_nan_str, tok->pb->buf, size_nan) == 0))
485 			{
486 				if (tok->st_pos == json_nan_str_len)
487 				{
488 					current = json_object_new_double(NAN);
489 					if (current == NULL)
490 						goto out;
491 					saved_state = json_tokener_state_finish;
492 					state = json_tokener_state_eatws;
493 					goto redo_char;
494 				}
495 			}
496 			else
497 			{
498 				tok->err = json_tokener_error_parse_null;
499 				goto out;
500 			}
501 			tok->st_pos++;
502 		}
503 		break;
504 
505 		case json_tokener_state_comment_start:
506 			if (c == '*')
507 			{
508 				state = json_tokener_state_comment;
509 			}
510 			else if (c == '/')
511 			{
512 				state = json_tokener_state_comment_eol;
513 			}
514 			else
515 			{
516 				tok->err = json_tokener_error_parse_comment;
517 				goto out;
518 			}
519 			printbuf_memappend_fast(tok->pb, &c, 1);
520 			break;
521 
522 		case json_tokener_state_comment:
523 		{
524 			/* Advance until we change state */
525 			const char *case_start = str;
526 			while (c != '*')
527 			{
528 				if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
529 				{
530 					printbuf_memappend_fast(tok->pb, case_start,
531 					                        str - case_start);
532 					goto out;
533 				}
534 			}
535 			printbuf_memappend_fast(tok->pb, case_start, 1 + str - case_start);
536 			state = json_tokener_state_comment_end;
537 		}
538 		break;
539 
540 		case json_tokener_state_comment_eol:
541 		{
542 			/* Advance until we change state */
543 			const char *case_start = str;
544 			while (c != '\n')
545 			{
546 				if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
547 				{
548 					printbuf_memappend_fast(tok->pb, case_start,
549 					                        str - case_start);
550 					goto out;
551 				}
552 			}
553 			printbuf_memappend_fast(tok->pb, case_start, str - case_start);
554 			MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
555 			state = json_tokener_state_eatws;
556 		}
557 		break;
558 
559 		case json_tokener_state_comment_end:
560 			printbuf_memappend_fast(tok->pb, &c, 1);
561 			if (c == '/')
562 			{
563 				MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
564 				state = json_tokener_state_eatws;
565 			}
566 			else
567 			{
568 				state = json_tokener_state_comment;
569 			}
570 			break;
571 
572 		case json_tokener_state_string:
573 		{
574 			/* Advance until we change state */
575 			const char *case_start = str;
576 			while (1)
577 			{
578 				if (c == tok->quote_char)
579 				{
580 					printbuf_memappend_fast(tok->pb, case_start,
581 					                        str - case_start);
582 					current =
583 					    json_object_new_string_len(tok->pb->buf, tok->pb->bpos);
584 					if (current == NULL)
585 						goto out;
586 					saved_state = json_tokener_state_finish;
587 					state = json_tokener_state_eatws;
588 					break;
589 				}
590 				else if (c == '\\')
591 				{
592 					printbuf_memappend_fast(tok->pb, case_start,
593 					                        str - case_start);
594 					saved_state = json_tokener_state_string;
595 					state = json_tokener_state_string_escape;
596 					break;
597 				}
598 				if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
599 				{
600 					printbuf_memappend_fast(tok->pb, case_start,
601 					                        str - case_start);
602 					goto out;
603 				}
604 			}
605 		}
606 		break;
607 
608 		case json_tokener_state_string_escape:
609 			switch (c)
610 			{
611 			case '"':
612 			case '\\':
613 			case '/':
614 				printbuf_memappend_fast(tok->pb, &c, 1);
615 				state = saved_state;
616 				break;
617 			case 'b':
618 			case 'n':
619 			case 'r':
620 			case 't':
621 			case 'f':
622 				if (c == 'b')
623 					printbuf_memappend_fast(tok->pb, "\b", 1);
624 				else if (c == 'n')
625 					printbuf_memappend_fast(tok->pb, "\n", 1);
626 				else if (c == 'r')
627 					printbuf_memappend_fast(tok->pb, "\r", 1);
628 				else if (c == 't')
629 					printbuf_memappend_fast(tok->pb, "\t", 1);
630 				else if (c == 'f')
631 					printbuf_memappend_fast(tok->pb, "\f", 1);
632 				state = saved_state;
633 				break;
634 			case 'u':
635 				tok->ucs_char = 0;
636 				tok->st_pos = 0;
637 				state = json_tokener_state_escape_unicode;
638 				break;
639 			default: tok->err = json_tokener_error_parse_string; goto out;
640 			}
641 			break;
642 
643 			// ===================================================
644 
645 		case json_tokener_state_escape_unicode:
646 		{
647 			/* Handle a 4-byte \uNNNN sequence, or two sequences if a surrogate pair */
648 			while (1)
649 			{
650 				if (!c || !strchr(json_hex_chars, c))
651 				{
652 					tok->err = json_tokener_error_parse_string;
653 					goto out;
654 				}
655 				tok->ucs_char |=
656 				    ((unsigned int)jt_hexdigit(c) << ((3 - tok->st_pos) * 4));
657 				tok->st_pos++;
658 				if (tok->st_pos >= 4)
659 					break;
660 
661 				(void)ADVANCE_CHAR(str, tok);
662 				if (!PEEK_CHAR(c, tok))
663 				{
664 					/*
665 					 * We're out of characters in the current call to
666 					 * json_tokener_parse(), but a subsequent call might
667 					 * provide us with more, so leave our current state
668 					 * as-is (including tok->high_surrogate) and return.
669 					 */
670 					goto out;
671 				}
672 			}
673 			tok->st_pos = 0;
674 
675 			/* Now, we have a full \uNNNN sequence in tok->ucs_char */
676 
677 			/* If the *previous* sequence was a high surrogate ... */
678 			if (tok->high_surrogate)
679 			{
680 				if (IS_LOW_SURROGATE(tok->ucs_char))
681 				{
682 					/* Recalculate the ucs_char, then fall thru to process normally */
683 					tok->ucs_char = DECODE_SURROGATE_PAIR(tok->high_surrogate,
684 					                                      tok->ucs_char);
685 				}
686 				else
687 				{
688 					/* High surrogate was not followed by a low surrogate
689 					 * Replace the high and process the rest normally
690 					 */
691 					printbuf_memappend_fast(tok->pb,
692 					                        (char *)utf8_replacement_char, 3);
693 				}
694 				tok->high_surrogate = 0;
695 			}
696 
697 			if (tok->ucs_char < 0x80)
698 			{
699 				unsigned char unescaped_utf[1];
700 				unescaped_utf[0] = tok->ucs_char;
701 				printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 1);
702 			}
703 			else if (tok->ucs_char < 0x800)
704 			{
705 				unsigned char unescaped_utf[2];
706 				unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
707 				unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
708 				printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 2);
709 			}
710 			else if (IS_HIGH_SURROGATE(tok->ucs_char))
711 			{
712 				/*
713 				 * The next two characters should be \u, HOWEVER,
714 				 * we can't simply peek ahead here, because the
715 				 * characters we need might not be passed to us
716 				 * until a subsequent call to json_tokener_parse.
717 				 * Instead, transition throug a couple of states.
718 				 * (now):
719 				 *   _escape_unicode => _unicode_need_escape
720 				 * (see a '\\' char):
721 				 *   _unicode_need_escape => _unicode_need_u
722 				 * (see a 'u' char):
723 				 *   _unicode_need_u => _escape_unicode
724 				 *      ...and we'll end up back around here.
725 				 */
726 				tok->high_surrogate = tok->ucs_char;
727 				tok->ucs_char = 0;
728 				state = json_tokener_state_escape_unicode_need_escape;
729 				break;
730 			}
731 			else if (IS_LOW_SURROGATE(tok->ucs_char))
732 			{
733 				/* Got a low surrogate not preceded by a high */
734 				printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
735 			}
736 			else if (tok->ucs_char < 0x10000)
737 			{
738 				unsigned char unescaped_utf[3];
739 				unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
740 				unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
741 				unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
742 				printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 3);
743 			}
744 			else if (tok->ucs_char < 0x110000)
745 			{
746 				unsigned char unescaped_utf[4];
747 				unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
748 				unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
749 				unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
750 				unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
751 				printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 4);
752 			}
753 			else
754 			{
755 				/* Don't know what we got--insert the replacement char */
756 				printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
757 			}
758 			state = saved_state; // i.e. _state_string or _state_object_field
759 		}
760 		break;
761 
762 		case json_tokener_state_escape_unicode_need_escape:
763 			// We get here after processing a high_surrogate
764 			// require a '\\' char
765 			if (!c || c != '\\')
766 			{
767 				/* Got a high surrogate without another sequence following
768 				 * it.  Put a replacement char in for the high surrogate
769 				 * and pop back up to _state_string or _state_object_field.
770 				 */
771 				printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
772 				tok->high_surrogate = 0;
773 				tok->ucs_char = 0;
774 				tok->st_pos = 0;
775 				state = saved_state;
776 				goto redo_char;
777 			}
778 			state = json_tokener_state_escape_unicode_need_u;
779 			break;
780 
781 		case json_tokener_state_escape_unicode_need_u:
782 			/* We already had a \ char, check that it's \u */
783 			if (!c || c != 'u')
784 			{
785 				/* Got a high surrogate with some non-unicode escape
786 				 * sequence following it.
787 				 * Put a replacement char in for the high surrogate
788 				 * and handle the escape sequence normally.
789 				 */
790 				printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
791 				tok->high_surrogate = 0;
792 				tok->ucs_char = 0;
793 				tok->st_pos = 0;
794 				state = json_tokener_state_string_escape;
795 				goto redo_char;
796 			}
797 			state = json_tokener_state_escape_unicode;
798 			break;
799 
800 			// ===================================================
801 
802 		case json_tokener_state_boolean:
803 		{
804 			int size1, size2;
805 			printbuf_memappend_fast(tok->pb, &c, 1);
806 			size1 = json_min(tok->st_pos + 1, json_true_str_len);
807 			size2 = json_min(tok->st_pos + 1, json_false_str_len);
808 			if ((!(tok->flags & JSON_TOKENER_STRICT) &&
809 			     strncasecmp(json_true_str, tok->pb->buf, size1) == 0) ||
810 			    (strncmp(json_true_str, tok->pb->buf, size1) == 0))
811 			{
812 				if (tok->st_pos == json_true_str_len)
813 				{
814 					current = json_object_new_boolean(1);
815 					if (current == NULL)
816 						goto out;
817 					saved_state = json_tokener_state_finish;
818 					state = json_tokener_state_eatws;
819 					goto redo_char;
820 				}
821 			}
822 			else if ((!(tok->flags & JSON_TOKENER_STRICT) &&
823 			          strncasecmp(json_false_str, tok->pb->buf, size2) == 0) ||
824 			         (strncmp(json_false_str, tok->pb->buf, size2) == 0))
825 			{
826 				if (tok->st_pos == json_false_str_len)
827 				{
828 					current = json_object_new_boolean(0);
829 					if (current == NULL)
830 						goto out;
831 					saved_state = json_tokener_state_finish;
832 					state = json_tokener_state_eatws;
833 					goto redo_char;
834 				}
835 			}
836 			else
837 			{
838 				tok->err = json_tokener_error_parse_boolean;
839 				goto out;
840 			}
841 			tok->st_pos++;
842 		}
843 		break;
844 
845 		case json_tokener_state_number:
846 		{
847 			/* Advance until we change state */
848 			const char *case_start = str;
849 			int case_len = 0;
850 			int is_exponent = 0;
851 			int neg_sign_ok = 1;
852 			int pos_sign_ok = 0;
853 			if (printbuf_length(tok->pb) > 0)
854 			{
855 				/* We don't save all state from the previous incremental parse
856 				   so we need to re-generate it based on the saved string so far.
857 				 */
858 				char *e_loc = strchr(tok->pb->buf, 'e');
859 				if (!e_loc)
860 					e_loc = strchr(tok->pb->buf, 'E');
861 				if (e_loc)
862 				{
863 					char *last_saved_char =
864 					    &tok->pb->buf[printbuf_length(tok->pb) - 1];
865 					is_exponent = 1;
866 					pos_sign_ok = neg_sign_ok = 1;
867 					/* If the "e" isn't at the end, we can't start with a '-' */
868 					if (e_loc != last_saved_char)
869 					{
870 						neg_sign_ok = 0;
871 						pos_sign_ok = 0;
872 					}
873 					// else leave it set to 1, i.e. start of the new input
874 				}
875 			}
876 
877 			while (c && ((c >= '0' && c <= '9') ||
878 			             (!is_exponent && (c == 'e' || c == 'E')) ||
879 			             (neg_sign_ok && c == '-') || (pos_sign_ok && c == '+') ||
880 			             (!tok->is_double && c == '.')))
881 			{
882 				pos_sign_ok = neg_sign_ok = 0;
883 				++case_len;
884 
885 				/* non-digit characters checks */
886 				/* note: since the main loop condition to get here was
887 				 * an input starting with 0-9 or '-', we are
888 				 * protected from input starting with '.' or
889 				 * e/E.
890 				 */
891 				switch (c)
892 				{
893 				case '.':
894 					tok->is_double = 1;
895 					pos_sign_ok = 1;
896 					neg_sign_ok = 1;
897 					break;
898 				case 'e': /* FALLTHRU */
899 				case 'E':
900 					is_exponent = 1;
901 					tok->is_double = 1;
902 					/* the exponent part can begin with a negative sign */
903 					pos_sign_ok = neg_sign_ok = 1;
904 					break;
905 				default: break;
906 				}
907 
908 				if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
909 				{
910 					printbuf_memappend_fast(tok->pb, case_start, case_len);
911 					goto out;
912 				}
913 			}
914 			/*
915 				Now we know c isn't a valid number char, but check whether
916 				it might have been intended to be, and return a potentially
917 				more understandable error right away.
918 				However, if we're at the top-level, use the number as-is
919 			    because c can be part of a new object to parse on the
920 				next call to json_tokener_parse().
921 			 */
922 			if (tok->depth > 0 && c != ',' && c != ']' && c != '}' && c != '/' &&
923 			    c != 'I' && c != 'i' && !isspace((unsigned char)c))
924 			{
925 				tok->err = json_tokener_error_parse_number;
926 				goto out;
927 			}
928 			if (case_len > 0)
929 				printbuf_memappend_fast(tok->pb, case_start, case_len);
930 
931 			// Check for -Infinity
932 			if (tok->pb->buf[0] == '-' && case_len <= 1 && (c == 'i' || c == 'I'))
933 			{
934 				state = json_tokener_state_inf;
935 				tok->st_pos = 0;
936 				goto redo_char;
937 			}
938 			if (tok->is_double && !(tok->flags & JSON_TOKENER_STRICT))
939 			{
940 				/* Trim some chars off the end, to allow things
941 				   like "123e+" to parse ok. */
942 				while (printbuf_length(tok->pb) > 1)
943 				{
944 					char last_char = tok->pb->buf[printbuf_length(tok->pb) - 1];
945 					if (last_char != 'e' && last_char != 'E' &&
946 					    last_char != '-' && last_char != '+')
947 					{
948 						break;
949 					}
950 					tok->pb->buf[printbuf_length(tok->pb) - 1] = '\0';
951 					printbuf_length(tok->pb)--;
952 				}
953 			}
954 		}
955 			{
956 				int64_t num64;
957 				uint64_t numuint64;
958 				double numd;
959 				if (!tok->is_double && tok->pb->buf[0] == '-' &&
960 				    json_parse_int64(tok->pb->buf, &num64) == 0)
961 				{
962 					current = json_object_new_int64(num64);
963 					if (current == NULL)
964 						goto out;
965 				}
966 				else if (!tok->is_double && tok->pb->buf[0] != '-' &&
967 				         json_parse_uint64(tok->pb->buf, &numuint64) == 0)
968 				{
969 					if (numuint64 && tok->pb->buf[0] == '0' &&
970 					    (tok->flags & JSON_TOKENER_STRICT))
971 					{
972 						tok->err = json_tokener_error_parse_number;
973 						goto out;
974 					}
975 					if (numuint64 <= INT64_MAX)
976 					{
977 						num64 = (uint64_t)numuint64;
978 						current = json_object_new_int64(num64);
979 						if (current == NULL)
980 							goto out;
981 					}
982 					else
983 					{
984 						current = json_object_new_uint64(numuint64);
985 						if (current == NULL)
986 							goto out;
987 					}
988 				}
989 				else if (tok->is_double &&
990 				         json_tokener_parse_double(
991 				             tok->pb->buf, printbuf_length(tok->pb), &numd) == 0)
992 				{
993 					current = json_object_new_double_s(numd, tok->pb->buf);
994 					if (current == NULL)
995 						goto out;
996 				}
997 				else
998 				{
999 					tok->err = json_tokener_error_parse_number;
1000 					goto out;
1001 				}
1002 				saved_state = json_tokener_state_finish;
1003 				state = json_tokener_state_eatws;
1004 				goto redo_char;
1005 			}
1006 			break;
1007 
1008 		case json_tokener_state_array_after_sep:
1009 		case json_tokener_state_array:
1010 			if (c == ']')
1011 			{
1012 				// Minimize memory usage; assume parsed objs are unlikely to be changed
1013 				json_object_array_shrink(current, 0);
1014 
1015 				if (state == json_tokener_state_array_after_sep &&
1016 				    (tok->flags & JSON_TOKENER_STRICT))
1017 				{
1018 					tok->err = json_tokener_error_parse_unexpected;
1019 					goto out;
1020 				}
1021 				saved_state = json_tokener_state_finish;
1022 				state = json_tokener_state_eatws;
1023 			}
1024 			else
1025 			{
1026 				if (tok->depth >= tok->max_depth - 1)
1027 				{
1028 					tok->err = json_tokener_error_depth;
1029 					goto out;
1030 				}
1031 				state = json_tokener_state_array_add;
1032 				tok->depth++;
1033 				json_tokener_reset_level(tok, tok->depth);
1034 				goto redo_char;
1035 			}
1036 			break;
1037 
1038 		case json_tokener_state_array_add:
1039 			if (json_object_array_add(current, obj) != 0)
1040 				goto out;
1041 			saved_state = json_tokener_state_array_sep;
1042 			state = json_tokener_state_eatws;
1043 			goto redo_char;
1044 
1045 		case json_tokener_state_array_sep:
1046 			if (c == ']')
1047 			{
1048 				// Minimize memory usage; assume parsed objs are unlikely to be changed
1049 				json_object_array_shrink(current, 0);
1050 
1051 				saved_state = json_tokener_state_finish;
1052 				state = json_tokener_state_eatws;
1053 			}
1054 			else if (c == ',')
1055 			{
1056 				saved_state = json_tokener_state_array_after_sep;
1057 				state = json_tokener_state_eatws;
1058 			}
1059 			else
1060 			{
1061 				tok->err = json_tokener_error_parse_array;
1062 				goto out;
1063 			}
1064 			break;
1065 
1066 		case json_tokener_state_object_field_start:
1067 		case json_tokener_state_object_field_start_after_sep:
1068 			if (c == '}')
1069 			{
1070 				if (state == json_tokener_state_object_field_start_after_sep &&
1071 				    (tok->flags & JSON_TOKENER_STRICT))
1072 				{
1073 					tok->err = json_tokener_error_parse_unexpected;
1074 					goto out;
1075 				}
1076 				saved_state = json_tokener_state_finish;
1077 				state = json_tokener_state_eatws;
1078 			}
1079 			else if (c == '"' || c == '\'')
1080 			{
1081 				tok->quote_char = c;
1082 				printbuf_reset(tok->pb);
1083 				state = json_tokener_state_object_field;
1084 			}
1085 			else
1086 			{
1087 				tok->err = json_tokener_error_parse_object_key_name;
1088 				goto out;
1089 			}
1090 			break;
1091 
1092 		case json_tokener_state_object_field:
1093 		{
1094 			/* Advance until we change state */
1095 			const char *case_start = str;
1096 			while (1)
1097 			{
1098 				if (c == tok->quote_char)
1099 				{
1100 					printbuf_memappend_fast(tok->pb, case_start,
1101 					                        str - case_start);
1102 					obj_field_name = strdup(tok->pb->buf);
1103 					saved_state = json_tokener_state_object_field_end;
1104 					state = json_tokener_state_eatws;
1105 					break;
1106 				}
1107 				else if (c == '\\')
1108 				{
1109 					printbuf_memappend_fast(tok->pb, case_start,
1110 					                        str - case_start);
1111 					saved_state = json_tokener_state_object_field;
1112 					state = json_tokener_state_string_escape;
1113 					break;
1114 				}
1115 				if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
1116 				{
1117 					printbuf_memappend_fast(tok->pb, case_start,
1118 					                        str - case_start);
1119 					goto out;
1120 				}
1121 			}
1122 		}
1123 		break;
1124 
1125 		case json_tokener_state_object_field_end:
1126 			if (c == ':')
1127 			{
1128 				saved_state = json_tokener_state_object_value;
1129 				state = json_tokener_state_eatws;
1130 			}
1131 			else
1132 			{
1133 				tok->err = json_tokener_error_parse_object_key_sep;
1134 				goto out;
1135 			}
1136 			break;
1137 
1138 		case json_tokener_state_object_value:
1139 			if (tok->depth >= tok->max_depth - 1)
1140 			{
1141 				tok->err = json_tokener_error_depth;
1142 				goto out;
1143 			}
1144 			state = json_tokener_state_object_value_add;
1145 			tok->depth++;
1146 			json_tokener_reset_level(tok, tok->depth);
1147 			goto redo_char;
1148 
1149 		case json_tokener_state_object_value_add:
1150 			json_object_object_add(current, obj_field_name, obj);
1151 			free(obj_field_name);
1152 			obj_field_name = NULL;
1153 			saved_state = json_tokener_state_object_sep;
1154 			state = json_tokener_state_eatws;
1155 			goto redo_char;
1156 
1157 		case json_tokener_state_object_sep:
1158 			/* { */
1159 			if (c == '}')
1160 			{
1161 				saved_state = json_tokener_state_finish;
1162 				state = json_tokener_state_eatws;
1163 			}
1164 			else if (c == ',')
1165 			{
1166 				saved_state = json_tokener_state_object_field_start_after_sep;
1167 				state = json_tokener_state_eatws;
1168 			}
1169 			else
1170 			{
1171 				tok->err = json_tokener_error_parse_object_value_sep;
1172 				goto out;
1173 			}
1174 			break;
1175 		}
1176 		(void)ADVANCE_CHAR(str, tok);
1177 		if (!c) // This is the char *before* advancing
1178 			break;
1179 	} /* while(PEEK_CHAR) */
1180 
1181 out:
1182 	if ((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && (nBytes != 0))
1183 	{
1184 		tok->err = json_tokener_error_parse_utf8_string;
1185 	}
1186 	if (c && (state == json_tokener_state_finish) && (tok->depth == 0) &&
1187 	    (tok->flags & (JSON_TOKENER_STRICT | JSON_TOKENER_ALLOW_TRAILING_CHARS)) ==
1188 	        JSON_TOKENER_STRICT)
1189 	{
1190 		/* unexpected char after JSON data */
1191 		tok->err = json_tokener_error_parse_unexpected;
1192 	}
1193 	if (!c)
1194 	{
1195 		/* We hit an eof char (0) */
1196 		if (state != json_tokener_state_finish && saved_state != json_tokener_state_finish)
1197 			tok->err = json_tokener_error_parse_eof;
1198 	}
1199 
1200 #ifdef HAVE_USELOCALE
1201 	uselocale(oldlocale);
1202 	freelocale(newloc);
1203 #elif defined(HAVE_SETLOCALE)
1204 	setlocale(LC_NUMERIC, oldlocale);
1205 	free(oldlocale);
1206 #endif
1207 
1208 	if (tok->err == json_tokener_success)
1209 	{
1210 		json_object *ret = json_object_get(current);
1211 		int ii;
1212 
1213 		/* Partially reset, so we parse additional objects on subsequent calls. */
1214 		for (ii = tok->depth; ii >= 0; ii--)
1215 			json_tokener_reset_level(tok, ii);
1216 		return ret;
1217 	}
1218 
1219 	MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n", json_tokener_errors[tok->err],
1220 	         tok->char_offset);
1221 	return NULL;
1222 }
1223 
json_tokener_validate_utf8(const char c,unsigned int * nBytes)1224 static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes)
1225 {
1226 	unsigned char chr = c;
1227 	if (*nBytes == 0)
1228 	{
1229 		if (chr >= 0x80)
1230 		{
1231 			if ((chr & 0xe0) == 0xc0)
1232 				*nBytes = 1;
1233 			else if ((chr & 0xf0) == 0xe0)
1234 				*nBytes = 2;
1235 			else if ((chr & 0xf8) == 0xf0)
1236 				*nBytes = 3;
1237 			else
1238 				return 0;
1239 		}
1240 	}
1241 	else
1242 	{
1243 		if ((chr & 0xC0) != 0x80)
1244 			return 0;
1245 		(*nBytes)--;
1246 	}
1247 	return 1;
1248 }
1249 
json_tokener_set_flags(struct json_tokener * tok,int flags)1250 void json_tokener_set_flags(struct json_tokener *tok, int flags)
1251 {
1252 	tok->flags = flags;
1253 }
1254 
json_tokener_get_parse_end(struct json_tokener * tok)1255 size_t json_tokener_get_parse_end(struct json_tokener *tok)
1256 {
1257 	assert(tok->char_offset >= 0); /* Drop this line when char_offset becomes a size_t */
1258 	return (size_t)tok->char_offset;
1259 }
1260 
/**
 * Convert the NUL-terminated text in buf to a double, succeeding only if
 * strtod() consumes exactly the first len characters.
 *
 * @param buf     NUL-terminated text to convert.
 * @param len     number of characters of buf that must make up the number.
 * @param retval  receives the result of strtod(); it is written even when
 *                the function reports failure.
 * @return 0 if the whole buffer was converted, 1 otherwise.
 */
static int json_tokener_parse_double(const char *buf, int len, double *retval)
{
	char *end = NULL;

	*retval = strtod(buf, &end);

	/* strtod() leaves end == buf when it could not convert anything.
	 * Without this check an empty buffer (len == 0) would pass the length
	 * comparison below and be reported as a successfully parsed 0.0. */
	if (end == buf)
		return 1;

	/* Trailing characters that strtod() did not consume mean the text is
	 * not entirely a number. */
	if (end != buf + len)
		return 1;

	return 0; // It worked
}
1269