1 /*
2   Copyright (C) 2011 Joseph A. Adams (joeyadams3.14159@gmail.com)
3   All rights reserved.
4 
5   Permission is hereby granted, free of charge, to any person obtaining a copy
6   of this software and associated documentation files (the "Software"), to deal
7   in the Software without restriction, including without limitation the rights
8   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9   copies of the Software, and to permit persons to whom the Software is
10   furnished to do so, subject to the following conditions:
11 
12   The above copyright notice and this permission notice shall be included in
13   all copies or substantial portions of the Software.
14 
15   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21   THE SOFTWARE.
22 */
23 
24 #include "json.h"
25 
26 #include <assert.h>
27 #include <stdint.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 
32 #define out_of_memory() do {                    \
33 		fprintf(stderr, "Out of memory.\n");    \
34 		exit(EXIT_FAILURE);                     \
35 	} while (0)
36 
37 /* Sadly, strdup is not portable. */
json_strdup(const char * str)38 static char *json_strdup(const char *str)
39 {
40 	char *ret = (char*) malloc(strlen(str) + 1);
41 	if (ret == NULL)
42 		out_of_memory();
43 	strcpy(ret, str);
44 	return ret;
45 }
46 
47 /* String buffer */
48 
49 typedef struct
50 {
51 	char *cur;
52 	char *end;
53 	char *start;
54 } SB;
55 
sb_init(SB * sb)56 static void sb_init(SB *sb)
57 {
58 	sb->start = (char*) malloc(17);
59 	if (sb->start == NULL)
60 		out_of_memory();
61 	sb->cur = sb->start;
62 	sb->end = sb->start + 16;
63 }
64 
65 /* sb and need may be evaluated multiple times. */
66 #define sb_need(sb, need) do {                  \
67 		if ((sb)->end - (sb)->cur < (need))     \
68 			sb_grow(sb, need);                  \
69 	} while (0)
70 
sb_grow(SB * sb,int need)71 static void sb_grow(SB *sb, int need)
72 {
73 	size_t length = sb->cur - sb->start;
74 	size_t alloc = sb->end - sb->start;
75 
76 	do {
77 		alloc *= 2;
78 	} while (alloc < length + need);
79 
80 	sb->start = (char*) realloc(sb->start, alloc + 1);
81 	if (sb->start == NULL)
82 		out_of_memory();
83 	sb->cur = sb->start + length;
84 	sb->end = sb->start + alloc;
85 }
86 
sb_put(SB * sb,const char * bytes,int count)87 static void sb_put(SB *sb, const char *bytes, int count)
88 {
89 	sb_need(sb, count);
90 	memcpy(sb->cur, bytes, count);
91 	sb->cur += count;
92 }
93 
94 #define sb_putc(sb, c) do {         \
95 		if ((sb)->cur >= (sb)->end) \
96 			sb_grow(sb, 1);         \
97 		*(sb)->cur++ = (c);         \
98 	} while (0)
99 
sb_puts(SB * sb,const char * str)100 static void sb_puts(SB *sb, const char *str)
101 {
102 	sb_put(sb, str, strlen(str));
103 }
104 
sb_finish(SB * sb)105 static char *sb_finish(SB *sb)
106 {
107 	*sb->cur = 0;
108 	assert(sb->start <= sb->cur && strlen(sb->start) == (size_t)(sb->cur - sb->start));
109 	return sb->start;
110 }
111 
sb_free(SB * sb)112 static void sb_free(SB *sb)
113 {
114 	free(sb->start);
115 }
116 
117 /*
118  * Unicode helper functions
119  *
120  * These are taken from the ccan/charset module and customized a bit.
121  * Putting them here means the compiler can (choose to) inline them,
122  * and it keeps ccan/json from having a dependency.
123  */
124 
125 /*
126  * Type for Unicode codepoints.
127  * We need our own because wchar_t might be 16 bits.
128  */
129 typedef uint32_t uchar_t;
130 
131 /*
132  * Validate a single UTF-8 character starting at @s.
133  * The string must be null-terminated.
134  *
135  * If it's valid, return its length (1 thru 4).
136  * If it's invalid or clipped, return 0.
137  *
138  * This function implements the syntax given in RFC3629, which is
139  * the same as that given in The Unicode Standard, Version 6.0.
140  *
141  * It has the following properties:
142  *
143  *  * All codepoints U+0000..U+10FFFF may be encoded,
144  *    except for U+D800..U+DFFF, which are reserved
145  *    for UTF-16 surrogate pair encoding.
146  *  * UTF-8 byte sequences longer than 4 bytes are not permitted,
147  *    as they exceed the range of Unicode.
148  *  * The sixty-six Unicode "non-characters" are permitted
149  *    (namely, U+FDD0..U+FDEF, U+xxFFFE, and U+xxFFFF).
150  */
utf8_validate_cz(const char * s)151 static int utf8_validate_cz(const char *s)
152 {
153 	unsigned char c = *s++;
154 
155 	if (c <= 0x7F) {        /* 00..7F */
156 		return 1;
157 	} else if (c <= 0xC1) { /* 80..C1 */
158 		/* Disallow overlong 2-byte sequence. */
159 		return 0;
160 	} else if (c <= 0xDF) { /* C2..DF */
161 		/* Make sure subsequent byte is in the range 0x80..0xBF. */
162 		if (((unsigned char)*s++ & 0xC0) != 0x80)
163 			return 0;
164 
165 		return 2;
166 	} else if (c <= 0xEF) { /* E0..EF */
167 		/* Disallow overlong 3-byte sequence. */
168 		if (c == 0xE0 && (unsigned char)*s < 0xA0)
169 			return 0;
170 
171 		/* Disallow U+D800..U+DFFF. */
172 		if (c == 0xED && (unsigned char)*s > 0x9F)
173 			return 0;
174 
175 		/* Make sure subsequent bytes are in the range 0x80..0xBF. */
176 		if (((unsigned char)*s++ & 0xC0) != 0x80)
177 			return 0;
178 		if (((unsigned char)*s++ & 0xC0) != 0x80)
179 			return 0;
180 
181 		return 3;
182 	} else if (c <= 0xF4) { /* F0..F4 */
183 		/* Disallow overlong 4-byte sequence. */
184 		if (c == 0xF0 && (unsigned char)*s < 0x90)
185 			return 0;
186 
187 		/* Disallow codepoints beyond U+10FFFF. */
188 		if (c == 0xF4 && (unsigned char)*s > 0x8F)
189 			return 0;
190 
191 		/* Make sure subsequent bytes are in the range 0x80..0xBF. */
192 		if (((unsigned char)*s++ & 0xC0) != 0x80)
193 			return 0;
194 		if (((unsigned char)*s++ & 0xC0) != 0x80)
195 			return 0;
196 		if (((unsigned char)*s++ & 0xC0) != 0x80)
197 			return 0;
198 
199 		return 4;
200 	} else {                /* F5..FF */
201 		return 0;
202 	}
203 }
204 
205 /* Validate a null-terminated UTF-8 string. */
utf8_validate(const char * s)206 static bool utf8_validate(const char *s)
207 {
208 	int len;
209 
210 	for (; *s != 0; s += len) {
211 		len = utf8_validate_cz(s);
212 		if (len == 0)
213 			return false;
214 	}
215 
216 	return true;
217 }
218 
219 /*
220  * Read a single UTF-8 character starting at @s,
221  * returning the length, in bytes, of the character read.
222  *
223  * This function assumes input is valid UTF-8,
224  * and that there are enough characters in front of @s.
225  */
utf8_read_char(const char * s,uchar_t * out)226 static int utf8_read_char(const char *s, uchar_t *out)
227 {
228 	const unsigned char *c = (const unsigned char*) s;
229 
230 	assert(utf8_validate_cz(s));
231 
232 	if (c[0] <= 0x7F) {
233 		/* 00..7F */
234 		*out = c[0];
235 		return 1;
236 	} else if (c[0] <= 0xDF) {
237 		/* C2..DF (unless input is invalid) */
238 		*out = ((uchar_t)c[0] & 0x1F) << 6 |
239 		       ((uchar_t)c[1] & 0x3F);
240 		return 2;
241 	} else if (c[0] <= 0xEF) {
242 		/* E0..EF */
243 		*out = ((uchar_t)c[0] &  0xF) << 12 |
244 		       ((uchar_t)c[1] & 0x3F) << 6  |
245 		       ((uchar_t)c[2] & 0x3F);
246 		return 3;
247 	} else {
248 		/* F0..F4 (unless input is invalid) */
249 		*out = ((uchar_t)c[0] &  0x7) << 18 |
250 		       ((uchar_t)c[1] & 0x3F) << 12 |
251 		       ((uchar_t)c[2] & 0x3F) << 6  |
252 		       ((uchar_t)c[3] & 0x3F);
253 		return 4;
254 	}
255 }
256 
257 /*
258  * Write a single UTF-8 character to @s,
259  * returning the length, in bytes, of the character written.
260  *
261  * @unicode must be U+0000..U+10FFFF, but not U+D800..U+DFFF.
262  *
263  * This function will write up to 4 bytes to @out.
264  */
utf8_write_char(uchar_t unicode,char * out)265 static int utf8_write_char(uchar_t unicode, char *out)
266 {
267 	unsigned char *o = (unsigned char*) out;
268 
269 	assert(unicode <= 0x10FFFF && !(unicode >= 0xD800 && unicode <= 0xDFFF));
270 
271 	if (unicode <= 0x7F) {
272 		/* U+0000..U+007F */
273 		*o++ = unicode;
274 		return 1;
275 	} else if (unicode <= 0x7FF) {
276 		/* U+0080..U+07FF */
277 		*o++ = 0xC0 | unicode >> 6;
278 		*o++ = 0x80 | (unicode & 0x3F);
279 		return 2;
280 	} else if (unicode <= 0xFFFF) {
281 		/* U+0800..U+FFFF */
282 		*o++ = 0xE0 | unicode >> 12;
283 		*o++ = 0x80 | (unicode >> 6 & 0x3F);
284 		*o++ = 0x80 | (unicode & 0x3F);
285 		return 3;
286 	} else {
287 		/* U+10000..U+10FFFF */
288 		*o++ = 0xF0 | unicode >> 18;
289 		*o++ = 0x80 | (unicode >> 12 & 0x3F);
290 		*o++ = 0x80 | (unicode >> 6 & 0x3F);
291 		*o++ = 0x80 | (unicode & 0x3F);
292 		return 4;
293 	}
294 }
295 
296 /*
297  * Compute the Unicode codepoint of a UTF-16 surrogate pair.
298  *
299  * @uc should be 0xD800..0xDBFF, and @lc should be 0xDC00..0xDFFF.
300  * If they aren't, this function returns false.
301  */
from_surrogate_pair(uint16_t uc,uint16_t lc,uchar_t * unicode)302 static bool from_surrogate_pair(uint16_t uc, uint16_t lc, uchar_t *unicode)
303 {
304 	if (uc >= 0xD800 && uc <= 0xDBFF && lc >= 0xDC00 && lc <= 0xDFFF) {
305 		*unicode = 0x10000 + ((((uchar_t)uc & 0x3FF) << 10) | (lc & 0x3FF));
306 		return true;
307 	} else {
308 		return false;
309 	}
310 }
311 
312 /*
313  * Construct a UTF-16 surrogate pair given a Unicode codepoint.
314  *
315  * @unicode must be U+10000..U+10FFFF.
316  */
to_surrogate_pair(uchar_t unicode,uint16_t * uc,uint16_t * lc)317 static void to_surrogate_pair(uchar_t unicode, uint16_t *uc, uint16_t *lc)
318 {
319 	uchar_t n;
320 
321 	assert(unicode >= 0x10000 && unicode <= 0x10FFFF);
322 
323 	n = unicode - 0x10000;
324 	*uc = ((n >> 10) & 0x3FF) | 0xD800;
325 	*lc = (n & 0x3FF) | 0xDC00;
326 }
327 
328 #define is_space(c) ((c) == '\t' || (c) == '\n' || (c) == '\r' || (c) == ' ')
329 #define is_digit(c) ((c) >= '0' && (c) <= '9')
330 
331 static bool parse_value     (const char **sp, JsonNode        **out);
332 static bool parse_string    (const char **sp, char            **out);
333 static bool parse_number    (const char **sp, double           *out);
334 static bool parse_array     (const char **sp, JsonNode        **out);
335 static bool parse_object    (const char **sp, JsonNode        **out);
336 static bool parse_hex16     (const char **sp, uint16_t         *out);
337 
338 static bool expect_literal  (const char **sp, const char *str);
339 static void skip_space      (const char **sp);
340 
341 static void emit_value              (SB *out, const JsonNode *node);
342 static void emit_value_indented     (SB *out, const JsonNode *node, const char *space, int indent_level);
343 static void emit_string             (SB *out, const char *str);
344 static void emit_number             (SB *out, double num);
345 static void emit_array              (SB *out, const JsonNode *array);
346 static void emit_array_indented     (SB *out, const JsonNode *array, const char *space, int indent_level);
347 static void emit_object             (SB *out, const JsonNode *object);
348 static void emit_object_indented    (SB *out, const JsonNode *object, const char *space, int indent_level);
349 
350 static int write_hex16(char *out, uint16_t val);
351 
352 static JsonNode *mknode(JsonTag tag);
353 static void append_node(JsonNode *parent, JsonNode *child);
354 static void prepend_node(JsonNode *parent, JsonNode *child);
355 static void append_member(JsonNode *object, char *key, JsonNode *value);
356 
357 /* Assertion-friendly validity checks */
358 static bool tag_is_valid(unsigned int tag);
359 static bool number_is_valid(const char *num);
360 
json_decode(const char * json)361 JsonNode *json_decode(const char *json)
362 {
363 	const char *s = json;
364 	JsonNode *ret;
365 
366 	skip_space(&s);
367 	if (!parse_value(&s, &ret))
368 		return NULL;
369 
370 	skip_space(&s);
371 	if (*s != 0) {
372 		json_delete(ret);
373 		return NULL;
374 	}
375 
376 	return ret;
377 }
378 
json_encode(const JsonNode * node)379 char *json_encode(const JsonNode *node)
380 {
381 	return json_stringify(node, NULL);
382 }
383 
json_encode_string(const char * str)384 char *json_encode_string(const char *str)
385 {
386 	SB sb;
387 	sb_init(&sb);
388 
389 	emit_string(&sb, str);
390 
391 	return sb_finish(&sb);
392 }
393 
json_stringify(const JsonNode * node,const char * space)394 char *json_stringify(const JsonNode *node, const char *space)
395 {
396 	SB sb;
397 	sb_init(&sb);
398 
399 	if (space != NULL)
400 		emit_value_indented(&sb, node, space, 0);
401 	else
402 		emit_value(&sb, node);
403 
404 	return sb_finish(&sb);
405 }
406 
json_delete(JsonNode * node)407 void json_delete(JsonNode *node)
408 {
409 	if (node != NULL) {
410 		json_remove_from_parent(node);
411 
412 		switch (node->tag) {
413 			case JSON_STRING:
414 				free(node->string_);
415 				break;
416 			case JSON_ARRAY:
417 			case JSON_OBJECT:
418 			{
419 				JsonNode *child, *next;
420 				for (child = node->children.head; child != NULL; child = next) {
421 					next = child->next;
422 					json_delete(child);
423 				}
424 				break;
425 			}
426 			default:;
427 		}
428 
429 		free(node);
430 	}
431 }
432 
json_validate(const char * json)433 bool json_validate(const char *json)
434 {
435 	const char *s = json;
436 
437 	skip_space(&s);
438 	if (!parse_value(&s, NULL))
439 		return false;
440 
441 	skip_space(&s);
442 	if (*s != 0)
443 		return false;
444 
445 	return true;
446 }
447 
json_find_element(JsonNode * array,int index)448 JsonNode *json_find_element(JsonNode *array, int index)
449 {
450 	JsonNode *element;
451 	int i = 0;
452 
453 	if (array == NULL || array->tag != JSON_ARRAY)
454 		return NULL;
455 
456 	json_foreach(element, array) {
457 		if (i == index)
458 			return element;
459 		i++;
460 	}
461 
462 	return NULL;
463 }
464 
json_find_member(JsonNode * object,const char * name)465 JsonNode *json_find_member(JsonNode *object, const char *name)
466 {
467 	JsonNode *member;
468 
469 	if (object == NULL || object->tag != JSON_OBJECT)
470 		return NULL;
471 
472 	json_foreach(member, object)
473 		if (strcmp(member->key, name) == 0)
474 			return member;
475 
476 	return NULL;
477 }
478 
json_first_child(const JsonNode * node)479 JsonNode *json_first_child(const JsonNode *node)
480 {
481 	if (node != NULL && (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT))
482 		return node->children.head;
483 	return NULL;
484 }
485 
mknode(JsonTag tag)486 static JsonNode *mknode(JsonTag tag)
487 {
488 	JsonNode *ret = (JsonNode*) calloc(1, sizeof(JsonNode));
489 	if (ret == NULL)
490 		out_of_memory();
491 	ret->tag = tag;
492 	return ret;
493 }
494 
json_mknull(void)495 JsonNode *json_mknull(void)
496 {
497 	return mknode(JSON_NULL);
498 }
499 
json_mkbool(bool b)500 JsonNode *json_mkbool(bool b)
501 {
502 	JsonNode *ret = mknode(JSON_BOOL);
503 	ret->bool_ = b;
504 	return ret;
505 }
506 
mkstring(char * s)507 static JsonNode *mkstring(char *s)
508 {
509 	JsonNode *ret = mknode(JSON_STRING);
510 	ret->string_ = s;
511 	return ret;
512 }
513 
json_mkstring(const char * s)514 JsonNode *json_mkstring(const char *s)
515 {
516 	return mkstring(json_strdup(s));
517 }
518 
json_mknumber(double n)519 JsonNode *json_mknumber(double n)
520 {
521 	JsonNode *node = mknode(JSON_NUMBER);
522 	node->number_ = n;
523 	return node;
524 }
525 
json_mkarray(void)526 JsonNode *json_mkarray(void)
527 {
528 	return mknode(JSON_ARRAY);
529 }
530 
json_mkobject(void)531 JsonNode *json_mkobject(void)
532 {
533 	return mknode(JSON_OBJECT);
534 }
535 
append_node(JsonNode * parent,JsonNode * child)536 static void append_node(JsonNode *parent, JsonNode *child)
537 {
538 	child->parent = parent;
539 	child->prev = parent->children.tail;
540 	child->next = NULL;
541 
542 	if (parent->children.tail != NULL)
543 		parent->children.tail->next = child;
544 	else
545 		parent->children.head = child;
546 	parent->children.tail = child;
547 }
548 
prepend_node(JsonNode * parent,JsonNode * child)549 static void prepend_node(JsonNode *parent, JsonNode *child)
550 {
551 	child->parent = parent;
552 	child->prev = NULL;
553 	child->next = parent->children.head;
554 
555 	if (parent->children.head != NULL)
556 		parent->children.head->prev = child;
557 	else
558 		parent->children.tail = child;
559 	parent->children.head = child;
560 }
561 
append_member(JsonNode * object,char * key,JsonNode * value)562 static void append_member(JsonNode *object, char *key, JsonNode *value)
563 {
564 	value->key = key;
565 	append_node(object, value);
566 }
567 
json_append_element(JsonNode * array,JsonNode * element)568 void json_append_element(JsonNode *array, JsonNode *element)
569 {
570 	assert(array->tag == JSON_ARRAY);
571 	assert(element->parent == NULL);
572 
573 	append_node(array, element);
574 }
575 
json_prepend_element(JsonNode * array,JsonNode * element)576 void json_prepend_element(JsonNode *array, JsonNode *element)
577 {
578 	assert(array->tag == JSON_ARRAY);
579 	assert(element->parent == NULL);
580 
581 	prepend_node(array, element);
582 }
583 
json_append_member(JsonNode * object,const char * key,JsonNode * value)584 void json_append_member(JsonNode *object, const char *key, JsonNode *value)
585 {
586 	assert(object->tag == JSON_OBJECT);
587 	assert(value->parent == NULL);
588 
589 	append_member(object, json_strdup(key), value);
590 }
591 
json_prepend_member(JsonNode * object,const char * key,JsonNode * value)592 void json_prepend_member(JsonNode *object, const char *key, JsonNode *value)
593 {
594 	assert(object->tag == JSON_OBJECT);
595 	assert(value->parent == NULL);
596 
597 	value->key = json_strdup(key);
598 	prepend_node(object, value);
599 }
600 
json_remove_from_parent(JsonNode * node)601 void json_remove_from_parent(JsonNode *node)
602 {
603 	JsonNode *parent = node->parent;
604 
605 	if (parent != NULL) {
606 		if (node->prev != NULL)
607 			node->prev->next = node->next;
608 		else
609 			parent->children.head = node->next;
610 		if (node->next != NULL)
611 			node->next->prev = node->prev;
612 		else
613 			parent->children.tail = node->prev;
614 
615 		free(node->key);
616 
617 		node->parent = NULL;
618 		node->prev = node->next = NULL;
619 		node->key = NULL;
620 	}
621 }
622 
parse_value(const char ** sp,JsonNode ** out)623 static bool parse_value(const char **sp, JsonNode **out)
624 {
625 	const char *s = *sp;
626 
627 	switch (*s) {
628 		case 'n':
629 			if (expect_literal(&s, "null")) {
630 				if (out)
631 					*out = json_mknull();
632 				*sp = s;
633 				return true;
634 			}
635 			return false;
636 
637 		case 'f':
638 			if (expect_literal(&s, "false")) {
639 				if (out)
640 					*out = json_mkbool(false);
641 				*sp = s;
642 				return true;
643 			}
644 			return false;
645 
646 		case 't':
647 			if (expect_literal(&s, "true")) {
648 				if (out)
649 					*out = json_mkbool(true);
650 				*sp = s;
651 				return true;
652 			}
653 			return false;
654 
655 		case '"': {
656 			char *str;
657 			if (parse_string(&s, out ? &str : NULL)) {
658 				if (out)
659 					*out = mkstring(str);
660 				*sp = s;
661 				return true;
662 			}
663 			return false;
664 		}
665 
666 		case '[':
667 			if (parse_array(&s, out)) {
668 				*sp = s;
669 				return true;
670 			}
671 			return false;
672 
673 		case '{':
674 			if (parse_object(&s, out)) {
675 				*sp = s;
676 				return true;
677 			}
678 			return false;
679 
680 		default: {
681 			double num;
682 			if (parse_number(&s, out ? &num : NULL)) {
683 				if (out)
684 					*out = json_mknumber(num);
685 				*sp = s;
686 				return true;
687 			}
688 			return false;
689 		}
690 	}
691 }
692 
parse_array(const char ** sp,JsonNode ** out)693 static bool parse_array(const char **sp, JsonNode **out)
694 {
695 	const char *s = *sp;
696 	JsonNode *ret = out ? json_mkarray() : NULL;
697 	JsonNode *element;
698 
699 	if (*s++ != '[')
700 		goto failure;
701 	skip_space(&s);
702 
703 	if (*s == ']') {
704 		s++;
705 		goto success;
706 	}
707 
708 	for (;;) {
709 		if (!parse_value(&s, out ? &element : NULL))
710 			goto failure;
711 		skip_space(&s);
712 
713 		if (out)
714 			json_append_element(ret, element);
715 
716 		if (*s == ']') {
717 			s++;
718 			goto success;
719 		}
720 
721 		if (*s++ != ',')
722 			goto failure;
723 		skip_space(&s);
724 	}
725 
726 success:
727 	*sp = s;
728 	if (out)
729 		*out = ret;
730 	return true;
731 
732 failure:
733 	json_delete(ret);
734 	return false;
735 }
736 
parse_object(const char ** sp,JsonNode ** out)737 static bool parse_object(const char **sp, JsonNode **out)
738 {
739 	const char *s = *sp;
740 	JsonNode *ret = out ? json_mkobject() : NULL;
741 	char *key;
742 	JsonNode *value;
743 
744 	if (*s++ != '{')
745 		goto failure;
746 	skip_space(&s);
747 
748 	if (*s == '}') {
749 		s++;
750 		goto success;
751 	}
752 
753 	for (;;) {
754 		if (!parse_string(&s, out ? &key : NULL))
755 			goto failure;
756 		skip_space(&s);
757 
758 		if (*s++ != ':')
759 			goto failure_free_key;
760 		skip_space(&s);
761 
762 		if (!parse_value(&s, out ? &value : NULL))
763 			goto failure_free_key;
764 		skip_space(&s);
765 
766 		if (out)
767 			append_member(ret, key, value);
768 
769 		if (*s == '}') {
770 			s++;
771 			goto success;
772 		}
773 
774 		if (*s++ != ',')
775 			goto failure;
776 		skip_space(&s);
777 	}
778 
779 success:
780 	*sp = s;
781 	if (out)
782 		*out = ret;
783 	return true;
784 
785 failure_free_key:
786 	if (out)
787 		free(key);
788 failure:
789 	json_delete(ret);
790 	return false;
791 }
792 
parse_string(const char ** sp,char ** out)793 bool parse_string(const char **sp, char **out)
794 {
795 	const char *s = *sp;
796 	SB sb;
797 	char throwaway_buffer[4];
798 		/* enough space for a UTF-8 character */
799 	char *b;
800 
801 	if (*s++ != '"')
802 		return false;
803 
804 	if (out) {
805 		sb_init(&sb);
806 		sb_need(&sb, 4);
807 		b = sb.cur;
808 	} else {
809 		b = throwaway_buffer;
810 	}
811 
812 	while (*s != '"') {
813 		unsigned char c = *s++;
814 
815 		/* Parse next character, and write it to b. */
816 		if (c == '\\') {
817 			c = *s++;
818 			switch (c) {
819 				case '"':
820 				case '\\':
821 				case '/':
822 					*b++ = c;
823 					break;
824 				case 'b':
825 					*b++ = '\b';
826 					break;
827 				case 'f':
828 					*b++ = '\f';
829 					break;
830 				case 'n':
831 					*b++ = '\n';
832 					break;
833 				case 'r':
834 					*b++ = '\r';
835 					break;
836 				case 't':
837 					*b++ = '\t';
838 					break;
839 				case 'u':
840 				{
841 					uint16_t uc, lc;
842 					uchar_t unicode;
843 
844 					if (!parse_hex16(&s, &uc))
845 						goto failed;
846 
847 					if (uc >= 0xD800 && uc <= 0xDFFF) {
848 						/* Handle UTF-16 surrogate pair. */
849 						if (*s++ != '\\' || *s++ != 'u' || !parse_hex16(&s, &lc))
850 							goto failed; /* Incomplete surrogate pair. */
851 						if (!from_surrogate_pair(uc, lc, &unicode))
852 							goto failed; /* Invalid surrogate pair. */
853 					} else if (uc == 0) {
854 						/* Disallow "\u0000". */
855 						goto failed;
856 					} else {
857 						unicode = uc;
858 					}
859 
860 					b += utf8_write_char(unicode, b);
861 					break;
862 				}
863 				default:
864 					/* Invalid escape */
865 					goto failed;
866 			}
867 		} else if (c <= 0x1F) {
868 			/* Control characters are not allowed in string literals. */
869 			goto failed;
870 		} else {
871 			/* Validate and echo a UTF-8 character. */
872 			int len;
873 
874 			s--;
875 			len = utf8_validate_cz(s);
876 			if (len == 0)
877 				goto failed; /* Invalid UTF-8 character. */
878 
879 			while (len--)
880 				*b++ = *s++;
881 		}
882 
883 		/*
884 		 * Update sb to know about the new bytes,
885 		 * and set up b to write another character.
886 		 */
887 		if (out) {
888 			sb.cur = b;
889 			sb_need(&sb, 4);
890 			b = sb.cur;
891 		} else {
892 			b = throwaway_buffer;
893 		}
894 	}
895 	s++;
896 
897 	if (out)
898 		*out = sb_finish(&sb);
899 	*sp = s;
900 	return true;
901 
902 failed:
903 	if (out)
904 		sb_free(&sb);
905 	return false;
906 }
907 
908 /*
909  * The JSON spec says that a number shall follow this precise pattern
910  * (spaces and quotes added for readability):
911  *	 '-'? (0 | [1-9][0-9]*) ('.' [0-9]+)? ([Ee] [+-]? [0-9]+)?
912  *
913  * However, some JSON parsers are more liberal.  For instance, PHP accepts
914  * '.5' and '1.'.  JSON.parse accepts '+3'.
915  *
916  * This function takes the strict approach.
917  */
parse_number(const char ** sp,double * out)918 bool parse_number(const char **sp, double *out)
919 {
920 	const char *s = *sp;
921 
922 	/* '-'? */
923 	if (*s == '-')
924 		s++;
925 
926 	/* (0 | [1-9][0-9]*) */
927 	if (*s == '0') {
928 		s++;
929 	} else {
930 		if (!is_digit(*s))
931 			return false;
932 		do {
933 			s++;
934 		} while (is_digit(*s));
935 	}
936 
937 	/* ('.' [0-9]+)? */
938 	if (*s == '.') {
939 		s++;
940 		if (!is_digit(*s))
941 			return false;
942 		do {
943 			s++;
944 		} while (is_digit(*s));
945 	}
946 
947 	/* ([Ee] [+-]? [0-9]+)? */
948 	if (*s == 'E' || *s == 'e') {
949 		s++;
950 		if (*s == '+' || *s == '-')
951 			s++;
952 		if (!is_digit(*s))
953 			return false;
954 		do {
955 			s++;
956 		} while (is_digit(*s));
957 	}
958 
959 	if (out)
960 		*out = strtod(*sp, NULL);
961 
962 	*sp = s;
963 	return true;
964 }
965 
skip_space(const char ** sp)966 static void skip_space(const char **sp)
967 {
968 	const char *s = *sp;
969 	while (is_space(*s))
970 		s++;
971 	*sp = s;
972 }
973 
emit_value(SB * out,const JsonNode * node)974 static void emit_value(SB *out, const JsonNode *node)
975 {
976 	assert(tag_is_valid(node->tag));
977 	switch (node->tag) {
978 		case JSON_NULL:
979 			sb_puts(out, "null");
980 			break;
981 		case JSON_BOOL:
982 			sb_puts(out, node->bool_ ? "true" : "false");
983 			break;
984 		case JSON_STRING:
985 			emit_string(out, node->string_);
986 			break;
987 		case JSON_NUMBER:
988 			emit_number(out, node->number_);
989 			break;
990 		case JSON_ARRAY:
991 			emit_array(out, node);
992 			break;
993 		case JSON_OBJECT:
994 			emit_object(out, node);
995 			break;
996 		default:
997 			assert(false);
998 	}
999 }
1000 
emit_value_indented(SB * out,const JsonNode * node,const char * space,int indent_level)1001 void emit_value_indented(SB *out, const JsonNode *node, const char *space, int indent_level)
1002 {
1003 	assert(tag_is_valid(node->tag));
1004 	switch (node->tag) {
1005 		case JSON_NULL:
1006 			sb_puts(out, "null");
1007 			break;
1008 		case JSON_BOOL:
1009 			sb_puts(out, node->bool_ ? "true" : "false");
1010 			break;
1011 		case JSON_STRING:
1012 			emit_string(out, node->string_);
1013 			break;
1014 		case JSON_NUMBER:
1015 			emit_number(out, node->number_);
1016 			break;
1017 		case JSON_ARRAY:
1018 			emit_array_indented(out, node, space, indent_level);
1019 			break;
1020 		case JSON_OBJECT:
1021 			emit_object_indented(out, node, space, indent_level);
1022 			break;
1023 		default:
1024 			assert(false);
1025 	}
1026 }
1027 
emit_array(SB * out,const JsonNode * array)1028 static void emit_array(SB *out, const JsonNode *array)
1029 {
1030 	const JsonNode *element;
1031 
1032 	sb_putc(out, '[');
1033 	json_foreach(element, array) {
1034 		emit_value(out, element);
1035 		if (element->next != NULL)
1036 			sb_putc(out, ',');
1037 	}
1038 	sb_putc(out, ']');
1039 }
1040 
emit_array_indented(SB * out,const JsonNode * array,const char * space,int indent_level)1041 static void emit_array_indented(SB *out, const JsonNode *array, const char *space, int indent_level)
1042 {
1043 	const JsonNode *element = array->children.head;
1044 	int i;
1045 
1046 	if (element == NULL) {
1047 		sb_puts(out, "[]");
1048 		return;
1049 	}
1050 
1051 	sb_puts(out, "[\n");
1052 	while (element != NULL) {
1053 		for (i = 0; i < indent_level + 1; i++)
1054 			sb_puts(out, space);
1055 		emit_value_indented(out, element, space, indent_level + 1);
1056 
1057 		element = element->next;
1058 		sb_puts(out, element != NULL ? ",\n" : "\n");
1059 	}
1060 	for (i = 0; i < indent_level; i++)
1061 		sb_puts(out, space);
1062 	sb_putc(out, ']');
1063 }
1064 
emit_object(SB * out,const JsonNode * object)1065 static void emit_object(SB *out, const JsonNode *object)
1066 {
1067 	const JsonNode *member;
1068 
1069 	sb_putc(out, '{');
1070 	json_foreach(member, object) {
1071 		emit_string(out, member->key);
1072 		sb_putc(out, ':');
1073 		emit_value(out, member);
1074 		if (member->next != NULL)
1075 			sb_putc(out, ',');
1076 	}
1077 	sb_putc(out, '}');
1078 }
1079 
emit_object_indented(SB * out,const JsonNode * object,const char * space,int indent_level)1080 static void emit_object_indented(SB *out, const JsonNode *object, const char *space, int indent_level)
1081 {
1082 	const JsonNode *member = object->children.head;
1083 	int i;
1084 
1085 	if (member == NULL) {
1086 		sb_puts(out, "{}");
1087 		return;
1088 	}
1089 
1090 	sb_puts(out, "{\n");
1091 	while (member != NULL) {
1092 		for (i = 0; i < indent_level + 1; i++)
1093 			sb_puts(out, space);
1094 		emit_string(out, member->key);
1095 		sb_puts(out, ": ");
1096 		emit_value_indented(out, member, space, indent_level + 1);
1097 
1098 		member = member->next;
1099 		sb_puts(out, member != NULL ? ",\n" : "\n");
1100 	}
1101 	for (i = 0; i < indent_level; i++)
1102 		sb_puts(out, space);
1103 	sb_putc(out, '}');
1104 }
1105 
emit_string(SB * out,const char * str)1106 void emit_string(SB *out, const char *str)
1107 {
1108 	bool escape_unicode = false;
1109 	const char *s = str;
1110 	char *b;
1111 
1112 	assert(utf8_validate(str));
1113 
1114 	/*
1115 	 * 14 bytes is enough space to write up to two
1116 	 * \uXXXX escapes and two quotation marks.
1117 	 */
1118 	sb_need(out, 14);
1119 	b = out->cur;
1120 
1121 	*b++ = '"';
1122 	while (*s != 0) {
1123 		unsigned char c = *s++;
1124 
1125 		/* Encode the next character, and write it to b. */
1126 		switch (c) {
1127 			case '"':
1128 				*b++ = '\\';
1129 				*b++ = '"';
1130 				break;
1131 			case '\\':
1132 				*b++ = '\\';
1133 				*b++ = '\\';
1134 				break;
1135 			case '\b':
1136 				*b++ = '\\';
1137 				*b++ = 'b';
1138 				break;
1139 			case '\f':
1140 				*b++ = '\\';
1141 				*b++ = 'f';
1142 				break;
1143 			case '\n':
1144 				*b++ = '\\';
1145 				*b++ = 'n';
1146 				break;
1147 			case '\r':
1148 				*b++ = '\\';
1149 				*b++ = 'r';
1150 				break;
1151 			case '\t':
1152 				*b++ = '\\';
1153 				*b++ = 't';
1154 				break;
1155 			default: {
1156 				int len;
1157 
1158 				s--;
1159 				len = utf8_validate_cz(s);
1160 
1161 				if (len == 0) {
1162 					/*
1163 					 * Handle invalid UTF-8 character gracefully in production
1164 					 * by writing a replacement character (U+FFFD)
1165 					 * and skipping a single byte.
1166 					 *
1167 					 * This should never happen when assertions are enabled
1168 					 * due to the assertion at the beginning of this function.
1169 					 */
1170 					assert(false);
1171 					if (escape_unicode) {
1172 						strcpy(b, "\\uFFFD");
1173 						b += 6;
1174 					} else {
1175 						*b++ = 0xEF;
1176 						*b++ = 0xBF;
1177 						*b++ = 0xBD;
1178 					}
1179 					s++;
1180 				} else if (c < 0x1F || (c >= 0x80 && escape_unicode)) {
1181 					/* Encode using \u.... */
1182 					uint32_t unicode;
1183 
1184 					s += utf8_read_char(s, &unicode);
1185 
1186 					if (unicode <= 0xFFFF) {
1187 						*b++ = '\\';
1188 						*b++ = 'u';
1189 						b += write_hex16(b, unicode);
1190 					} else {
1191 						/* Produce a surrogate pair. */
1192 						uint16_t uc, lc;
1193 						assert(unicode <= 0x10FFFF);
1194 						to_surrogate_pair(unicode, &uc, &lc);
1195 						*b++ = '\\';
1196 						*b++ = 'u';
1197 						b += write_hex16(b, uc);
1198 						*b++ = '\\';
1199 						*b++ = 'u';
1200 						b += write_hex16(b, lc);
1201 					}
1202 				} else {
1203 					/* Write the character directly. */
1204 					while (len--)
1205 						*b++ = *s++;
1206 				}
1207 
1208 				break;
1209 			}
1210 		}
1211 
1212 		/*
1213 		 * Update *out to know about the new bytes,
1214 		 * and set up b to write another encoded character.
1215 		 */
1216 		out->cur = b;
1217 		sb_need(out, 14);
1218 		b = out->cur;
1219 	}
1220 	*b++ = '"';
1221 
1222 	out->cur = b;
1223 }
1224 
emit_number(SB * out,double num)1225 static void emit_number(SB *out, double num)
1226 {
1227 	/*
1228 	 * This isn't exactly how JavaScript renders numbers,
1229 	 * but it should produce valid JSON for reasonable numbers
1230 	 * preserve precision well enough, and avoid some oddities
1231 	 * like 0.3 -> 0.299999999999999988898 .
1232 	 */
1233 	char buf[64];
1234 	sprintf(buf, "%g", num);
1235 
1236 	if (number_is_valid(buf))
1237 		sb_puts(out, buf);
1238 	else
1239 		sb_puts(out, "null");
1240 }
1241 
tag_is_valid(unsigned int tag)1242 static bool tag_is_valid(unsigned int tag)
1243 {
1244 	return (/* tag >= JSON_NULL && */ tag <= JSON_OBJECT);
1245 }
1246 
number_is_valid(const char * num)1247 static bool number_is_valid(const char *num)
1248 {
1249 	return (parse_number(&num, NULL) && *num == '\0');
1250 }
1251 
expect_literal(const char ** sp,const char * str)1252 static bool expect_literal(const char **sp, const char *str)
1253 {
1254 	const char *s = *sp;
1255 
1256 	while (*str != '\0')
1257 		if (*s++ != *str++)
1258 			return false;
1259 
1260 	*sp = s;
1261 	return true;
1262 }
1263 
1264 /*
1265  * Parses exactly 4 hex characters (capital or lowercase).
1266  * Fails if any input chars are not [0-9A-Fa-f].
1267  */
parse_hex16(const char ** sp,uint16_t * out)1268 static bool parse_hex16(const char **sp, uint16_t *out)
1269 {
1270 	const char *s = *sp;
1271 	uint16_t ret = 0;
1272 	uint16_t i;
1273 	uint16_t tmp;
1274 	char c;
1275 
1276 	for (i = 0; i < 4; i++) {
1277 		c = *s++;
1278 		if (c >= '0' && c <= '9')
1279 			tmp = c - '0';
1280 		else if (c >= 'A' && c <= 'F')
1281 			tmp = c - 'A' + 10;
1282 		else if (c >= 'a' && c <= 'f')
1283 			tmp = c - 'a' + 10;
1284 		else
1285 			return false;
1286 
1287 		ret <<= 4;
1288 		ret += tmp;
1289 	}
1290 
1291 	if (out)
1292 		*out = ret;
1293 	*sp = s;
1294 	return true;
1295 }
1296 
1297 /*
1298  * Encodes a 16-bit number into hexadecimal,
1299  * writing exactly 4 hex chars.
1300  */
write_hex16(char * out,uint16_t val)1301 static int write_hex16(char *out, uint16_t val)
1302 {
1303 	const char *hex = "0123456789ABCDEF";
1304 
1305 	*out++ = hex[(val >> 12) & 0xF];
1306 	*out++ = hex[(val >> 8)  & 0xF];
1307 	*out++ = hex[(val >> 4)  & 0xF];
1308 	*out++ = hex[ val        & 0xF];
1309 
1310 	return 4;
1311 }
1312 
json_check(const JsonNode * node,char errmsg[256])1313 bool json_check(const JsonNode *node, char errmsg[256])
1314 {
1315 	#define problem(...) do { \
1316 			if (errmsg != NULL) \
1317 				snprintf(errmsg, 256, __VA_ARGS__); \
1318 			return false; \
1319 		} while (0)
1320 
1321 	if (node->key != NULL && !utf8_validate(node->key))
1322 		problem("key contains invalid UTF-8");
1323 
1324 	if (!tag_is_valid(node->tag))
1325 		problem("tag is invalid (%u)", node->tag);
1326 
1327 	if (node->tag == JSON_BOOL) {
1328 		if (node->bool_ != false && node->bool_ != true)
1329 			problem("bool_ is neither false (%d) nor true (%d)", (int)false, (int)true);
1330 	} else if (node->tag == JSON_STRING) {
1331 		if (node->string_ == NULL)
1332 			problem("string_ is NULL");
1333 		if (!utf8_validate(node->string_))
1334 			problem("string_ contains invalid UTF-8");
1335 	} else if (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT) {
1336 		JsonNode *head = node->children.head;
1337 		JsonNode *tail = node->children.tail;
1338 
1339 		if (head == NULL || tail == NULL) {
1340 			if (head != NULL)
1341 				problem("tail is NULL, but head is not");
1342 			if (tail != NULL)
1343 				problem("head is NULL, but tail is not");
1344 		} else {
1345 			JsonNode *child;
1346 			JsonNode *last = NULL;
1347 
1348 			if (head->prev != NULL)
1349 				problem("First child's prev pointer is not NULL");
1350 
1351 			for (child = head; child != NULL; last = child, child = child->next) {
1352 				if (child == node)
1353 					problem("node is its own child");
1354 				if (child->next == child)
1355 					problem("child->next == child (cycle)");
1356 				if (child->next == head)
1357 					problem("child->next == head (cycle)");
1358 
1359 				if (child->parent != node)
1360 					problem("child does not point back to parent");
1361 				if (child->next != NULL && child->next->prev != child)
1362 					problem("child->next does not point back to child");
1363 
1364 				if (node->tag == JSON_ARRAY && child->key != NULL)
1365 					problem("Array element's key is not NULL");
1366 				if (node->tag == JSON_OBJECT && child->key == NULL)
1367 					problem("Object member's key is NULL");
1368 
1369 				if (!json_check(child, errmsg))
1370 					return false;
1371 			}
1372 
1373 			if (last != tail)
1374 				problem("tail does not match pointer found by starting at head and following next links");
1375 		}
1376 	}
1377 
1378 	return true;
1379 
1380 	#undef problem
1381 }
1382