1 /*
2   Copyright (C) 2011 Joseph A. Adams (joeyadams3.14159@gmail.com)
3   All rights reserved.
4 
5   Permission is hereby granted, free of charge, to any person obtaining a copy
6   of this software and associated documentation files (the "Software"), to deal
7   in the Software without restriction, including without limitation the rights
8   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9   copies of the Software, and to permit persons to whom the Software is
10   furnished to do so, subject to the following conditions:
11 
12   The above copyright notice and this permission notice shall be included in
13   all copies or substantial portions of the Software.
14 
15   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21   THE SOFTWARE.
22 */
23 
24 #include "ccan-json.h"
25 
26 #include <assert.h>
27 #include <stdint.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 
32 /* ProFTPD: stdbool.h equivalents */
33 #ifndef TRUE
34 # define TRUE	1
35 #endif
36 
37 #ifndef FALSE
38 # define FALSE	0
39 #endif
40 
41 /* Out-of-memory error handling. */
default_oom(void)42 static void default_oom(void)
43 {
44 	fprintf(stderr, "%s", "Out of memory!\n");
45 	exit(EXIT_FAILURE);
46 }
47 
48 static void (*json_oom)(void) = default_oom;
49 
50 /* Sadly, strdup is not portable. */
json_strdup(const char * str)51 static char *json_strdup(const char *str)
52 {
53 	char *ret = (char*) malloc(strlen(str) + 1);
54 	if (ret == NULL) {
55 		json_oom();
56 		return NULL;
57 	}
58 	strcpy(ret, str);
59 	return ret;
60 }
61 
62 /* String buffer */
63 
64 typedef struct
65 {
66 	char *cur;
67 	char *end;
68 	char *start;
69 } SB;
70 
sb_init(SB * sb)71 static void sb_init(SB *sb)
72 {
73 	sb->start = (char*) malloc(17);
74 	if (sb->start == NULL)
75 		json_oom();
76 	sb->cur = sb->start;
77 	sb->end = sb->start + 16;
78 }
79 
80 /* sb and need may be evaluated multiple times. */
81 #define sb_need(sb, need) do {                  \
82 		if ((sb)->end - (sb)->cur < (need))     \
83 			sb_grow(sb, need);                  \
84 	} while (0)
85 
sb_grow(SB * sb,int need)86 static void sb_grow(SB *sb, int need)
87 {
88 	size_t length = sb->cur - sb->start;
89 	size_t alloc = sb->end - sb->start;
90 
91 	do {
92 		alloc *= 2;
93 	} while (alloc < length + need);
94 
95 	sb->start = (char*) realloc(sb->start, alloc + 1);
96 	if (sb->start == NULL)
97 		json_oom();
98 	sb->cur = sb->start + length;
99 	sb->end = sb->start + alloc;
100 }
101 
sb_put(SB * sb,const char * bytes,int count)102 static void sb_put(SB *sb, const char *bytes, int count)
103 {
104 	sb_need(sb, count);
105 	memcpy(sb->cur, bytes, count);
106 	sb->cur += count;
107 }
108 
109 #define sb_putc(sb, c) do {         \
110 		if ((sb)->cur >= (sb)->end) \
111 			sb_grow(sb, 1);         \
112 		*(sb)->cur++ = (c);         \
113 	} while (0)
114 
sb_puts(SB * sb,const char * str)115 static void sb_puts(SB *sb, const char *str)
116 {
117 	sb_put(sb, str, strlen(str));
118 }
119 
sb_finish(SB * sb)120 static char *sb_finish(SB *sb)
121 {
122 	*sb->cur = 0;
123 	assert(sb->start <= sb->cur && strlen(sb->start) == (size_t)(sb->cur - sb->start));
124 	return sb->start;
125 }
126 
sb_free(SB * sb)127 static void sb_free(SB *sb)
128 {
129 	free(sb->start);
130 }
131 
132 /*
133  * Unicode helper functions
134  *
135  * These are taken from the ccan/charset module and customized a bit.
136  * Putting them here means the compiler can (choose to) inline them,
137  * and it keeps ccan/json from having a dependency.
138  */
139 
140 #if 0
141 /* PROFTPD NOTE: This has been commented out, since it will cause
142  * problems on platforms where `uchar_t' is already defined.  Instead,
143  * all ocurrences of `uchar_t' in the original code have been replaced
144  * with `uint32_t'.
145  */
146 
147 /*
148  * Type for Unicode codepoints.
149  * We need our own because wchar_t might be 16 bits.
150  */
151 typedef uint32_t uchar_t;
152 #endif
153 
154 /*
155  * Validate a single UTF-8 character starting at @s.
156  * The string must be null-terminated.
157  *
158  * If it's valid, return its length (1 thru 4).
159  * If it's invalid or clipped, return 0.
160  *
161  * This function implements the syntax given in RFC3629, which is
162  * the same as that given in The Unicode Standard, Version 6.0.
163  *
164  * It has the following properties:
165  *
166  *  * All codepoints U+0000..U+10FFFF may be encoded,
167  *    except for U+D800..U+DFFF, which are reserved
168  *    for UTF-16 surrogate pair encoding.
169  *  * UTF-8 byte sequences longer than 4 bytes are not permitted,
170  *    as they exceed the range of Unicode.
171  *  * The sixty-six Unicode "non-characters" are permitted
172  *    (namely, U+FDD0..U+FDEF, U+xxFFFE, and U+xxFFFF).
173  */
utf8_validate_cz(const char * s)174 static int utf8_validate_cz(const char *s)
175 {
176 	unsigned char c = *s++;
177 
178 	if (c <= 0x7F) {        /* 00..7F */
179 		return 1;
180 	} else if (c <= 0xC1) { /* 80..C1 */
181 		/* Disallow overlong 2-byte sequence. */
182 		return 0;
183 	} else if (c <= 0xDF) { /* C2..DF */
184 		/* Make sure subsequent byte is in the range 0x80..0xBF. */
185 		if (((unsigned char)*s++ & 0xC0) != 0x80)
186 			return 0;
187 
188 		return 2;
189 	} else if (c <= 0xEF) { /* E0..EF */
190 		/* Disallow overlong 3-byte sequence. */
191 		if (c == 0xE0 && (unsigned char)*s < 0xA0)
192 			return 0;
193 
194 		/* Disallow U+D800..U+DFFF. */
195 		if (c == 0xED && (unsigned char)*s > 0x9F)
196 			return 0;
197 
198 		/* Make sure subsequent bytes are in the range 0x80..0xBF. */
199 		if (((unsigned char)*s++ & 0xC0) != 0x80)
200 			return 0;
201 		if (((unsigned char)*s++ & 0xC0) != 0x80)
202 			return 0;
203 
204 		return 3;
205 	} else if (c <= 0xF4) { /* F0..F4 */
206 		/* Disallow overlong 4-byte sequence. */
207 		if (c == 0xF0 && (unsigned char)*s < 0x90)
208 			return 0;
209 
210 		/* Disallow codepoints beyond U+10FFFF. */
211 		if (c == 0xF4 && (unsigned char)*s > 0x8F)
212 			return 0;
213 
214 		/* Make sure subsequent bytes are in the range 0x80..0xBF. */
215 		if (((unsigned char)*s++ & 0xC0) != 0x80)
216 			return 0;
217 		if (((unsigned char)*s++ & 0xC0) != 0x80)
218 			return 0;
219 		if (((unsigned char)*s++ & 0xC0) != 0x80)
220 			return 0;
221 
222 		return 4;
223 	} else {                /* F5..FF */
224 		return 0;
225 	}
226 }
227 
228 /* Validate a null-terminated UTF-8 string. */
utf8_validate(const char * s)229 static int utf8_validate(const char *s)
230 {
231 	int len;
232 
233 	for (; *s != 0; s += len) {
234 		len = utf8_validate_cz(s);
235 		if (len == 0)
236 			return FALSE;
237 	}
238 
239 	return TRUE;
240 }
241 
242 /*
243  * Read a single UTF-8 character starting at @s,
244  * returning the length, in bytes, of the character read.
245  *
246  * This function assumes input is valid UTF-8,
247  * and that there are enough characters in front of @s.
248  */
utf8_read_char(const char * s,uint32_t * out)249 static int utf8_read_char(const char *s, uint32_t *out)
250 {
251 	const unsigned char *c = (const unsigned char*) s;
252 
253 	assert(utf8_validate_cz(s));
254 
255 	if (c[0] <= 0x7F) {
256 		/* 00..7F */
257 		*out = c[0];
258 		return 1;
259 	} else if (c[0] <= 0xDF) {
260 		/* C2..DF (unless input is invalid) */
261 		*out = ((uint32_t)c[0] & 0x1F) << 6 |
262 		       ((uint32_t)c[1] & 0x3F);
263 		return 2;
264 	} else if (c[0] <= 0xEF) {
265 		/* E0..EF */
266 		*out = ((uint32_t)c[0] &  0xF) << 12 |
267 		       ((uint32_t)c[1] & 0x3F) << 6  |
268 		       ((uint32_t)c[2] & 0x3F);
269 		return 3;
270 	} else {
271 		/* F0..F4 (unless input is invalid) */
272 		*out = ((uint32_t)c[0] &  0x7) << 18 |
273 		       ((uint32_t)c[1] & 0x3F) << 12 |
274 		       ((uint32_t)c[2] & 0x3F) << 6  |
275 		       ((uint32_t)c[3] & 0x3F);
276 		return 4;
277 	}
278 }
279 
280 /*
281  * Write a single UTF-8 character to @s,
282  * returning the length, in bytes, of the character written.
283  *
284  * @unicode must be U+0000..U+10FFFF, but not U+D800..U+DFFF.
285  *
286  * This function will write up to 4 bytes to @out.
287  */
utf8_write_char(uint32_t unicode,char * out)288 static int utf8_write_char(uint32_t unicode, char *out)
289 {
290 	unsigned char *o = (unsigned char*) out;
291 
292 	assert(unicode <= 0x10FFFF && !(unicode >= 0xD800 && unicode <= 0xDFFF));
293 
294 	if (unicode <= 0x7F) {
295 		/* U+0000..U+007F */
296 		*o++ = unicode;
297 		return 1;
298 	} else if (unicode <= 0x7FF) {
299 		/* U+0080..U+07FF */
300 		*o++ = 0xC0 | unicode >> 6;
301 		*o++ = 0x80 | (unicode & 0x3F);
302 		return 2;
303 	} else if (unicode <= 0xFFFF) {
304 		/* U+0800..U+FFFF */
305 		*o++ = 0xE0 | unicode >> 12;
306 		*o++ = 0x80 | (unicode >> 6 & 0x3F);
307 		*o++ = 0x80 | (unicode & 0x3F);
308 		return 3;
309 	} else {
310 		/* U+10000..U+10FFFF */
311 		*o++ = 0xF0 | unicode >> 18;
312 		*o++ = 0x80 | (unicode >> 12 & 0x3F);
313 		*o++ = 0x80 | (unicode >> 6 & 0x3F);
314 		*o++ = 0x80 | (unicode & 0x3F);
315 		return 4;
316 	}
317 }
318 
319 /*
320  * Compute the Unicode codepoint of a UTF-16 surrogate pair.
321  *
322  * @uc should be 0xD800..0xDBFF, and @lc should be 0xDC00..0xDFFF.
323  * If they aren't, this function returns false.
324  */
from_surrogate_pair(uint16_t uc,uint16_t lc,uint32_t * unicode)325 static int from_surrogate_pair(uint16_t uc, uint16_t lc, uint32_t *unicode)
326 {
327 	if (uc >= 0xD800 && uc <= 0xDBFF && lc >= 0xDC00 && lc <= 0xDFFF) {
328 		*unicode = 0x10000 + ((((uint32_t)uc & 0x3FF) << 10) | (lc & 0x3FF));
329 		return TRUE;
330 	} else {
331 		return FALSE;
332 	}
333 }
334 
335 /*
336  * Construct a UTF-16 surrogate pair given a Unicode codepoint.
337  *
338  * @unicode must be U+10000..U+10FFFF.
339  */
to_surrogate_pair(uint32_t unicode,uint16_t * uc,uint16_t * lc)340 static void to_surrogate_pair(uint32_t unicode, uint16_t *uc, uint16_t *lc)
341 {
342 	uint32_t n;
343 
344 	assert(unicode >= 0x10000 && unicode <= 0x10FFFF);
345 
346 	n = unicode - 0x10000;
347 	*uc = ((n >> 10) & 0x3FF) | 0xD800;
348 	*lc = (n & 0x3FF) | 0xDC00;
349 }
350 
351 #define is_space(c) ((c) == '\t' || (c) == '\n' || (c) == '\r' || (c) == ' ')
352 #define is_digit(c) ((c) >= '0' && (c) <= '9')
353 
354 static int parse_value      (const char **sp, JsonNode        **out, unsigned int depth);
355 static int parse_string     (const char **sp, char            **out);
356 static int parse_number     (const char **sp, double           *out);
357 static int parse_array      (const char **sp, JsonNode        **out, unsigned int depth);
358 static int parse_object     (const char **sp, JsonNode        **out, unsigned int depth);
359 static int parse_hex16      (const char **sp, uint16_t         *out);
360 
361 static int expect_literal   (const char **sp, const char *str);
362 static void skip_space      (const char **sp);
363 
364 static void emit_value              (SB *out, const JsonNode *node);
365 static void emit_value_indented     (SB *out, const JsonNode *node, const char *space, int indent_level);
366 static void emit_string             (SB *out, const char *str);
367 static void emit_number             (SB *out, double num);
368 static void emit_array              (SB *out, const JsonNode *array);
369 static void emit_array_indented     (SB *out, const JsonNode *array, const char *space, int indent_level);
370 static void emit_object             (SB *out, const JsonNode *object);
371 static void emit_object_indented    (SB *out, const JsonNode *object, const char *space, int indent_level);
372 
373 static int write_hex16(char *out, uint16_t val);
374 
375 static JsonNode *mknode(JsonTag tag);
376 static void append_node(JsonNode *parent, JsonNode *child);
377 static void prepend_node(JsonNode *parent, JsonNode *child);
378 static void append_member(JsonNode *object, char *key, JsonNode *value);
379 
380 /* Assertion-friendly validity checks */
381 static int tag_is_valid(unsigned int tag);
382 static int number_is_valid(const char *num);
383 
json_set_oom(void (* oom)(void))384 void json_set_oom(void (*oom)(void)) {
385 	json_oom = (oom != NULL ? oom : default_oom);
386 }
387 
json_decode(const char * json)388 JsonNode *json_decode(const char *json)
389 {
390 	const char *s = json;
391 	JsonNode *ret;
392 
393 	skip_space(&s);
394 	if (!parse_value(&s, &ret, 0))
395 		return NULL;
396 
397 	skip_space(&s);
398 	if (*s != 0) {
399 		json_delete(ret);
400 		return NULL;
401 	}
402 
403 	return ret;
404 }
405 
json_encode(const JsonNode * node)406 char *json_encode(const JsonNode *node)
407 {
408 	return json_stringify(node, NULL);
409 }
410 
json_encode_string(const char * str)411 char *json_encode_string(const char *str)
412 {
413 	char *encoded_str;
414 	SB sb;
415 	sb_init(&sb);
416 
417 	emit_string(&sb, str);
418 
419 	encoded_str = json_strdup(sb_finish(&sb));
420 	sb_free(&sb);
421 	return encoded_str;
422 }
423 
json_stringify(const JsonNode * node,const char * space)424 char *json_stringify(const JsonNode *node, const char *space)
425 {
426 	char *str;
427 	SB sb;
428 	sb_init(&sb);
429 
430 	if (space != NULL)
431 		emit_value_indented(&sb, node, space, 0);
432 	else
433 		emit_value(&sb, node);
434 
435 	str = json_strdup(sb_finish(&sb));
436 	sb_free(&sb);
437 	return str;
438 }
439 
json_delete(JsonNode * node)440 void json_delete(JsonNode *node)
441 {
442 	if (node != NULL) {
443 		json_remove_from_parent(node);
444 
445 		switch (node->tag) {
446 			case JSON_STRING:
447 				free(node->string_);
448 				break;
449 			case JSON_ARRAY:
450 			case JSON_OBJECT:
451 			{
452 				JsonNode *child, *next;
453 				for (child = node->children.head; child != NULL; child = next) {
454 					next = child->next;
455 					json_delete(child);
456 				}
457 				break;
458 			}
459 			default:;
460 		}
461 
462 		free(node);
463 	}
464 }
465 
json_validate(const char * json)466 int json_validate(const char *json)
467 {
468 	const char *s = json;
469 
470 	skip_space(&s);
471 	if (!parse_value(&s, NULL, 0))
472 		return FALSE;
473 
474 	skip_space(&s);
475 	if (*s != 0)
476 		return FALSE;
477 
478 	return TRUE;
479 }
480 
json_find_element(JsonNode * array,unsigned int idx)481 JsonNode *json_find_element(JsonNode *array, unsigned int idx)
482 {
483 	JsonNode *element;
484 	unsigned int i = 0;
485 
486 	if (array == NULL || array->tag != JSON_ARRAY)
487 		return NULL;
488 
489 	json_foreach(element, array) {
490 		if (i == idx)
491 			return element;
492 		i++;
493 	}
494 
495 	return NULL;
496 }
497 
json_find_member(JsonNode * object,const char * name)498 JsonNode *json_find_member(JsonNode *object, const char *name)
499 {
500 	JsonNode *member;
501 
502 	if (object == NULL || object->tag != JSON_OBJECT)
503 		return NULL;
504 
505 	json_foreach(member, object)
506 		if (strcmp(member->key, name) == 0)
507 			return member;
508 
509 	return NULL;
510 }
511 
json_first_child(const JsonNode * node)512 JsonNode *json_first_child(const JsonNode *node)
513 {
514 	if (node != NULL && (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT))
515 		return node->children.head;
516 	return NULL;
517 }
518 
mknode(JsonTag tag)519 static JsonNode *mknode(JsonTag tag)
520 {
521 	JsonNode *ret = (JsonNode*) calloc(1, sizeof(JsonNode));
522 	if (ret == NULL) {
523 		json_oom();
524 		return NULL;
525 	}
526 	ret->tag = tag;
527 	return ret;
528 }
529 
json_mknull(void)530 JsonNode *json_mknull(void)
531 {
532 	return mknode(JSON_NULL);
533 }
534 
json_mkbool(int b)535 JsonNode *json_mkbool(int b)
536 {
537 	JsonNode *ret = mknode(JSON_BOOL);
538 	ret->bool_ = b;
539 	return ret;
540 }
541 
mkstring(char * s)542 static JsonNode *mkstring(char *s)
543 {
544 	JsonNode *ret = mknode(JSON_STRING);
545 	ret->string_ = s;
546 	return ret;
547 }
548 
json_mkstring(const char * s)549 JsonNode *json_mkstring(const char *s)
550 {
551 	return mkstring(json_strdup(s));
552 }
553 
json_mknumber(double n)554 JsonNode *json_mknumber(double n)
555 {
556 	JsonNode *node = mknode(JSON_NUMBER);
557 	node->number_ = n;
558 	return node;
559 }
560 
json_mkarray(void)561 JsonNode *json_mkarray(void)
562 {
563 	return mknode(JSON_ARRAY);
564 }
565 
json_mkobject(void)566 JsonNode *json_mkobject(void)
567 {
568 	return mknode(JSON_OBJECT);
569 }
570 
append_node(JsonNode * parent,JsonNode * child)571 static void append_node(JsonNode *parent, JsonNode *child)
572 {
573 	child->parent = parent;
574 	child->prev = parent->children.tail;
575 	child->next = NULL;
576 
577 	if (parent->children.tail != NULL)
578 		parent->children.tail->next = child;
579 	else
580 		parent->children.head = child;
581 	parent->children.tail = child;
582 }
583 
prepend_node(JsonNode * parent,JsonNode * child)584 static void prepend_node(JsonNode *parent, JsonNode *child)
585 {
586 	child->parent = parent;
587 	child->prev = NULL;
588 	child->next = parent->children.head;
589 
590 	if (parent->children.head != NULL)
591 		parent->children.head->prev = child;
592 	else
593 		parent->children.tail = child;
594 	parent->children.head = child;
595 }
596 
append_member(JsonNode * object,char * key,JsonNode * value)597 static void append_member(JsonNode *object, char *key, JsonNode *value)
598 {
599 	value->key = key;
600 	append_node(object, value);
601 }
602 
json_append_element(JsonNode * array,JsonNode * element)603 void json_append_element(JsonNode *array, JsonNode *element)
604 {
605 	assert(array->tag == JSON_ARRAY);
606 	assert(element->parent == NULL);
607 
608 	append_node(array, element);
609 }
610 
json_prepend_element(JsonNode * array,JsonNode * element)611 void json_prepend_element(JsonNode *array, JsonNode *element)
612 {
613 	assert(array->tag == JSON_ARRAY);
614 	assert(element->parent == NULL);
615 
616 	prepend_node(array, element);
617 }
618 
json_append_member(JsonNode * object,const char * key,JsonNode * value)619 void json_append_member(JsonNode *object, const char *key, JsonNode *value)
620 {
621 	assert(object->tag == JSON_OBJECT);
622 	assert(value->parent == NULL);
623 
624 	append_member(object, json_strdup(key), value);
625 }
626 
json_prepend_member(JsonNode * object,const char * key,JsonNode * value)627 void json_prepend_member(JsonNode *object, const char *key, JsonNode *value)
628 {
629 	assert(object->tag == JSON_OBJECT);
630 	assert(value->parent == NULL);
631 
632 	value->key = json_strdup(key);
633 	prepend_node(object, value);
634 }
635 
json_remove_from_parent(JsonNode * node)636 void json_remove_from_parent(JsonNode *node)
637 {
638 	JsonNode *parent = node->parent;
639 
640 	if (parent != NULL) {
641 		if (node->prev != NULL)
642 			node->prev->next = node->next;
643 		else
644 			parent->children.head = node->next;
645 		if (node->next != NULL)
646 			node->next->prev = node->prev;
647 		else
648 			parent->children.tail = node->prev;
649 
650 		free(node->key);
651 
652 		node->parent = NULL;
653 		node->prev = node->next = NULL;
654 		node->key = NULL;
655 	}
656 }
657 
parse_value(const char ** sp,JsonNode ** out,unsigned int depth)658 static int parse_value(const char **sp, JsonNode **out, unsigned int depth)
659 {
660 	const char *s = *sp;
661 
662 	if (depth > CCAN_JSON_MAX_DEPTH) {
663 		return FALSE;
664 	}
665 
666 	switch (*s) {
667 		case 'n':
668 			if (expect_literal(&s, "null")) {
669 				if (out)
670 					*out = json_mknull();
671 				*sp = s;
672 				return TRUE;
673 			}
674 			return FALSE;
675 
676 		case 'f':
677 			if (expect_literal(&s, "false")) {
678 				if (out)
679 					*out = json_mkbool(FALSE);
680 				*sp = s;
681 				return TRUE;
682 			}
683 			return FALSE;
684 
685 		case 't':
686 			if (expect_literal(&s, "true")) {
687 				if (out)
688 					*out = json_mkbool(TRUE);
689 				*sp = s;
690 				return TRUE;
691 			}
692 			return FALSE;
693 
694 		case '"': {
695 			char *str;
696 			if (parse_string(&s, out ? &str : NULL)) {
697 				if (out)
698 					*out = mkstring(str);
699 				*sp = s;
700 				return TRUE;
701 			}
702 			return FALSE;
703 		}
704 
705 		case '[':
706 			if (parse_array(&s, out, depth)) {
707 				*sp = s;
708 				return TRUE;
709 			}
710 			return FALSE;
711 
712 		case '{':
713 			if (parse_object(&s, out, depth)) {
714 				*sp = s;
715 				return TRUE;
716 			}
717 			return FALSE;
718 
719 		default: {
720 			double num;
721 			if (parse_number(&s, out ? &num : NULL)) {
722 				if (out)
723 					*out = json_mknumber(num);
724 				*sp = s;
725 				return TRUE;
726 			}
727 			return FALSE;
728 		}
729 	}
730 }
731 
parse_array(const char ** sp,JsonNode ** out,unsigned int depth)732 static int parse_array(const char **sp, JsonNode **out, unsigned int depth)
733 {
734 	const char *s = *sp;
735 	JsonNode *ret = out ? json_mkarray() : NULL;
736 	JsonNode *element;
737 
738 	depth++;
739 	if (*s++ != '[')
740 		goto failure;
741 	skip_space(&s);
742 
743 	if (*s == ']') {
744 		s++;
745 		goto success;
746 	}
747 
748 	for (;;) {
749 		if (!parse_value(&s, out ? &element : NULL, depth))
750 			goto failure;
751 		skip_space(&s);
752 
753 		if (out)
754 			json_append_element(ret, element);
755 
756 		if (*s == ']') {
757 			s++;
758 			goto success;
759 		}
760 
761 		if (*s++ != ',')
762 			goto failure;
763 		skip_space(&s);
764 	}
765 
766 success:
767 	depth--;
768 	*sp = s;
769 	if (out)
770 		*out = ret;
771 	return TRUE;
772 
773 failure:
774 	depth--;
775 	json_delete(ret);
776 	return FALSE;
777 }
778 
parse_object(const char ** sp,JsonNode ** out,unsigned int depth)779 static int parse_object(const char **sp, JsonNode **out, unsigned int depth)
780 {
781 	const char *s = *sp;
782 	JsonNode *ret = out ? json_mkobject() : NULL;
783 	char *key;
784 	JsonNode *value;
785 
786 	depth++;
787 	if (*s++ != '{')
788 		goto failure;
789 	skip_space(&s);
790 
791 	if (*s == '}') {
792 		s++;
793 		goto success;
794 	}
795 
796 	for (;;) {
797 		if (!parse_string(&s, out ? &key : NULL))
798 			goto failure;
799 		skip_space(&s);
800 
801 		if (*s++ != ':')
802 			goto failure_free_key;
803 		skip_space(&s);
804 
805 		if (!parse_value(&s, out ? &value : NULL, depth))
806 			goto failure_free_key;
807 		skip_space(&s);
808 
809 		if (out)
810 			append_member(ret, key, value);
811 
812 		if (*s == '}') {
813 			s++;
814 			goto success;
815 		}
816 
817 		if (*s++ != ',')
818 			goto failure;
819 		skip_space(&s);
820 	}
821 
822 success:
823 	depth--;
824 	*sp = s;
825 	if (out)
826 		*out = ret;
827 	return TRUE;
828 
829 failure_free_key:
830 	if (out)
831 		free(key);
832 failure:
833 	depth--;
834 	json_delete(ret);
835 	return FALSE;
836 }
837 
parse_string(const char ** sp,char ** out)838 int parse_string(const char **sp, char **out)
839 {
840 	const char *s = *sp;
841 	SB sb;
842 	char throwaway_buffer[4];
843 		/* enough space for a UTF-8 character */
844 	char *b;
845 
846 	if (*s++ != '"')
847 		return FALSE;
848 
849 	if (out) {
850 		sb_init(&sb);
851 		sb_need(&sb, 4);
852 		b = sb.cur;
853 	} else {
854 		b = throwaway_buffer;
855 	}
856 
857 	while (*s != '"') {
858 		unsigned char c = *s++;
859 
860 		/* Parse next character, and write it to b. */
861 		if (c == '\\') {
862 			c = *s++;
863 			switch (c) {
864 				case '"':
865 				case '\\':
866 				case '/':
867 					*b++ = c;
868 					break;
869 				case 'b':
870 					*b++ = '\b';
871 					break;
872 				case 'f':
873 					*b++ = '\f';
874 					break;
875 				case 'n':
876 					*b++ = '\n';
877 					break;
878 				case 'r':
879 					*b++ = '\r';
880 					break;
881 				case 't':
882 					*b++ = '\t';
883 					break;
884 				case 'u':
885 				{
886 					uint16_t uc, lc;
887 					uint32_t unicode;
888 
889 					if (!parse_hex16(&s, &uc))
890 						goto failed;
891 
892 					if (uc >= 0xD800 && uc <= 0xDFFF) {
893 						/* Handle UTF-16 surrogate pair. */
894 						if (*s++ != '\\' || *s++ != 'u' || !parse_hex16(&s, &lc))
895 							goto failed; /* Incomplete surrogate pair. */
896 						if (!from_surrogate_pair(uc, lc, &unicode))
897 							goto failed; /* Invalid surrogate pair. */
898 					} else if (uc == 0) {
899 						/* Disallow "\u0000". */
900 						goto failed;
901 					} else {
902 						unicode = uc;
903 					}
904 
905 					b += utf8_write_char(unicode, b);
906 					break;
907 				}
908 				default:
909 					/* Invalid escape */
910 					goto failed;
911 			}
912 		} else if (c <= 0x1F) {
913 			/* Control characters are not allowed in string literals. */
914 			goto failed;
915 		} else {
916 			/* Validate and echo a UTF-8 character. */
917 			int len;
918 
919 			s--;
920 			len = utf8_validate_cz(s);
921 			if (len == 0)
922 				goto failed; /* Invalid UTF-8 character. */
923 
924 			while (len--)
925 				*b++ = *s++;
926 		}
927 
928 		/*
929 		 * Update sb to know about the new bytes,
930 		 * and set up b to write another character.
931 		 */
932 		if (out) {
933 			sb.cur = b;
934 			sb_need(&sb, 4);
935 			b = sb.cur;
936 		} else {
937 			b = throwaway_buffer;
938 		}
939 	}
940 	s++;
941 
942 	if (out)
943 		*out = sb_finish(&sb);
944 	*sp = s;
945 	return TRUE;
946 
947 failed:
948 	if (out)
949 		sb_free(&sb);
950 	return FALSE;
951 }
952 
953 /*
954  * The JSON spec says that a number shall follow this precise pattern
955  * (spaces and quotes added for readability):
956  *	 '-'? (0 | [1-9][0-9]*) ('.' [0-9]+)? ([Ee] [+-]? [0-9]+)?
957  *
958  * However, some JSON parsers are more liberal.  For instance, PHP accepts
959  * '.5' and '1.'.  JSON.parse accepts '+3'.
960  *
961  * This function takes the strict approach.
962  */
parse_number(const char ** sp,double * out)963 int parse_number(const char **sp, double *out)
964 {
965 	const char *s = *sp;
966 
967 	/* '-'? */
968 	if (*s == '-')
969 		s++;
970 
971 	/* (0 | [1-9][0-9]*) */
972 	if (*s == '0') {
973 		s++;
974 	} else {
975 		if (!is_digit(*s))
976 			return FALSE;
977 		do {
978 			s++;
979 		} while (is_digit(*s));
980 	}
981 
982 	/* ('.' [0-9]+)? */
983 	if (*s == '.') {
984 		s++;
985 		if (!is_digit(*s))
986 			return FALSE;
987 		do {
988 			s++;
989 		} while (is_digit(*s));
990 	}
991 
992 	/* ([Ee] [+-]? [0-9]+)? */
993 	if (*s == 'E' || *s == 'e') {
994 		s++;
995 		if (*s == '+' || *s == '-')
996 			s++;
997 		if (!is_digit(*s))
998 			return FALSE;
999 		do {
1000 			s++;
1001 		} while (is_digit(*s));
1002 	}
1003 
1004 	if (out)
1005 		*out = strtod(*sp, NULL);
1006 
1007 	*sp = s;
1008 	return TRUE;
1009 }
1010 
skip_space(const char ** sp)1011 static void skip_space(const char **sp)
1012 {
1013 	const char *s = *sp;
1014 	while (is_space(*s))
1015 		s++;
1016 	*sp = s;
1017 }
1018 
emit_value(SB * out,const JsonNode * node)1019 static void emit_value(SB *out, const JsonNode *node)
1020 {
1021 	assert(tag_is_valid(node->tag));
1022 	switch (node->tag) {
1023 		case JSON_NULL:
1024 			sb_puts(out, "null");
1025 			break;
1026 		case JSON_BOOL:
1027 			sb_puts(out, node->bool_ ? "true" : "false");
1028 			break;
1029 		case JSON_STRING:
1030 			emit_string(out, node->string_);
1031 			break;
1032 		case JSON_NUMBER:
1033 			emit_number(out, node->number_);
1034 			break;
1035 		case JSON_ARRAY:
1036 			emit_array(out, node);
1037 			break;
1038 		case JSON_OBJECT:
1039 			emit_object(out, node);
1040 			break;
1041 		default:
1042 			assert(FALSE);
1043 	}
1044 }
1045 
emit_value_indented(SB * out,const JsonNode * node,const char * space,int indent_level)1046 void emit_value_indented(SB *out, const JsonNode *node, const char *space, int indent_level)
1047 {
1048 	assert(tag_is_valid(node->tag));
1049 	switch (node->tag) {
1050 		case JSON_NULL:
1051 			sb_puts(out, "null");
1052 			break;
1053 		case JSON_BOOL:
1054 			sb_puts(out, node->bool_ ? "true" : "false");
1055 			break;
1056 		case JSON_STRING:
1057 			emit_string(out, node->string_);
1058 			break;
1059 		case JSON_NUMBER:
1060 			emit_number(out, node->number_);
1061 			break;
1062 		case JSON_ARRAY:
1063 			emit_array_indented(out, node, space, indent_level);
1064 			break;
1065 		case JSON_OBJECT:
1066 			emit_object_indented(out, node, space, indent_level);
1067 			break;
1068 		default:
1069 			assert(FALSE);
1070 	}
1071 }
1072 
emit_array(SB * out,const JsonNode * array)1073 static void emit_array(SB *out, const JsonNode *array)
1074 {
1075 	const JsonNode *element;
1076 
1077 	sb_putc(out, '[');
1078 	json_foreach(element, array) {
1079 		emit_value(out, element);
1080 		if (element->next != NULL)
1081 			sb_putc(out, ',');
1082 	}
1083 	sb_putc(out, ']');
1084 }
1085 
emit_array_indented(SB * out,const JsonNode * array,const char * space,int indent_level)1086 static void emit_array_indented(SB *out, const JsonNode *array, const char *space, int indent_level)
1087 {
1088 	const JsonNode *element = array->children.head;
1089 	int i;
1090 
1091 	if (element == NULL) {
1092 		sb_puts(out, "[]");
1093 		return;
1094 	}
1095 
1096 	sb_puts(out, "[\n");
1097 	while (element != NULL) {
1098 		for (i = 0; i < indent_level + 1; i++)
1099 			sb_puts(out, space);
1100 		emit_value_indented(out, element, space, indent_level + 1);
1101 
1102 		element = element->next;
1103 		sb_puts(out, element != NULL ? ",\n" : "\n");
1104 	}
1105 	for (i = 0; i < indent_level; i++)
1106 		sb_puts(out, space);
1107 	sb_putc(out, ']');
1108 }
1109 
emit_object(SB * out,const JsonNode * object)1110 static void emit_object(SB *out, const JsonNode *object)
1111 {
1112 	const JsonNode *member;
1113 
1114 	sb_putc(out, '{');
1115 	json_foreach(member, object) {
1116 		emit_string(out, member->key);
1117 		sb_putc(out, ':');
1118 		emit_value(out, member);
1119 		if (member->next != NULL)
1120 			sb_putc(out, ',');
1121 	}
1122 	sb_putc(out, '}');
1123 }
1124 
emit_object_indented(SB * out,const JsonNode * object,const char * space,int indent_level)1125 static void emit_object_indented(SB *out, const JsonNode *object, const char *space, int indent_level)
1126 {
1127 	const JsonNode *member = object->children.head;
1128 	int i;
1129 
1130 	if (member == NULL) {
1131 		sb_puts(out, "{}");
1132 		return;
1133 	}
1134 
1135 	sb_puts(out, "{\n");
1136 	while (member != NULL) {
1137 		for (i = 0; i < indent_level + 1; i++)
1138 			sb_puts(out, space);
1139 		emit_string(out, member->key);
1140 		sb_puts(out, ": ");
1141 		emit_value_indented(out, member, space, indent_level + 1);
1142 
1143 		member = member->next;
1144 		sb_puts(out, member != NULL ? ",\n" : "\n");
1145 	}
1146 	for (i = 0; i < indent_level; i++)
1147 		sb_puts(out, space);
1148 	sb_putc(out, '}');
1149 }
1150 
emit_string(SB * out,const char * str)1151 void emit_string(SB *out, const char *str)
1152 {
1153 	const char *s = str;
1154 	char *b;
1155 
1156 	assert(utf8_validate(str));
1157 
1158 	/*
1159 	 * 14 bytes is enough space to write up to two
1160 	 * \uXXXX escapes and two quotation marks.
1161 	 */
1162 	sb_need(out, 14);
1163 	b = out->cur;
1164 
1165 	*b++ = '"';
1166 	while (*s != 0) {
1167 		unsigned char c = *s++;
1168 
1169 		/* Encode the next character, and write it to b. */
1170 		switch (c) {
1171 			case '"':
1172 				*b++ = '\\';
1173 				*b++ = '"';
1174 				break;
1175 			case '\\':
1176 				*b++ = '\\';
1177 				*b++ = '\\';
1178 				break;
1179 			case '\b':
1180 				*b++ = '\\';
1181 				*b++ = 'b';
1182 				break;
1183 			case '\f':
1184 				*b++ = '\\';
1185 				*b++ = 'f';
1186 				break;
1187 			case '\n':
1188 				*b++ = '\\';
1189 				*b++ = 'n';
1190 				break;
1191 			case '\r':
1192 				*b++ = '\\';
1193 				*b++ = 'r';
1194 				break;
1195 			case '\t':
1196 				*b++ = '\\';
1197 				*b++ = 't';
1198 				break;
1199 			default: {
1200 				int len;
1201 
1202 				s--;
1203 				len = utf8_validate_cz(s);
1204 
1205 				if (len == 0) {
1206 					/*
1207 					 * Handle invalid UTF-8 character gracefully in production
1208 					 * by writing a replacement character (U+FFFD)
1209 					 * and skipping a single byte.
1210 					 *
1211 					 * This should never happen when assertions are enabled
1212 					 * due to the assertion at the beginning of this function.
1213 					 */
1214 					assert(FALSE);
1215 					*b++ = 0xEF;
1216 					*b++ = 0xBF;
1217 					*b++ = 0xBD;
1218 					s++;
1219 				} else if (c < 0x1F) {
1220 					/* Encode using \u.... */
1221 					uint32_t unicode;
1222 
1223 					s += utf8_read_char(s, &unicode);
1224 
1225 					if (unicode <= 0xFFFF) {
1226 						*b++ = '\\';
1227 						*b++ = 'u';
1228 						b += write_hex16(b, unicode);
1229 					} else {
1230 						/* Produce a surrogate pair. */
1231 						uint16_t uc, lc;
1232 						assert(unicode <= 0x10FFFF);
1233 						to_surrogate_pair(unicode, &uc, &lc);
1234 						*b++ = '\\';
1235 						*b++ = 'u';
1236 						b += write_hex16(b, uc);
1237 						*b++ = '\\';
1238 						*b++ = 'u';
1239 						b += write_hex16(b, lc);
1240 					}
1241 				} else {
1242 					/* Write the character directly. */
1243 					while (len--)
1244 						*b++ = *s++;
1245 				}
1246 
1247 				break;
1248 			}
1249 		}
1250 
1251 		/*
1252 		 * Update *out to know about the new bytes,
1253 		 * and set up b to write another encoded character.
1254 		 */
1255 		out->cur = b;
1256 		sb_need(out, 14);
1257 		b = out->cur;
1258 	}
1259 	*b++ = '"';
1260 
1261 	out->cur = b;
1262 }
1263 
emit_number(SB * out,double num)1264 static void emit_number(SB *out, double num)
1265 {
1266 	/*
1267 	 * This isn't exactly how JavaScript renders numbers,
1268 	 * but it should produce valid JSON for reasonable numbers
1269 	 * preserve precision well enough, and avoid some oddities
1270 	 * like 0.3 -> 0.299999999999999988898 .
1271 	 */
1272 	char buf[64];
1273 	snprintf(buf, sizeof(buf)-1, "%.16g", num);
1274         buf[sizeof(buf)-1] = '\0';
1275 
1276 	if (number_is_valid(buf))
1277 		sb_puts(out, buf);
1278 	else
1279 		sb_puts(out, "null");
1280 }
1281 
tag_is_valid(unsigned int tag)1282 static int tag_is_valid(unsigned int tag)
1283 {
1284 	return (/* tag >= JSON_NULL && */ tag <= JSON_OBJECT);
1285 }
1286 
number_is_valid(const char * num)1287 static int number_is_valid(const char *num)
1288 {
1289 	return (parse_number(&num, NULL) && *num == '\0');
1290 }
1291 
expect_literal(const char ** sp,const char * str)1292 static int expect_literal(const char **sp, const char *str)
1293 {
1294 	const char *s = *sp;
1295 
1296 	while (*str != '\0')
1297 		if (*s++ != *str++)
1298 			return FALSE;
1299 
1300 	*sp = s;
1301 	return TRUE;
1302 }
1303 
1304 /*
1305  * Parses exactly 4 hex characters (capital or lowercase).
1306  * Fails if any input chars are not [0-9A-Fa-f].
1307  */
parse_hex16(const char ** sp,uint16_t * out)1308 static int parse_hex16(const char **sp, uint16_t *out)
1309 {
1310 	const char *s = *sp;
1311 	uint16_t ret = 0;
1312 	uint16_t i;
1313 	uint16_t tmp;
1314 	char c;
1315 
1316 	for (i = 0; i < 4; i++) {
1317 		c = *s++;
1318 		if (c >= '0' && c <= '9')
1319 			tmp = c - '0';
1320 		else if (c >= 'A' && c <= 'F')
1321 			tmp = c - 'A' + 10;
1322 		else if (c >= 'a' && c <= 'f')
1323 			tmp = c - 'a' + 10;
1324 		else
1325 			return FALSE;
1326 
1327 		ret <<= 4;
1328 		ret += tmp;
1329 	}
1330 
1331 	if (out)
1332 		*out = ret;
1333 	*sp = s;
1334 	return TRUE;
1335 }
1336 
1337 /*
1338  * Encodes a 16-bit number into hexadecimal,
1339  * writing exactly 4 hex chars.
1340  */
write_hex16(char * out,uint16_t val)1341 static int write_hex16(char *out, uint16_t val)
1342 {
1343 	const char *hex = "0123456789ABCDEF";
1344 
1345 	*out++ = hex[(val >> 12) & 0xF];
1346 	*out++ = hex[(val >> 8)  & 0xF];
1347 	*out++ = hex[(val >> 4)  & 0xF];
1348 	*out++ = hex[ val        & 0xF];
1349 
1350 	return 4;
1351 }
1352 
json_check(const JsonNode * node,char errmsg[256])1353 int json_check(const JsonNode *node, char errmsg[256])
1354 {
1355 	#define problem(...) do { \
1356 			if (errmsg != NULL) \
1357 				snprintf(errmsg, 256, __VA_ARGS__); \
1358 			return FALSE; \
1359 		} while (0)
1360 
1361 	if (node->key != NULL && !utf8_validate(node->key))
1362 		problem("key contains invalid UTF-8");
1363 
1364 	if (!tag_is_valid(node->tag))
1365 		problem("tag is invalid (%u)", node->tag);
1366 
1367 	if (node->tag == JSON_BOOL) {
1368 		if (node->bool_ != FALSE && node->bool_ != TRUE)
1369 			problem("bool_ is neither false (%d) nor true (%d)", (int)FALSE, (int)TRUE);
1370 	} else if (node->tag == JSON_STRING) {
1371 		if (node->string_ == NULL)
1372 			problem("string_ is NULL");
1373 		if (!utf8_validate(node->string_))
1374 			problem("string_ contains invalid UTF-8");
1375 	} else if (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT) {
1376 		JsonNode *head = node->children.head;
1377 		JsonNode *tail = node->children.tail;
1378 
1379 		if (head == NULL || tail == NULL) {
1380 			if (head != NULL)
1381 				problem("tail is NULL, but head is not");
1382 			if (tail != NULL)
1383 				problem("head is NULL, but tail is not");
1384 		} else {
1385 			JsonNode *child;
1386 			JsonNode *last = NULL;
1387 
1388 			if (head->prev != NULL)
1389 				problem("First child's prev pointer is not NULL");
1390 
1391 			for (child = head; child != NULL; last = child, child = child->next) {
1392 				if (child == node)
1393 					problem("node is its own child");
1394 				if (child->next == child)
1395 					problem("child->next == child (cycle)");
1396 				if (child->next == head)
1397 					problem("child->next == head (cycle)");
1398 
1399 				if (child->parent != node)
1400 					problem("child does not point back to parent");
1401 				if (child->next != NULL && child->next->prev != child)
1402 					problem("child->next does not point back to child");
1403 
1404 				if (node->tag == JSON_ARRAY && child->key != NULL)
1405 					problem("Array element's key is not NULL");
1406 				if (node->tag == JSON_OBJECT && child->key == NULL)
1407 					problem("Object member's key is NULL");
1408 
1409 				if (!json_check(child, errmsg))
1410 					return FALSE;
1411 			}
1412 
1413 			if (last != tail)
1414 				problem("tail does not match pointer found by starting at head and following next links");
1415 		}
1416 	}
1417 
1418 	return TRUE;
1419 
1420 	#undef problem
1421 }
1422