1 /*
2 Copyright (C) 2011 Joseph A. Adams (joeyadams3.14159@gmail.com)
3 All rights reserved.
4
5 Permission is hereby granted, free of charge, to any person obtaining a copy
6 of this software and associated documentation files (the "Software"), to deal
7 in the Software without restriction, including without limitation the rights
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 copies of the Software, and to permit persons to whom the Software is
10 furnished to do so, subject to the following conditions:
11
12 The above copyright notice and this permission notice shall be included in
13 all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 THE SOFTWARE.
22 */
23
24 #include "ccan-json.h"
25
26 #include <assert.h>
27 #include <stdint.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31
32 /* ProFTPD: stdbool.h equivalents */
33 #ifndef TRUE
34 # define TRUE 1
35 #endif
36
37 #ifndef FALSE
38 # define FALSE 0
39 #endif
40
41 /* Out-of-memory error handling. */
default_oom(void)42 static void default_oom(void)
43 {
44 fprintf(stderr, "%s", "Out of memory!\n");
45 exit(EXIT_FAILURE);
46 }
47
48 static void (*json_oom)(void) = default_oom;
49
50 /* Sadly, strdup is not portable. */
json_strdup(const char * str)51 static char *json_strdup(const char *str)
52 {
53 char *ret = (char*) malloc(strlen(str) + 1);
54 if (ret == NULL) {
55 json_oom();
56 return NULL;
57 }
58 strcpy(ret, str);
59 return ret;
60 }
61
62 /* String buffer */
63
64 typedef struct
65 {
66 char *cur;
67 char *end;
68 char *start;
69 } SB;
70
sb_init(SB * sb)71 static void sb_init(SB *sb)
72 {
73 sb->start = (char*) malloc(17);
74 if (sb->start == NULL)
75 json_oom();
76 sb->cur = sb->start;
77 sb->end = sb->start + 16;
78 }
79
80 /* sb and need may be evaluated multiple times. */
81 #define sb_need(sb, need) do { \
82 if ((sb)->end - (sb)->cur < (need)) \
83 sb_grow(sb, need); \
84 } while (0)
85
sb_grow(SB * sb,int need)86 static void sb_grow(SB *sb, int need)
87 {
88 size_t length = sb->cur - sb->start;
89 size_t alloc = sb->end - sb->start;
90
91 do {
92 alloc *= 2;
93 } while (alloc < length + need);
94
95 sb->start = (char*) realloc(sb->start, alloc + 1);
96 if (sb->start == NULL)
97 json_oom();
98 sb->cur = sb->start + length;
99 sb->end = sb->start + alloc;
100 }
101
sb_put(SB * sb,const char * bytes,int count)102 static void sb_put(SB *sb, const char *bytes, int count)
103 {
104 sb_need(sb, count);
105 memcpy(sb->cur, bytes, count);
106 sb->cur += count;
107 }
108
109 #define sb_putc(sb, c) do { \
110 if ((sb)->cur >= (sb)->end) \
111 sb_grow(sb, 1); \
112 *(sb)->cur++ = (c); \
113 } while (0)
114
sb_puts(SB * sb,const char * str)115 static void sb_puts(SB *sb, const char *str)
116 {
117 sb_put(sb, str, strlen(str));
118 }
119
sb_finish(SB * sb)120 static char *sb_finish(SB *sb)
121 {
122 *sb->cur = 0;
123 assert(sb->start <= sb->cur && strlen(sb->start) == (size_t)(sb->cur - sb->start));
124 return sb->start;
125 }
126
sb_free(SB * sb)127 static void sb_free(SB *sb)
128 {
129 free(sb->start);
130 }
131
132 /*
133 * Unicode helper functions
134 *
135 * These are taken from the ccan/charset module and customized a bit.
136 * Putting them here means the compiler can (choose to) inline them,
137 * and it keeps ccan/json from having a dependency.
138 */
139
140 #if 0
141 /* PROFTPD NOTE: This has been commented out, since it will cause
142 * problems on platforms where `uchar_t' is already defined. Instead,
143 * all ocurrences of `uchar_t' in the original code have been replaced
144 * with `uint32_t'.
145 */
146
147 /*
148 * Type for Unicode codepoints.
149 * We need our own because wchar_t might be 16 bits.
150 */
151 typedef uint32_t uchar_t;
152 #endif
153
154 /*
155 * Validate a single UTF-8 character starting at @s.
156 * The string must be null-terminated.
157 *
158 * If it's valid, return its length (1 thru 4).
159 * If it's invalid or clipped, return 0.
160 *
161 * This function implements the syntax given in RFC3629, which is
162 * the same as that given in The Unicode Standard, Version 6.0.
163 *
164 * It has the following properties:
165 *
166 * * All codepoints U+0000..U+10FFFF may be encoded,
167 * except for U+D800..U+DFFF, which are reserved
168 * for UTF-16 surrogate pair encoding.
169 * * UTF-8 byte sequences longer than 4 bytes are not permitted,
170 * as they exceed the range of Unicode.
171 * * The sixty-six Unicode "non-characters" are permitted
172 * (namely, U+FDD0..U+FDEF, U+xxFFFE, and U+xxFFFF).
173 */
utf8_validate_cz(const char * s)174 static int utf8_validate_cz(const char *s)
175 {
176 unsigned char c = *s++;
177
178 if (c <= 0x7F) { /* 00..7F */
179 return 1;
180 } else if (c <= 0xC1) { /* 80..C1 */
181 /* Disallow overlong 2-byte sequence. */
182 return 0;
183 } else if (c <= 0xDF) { /* C2..DF */
184 /* Make sure subsequent byte is in the range 0x80..0xBF. */
185 if (((unsigned char)*s++ & 0xC0) != 0x80)
186 return 0;
187
188 return 2;
189 } else if (c <= 0xEF) { /* E0..EF */
190 /* Disallow overlong 3-byte sequence. */
191 if (c == 0xE0 && (unsigned char)*s < 0xA0)
192 return 0;
193
194 /* Disallow U+D800..U+DFFF. */
195 if (c == 0xED && (unsigned char)*s > 0x9F)
196 return 0;
197
198 /* Make sure subsequent bytes are in the range 0x80..0xBF. */
199 if (((unsigned char)*s++ & 0xC0) != 0x80)
200 return 0;
201 if (((unsigned char)*s++ & 0xC0) != 0x80)
202 return 0;
203
204 return 3;
205 } else if (c <= 0xF4) { /* F0..F4 */
206 /* Disallow overlong 4-byte sequence. */
207 if (c == 0xF0 && (unsigned char)*s < 0x90)
208 return 0;
209
210 /* Disallow codepoints beyond U+10FFFF. */
211 if (c == 0xF4 && (unsigned char)*s > 0x8F)
212 return 0;
213
214 /* Make sure subsequent bytes are in the range 0x80..0xBF. */
215 if (((unsigned char)*s++ & 0xC0) != 0x80)
216 return 0;
217 if (((unsigned char)*s++ & 0xC0) != 0x80)
218 return 0;
219 if (((unsigned char)*s++ & 0xC0) != 0x80)
220 return 0;
221
222 return 4;
223 } else { /* F5..FF */
224 return 0;
225 }
226 }
227
228 /* Validate a null-terminated UTF-8 string. */
utf8_validate(const char * s)229 static int utf8_validate(const char *s)
230 {
231 int len;
232
233 for (; *s != 0; s += len) {
234 len = utf8_validate_cz(s);
235 if (len == 0)
236 return FALSE;
237 }
238
239 return TRUE;
240 }
241
242 /*
243 * Read a single UTF-8 character starting at @s,
244 * returning the length, in bytes, of the character read.
245 *
246 * This function assumes input is valid UTF-8,
247 * and that there are enough characters in front of @s.
248 */
utf8_read_char(const char * s,uint32_t * out)249 static int utf8_read_char(const char *s, uint32_t *out)
250 {
251 const unsigned char *c = (const unsigned char*) s;
252
253 assert(utf8_validate_cz(s));
254
255 if (c[0] <= 0x7F) {
256 /* 00..7F */
257 *out = c[0];
258 return 1;
259 } else if (c[0] <= 0xDF) {
260 /* C2..DF (unless input is invalid) */
261 *out = ((uint32_t)c[0] & 0x1F) << 6 |
262 ((uint32_t)c[1] & 0x3F);
263 return 2;
264 } else if (c[0] <= 0xEF) {
265 /* E0..EF */
266 *out = ((uint32_t)c[0] & 0xF) << 12 |
267 ((uint32_t)c[1] & 0x3F) << 6 |
268 ((uint32_t)c[2] & 0x3F);
269 return 3;
270 } else {
271 /* F0..F4 (unless input is invalid) */
272 *out = ((uint32_t)c[0] & 0x7) << 18 |
273 ((uint32_t)c[1] & 0x3F) << 12 |
274 ((uint32_t)c[2] & 0x3F) << 6 |
275 ((uint32_t)c[3] & 0x3F);
276 return 4;
277 }
278 }
279
280 /*
281 * Write a single UTF-8 character to @s,
282 * returning the length, in bytes, of the character written.
283 *
284 * @unicode must be U+0000..U+10FFFF, but not U+D800..U+DFFF.
285 *
286 * This function will write up to 4 bytes to @out.
287 */
utf8_write_char(uint32_t unicode,char * out)288 static int utf8_write_char(uint32_t unicode, char *out)
289 {
290 unsigned char *o = (unsigned char*) out;
291
292 assert(unicode <= 0x10FFFF && !(unicode >= 0xD800 && unicode <= 0xDFFF));
293
294 if (unicode <= 0x7F) {
295 /* U+0000..U+007F */
296 *o++ = unicode;
297 return 1;
298 } else if (unicode <= 0x7FF) {
299 /* U+0080..U+07FF */
300 *o++ = 0xC0 | unicode >> 6;
301 *o++ = 0x80 | (unicode & 0x3F);
302 return 2;
303 } else if (unicode <= 0xFFFF) {
304 /* U+0800..U+FFFF */
305 *o++ = 0xE0 | unicode >> 12;
306 *o++ = 0x80 | (unicode >> 6 & 0x3F);
307 *o++ = 0x80 | (unicode & 0x3F);
308 return 3;
309 } else {
310 /* U+10000..U+10FFFF */
311 *o++ = 0xF0 | unicode >> 18;
312 *o++ = 0x80 | (unicode >> 12 & 0x3F);
313 *o++ = 0x80 | (unicode >> 6 & 0x3F);
314 *o++ = 0x80 | (unicode & 0x3F);
315 return 4;
316 }
317 }
318
319 /*
320 * Compute the Unicode codepoint of a UTF-16 surrogate pair.
321 *
322 * @uc should be 0xD800..0xDBFF, and @lc should be 0xDC00..0xDFFF.
323 * If they aren't, this function returns false.
324 */
from_surrogate_pair(uint16_t uc,uint16_t lc,uint32_t * unicode)325 static int from_surrogate_pair(uint16_t uc, uint16_t lc, uint32_t *unicode)
326 {
327 if (uc >= 0xD800 && uc <= 0xDBFF && lc >= 0xDC00 && lc <= 0xDFFF) {
328 *unicode = 0x10000 + ((((uint32_t)uc & 0x3FF) << 10) | (lc & 0x3FF));
329 return TRUE;
330 } else {
331 return FALSE;
332 }
333 }
334
335 /*
336 * Construct a UTF-16 surrogate pair given a Unicode codepoint.
337 *
338 * @unicode must be U+10000..U+10FFFF.
339 */
to_surrogate_pair(uint32_t unicode,uint16_t * uc,uint16_t * lc)340 static void to_surrogate_pair(uint32_t unicode, uint16_t *uc, uint16_t *lc)
341 {
342 uint32_t n;
343
344 assert(unicode >= 0x10000 && unicode <= 0x10FFFF);
345
346 n = unicode - 0x10000;
347 *uc = ((n >> 10) & 0x3FF) | 0xD800;
348 *lc = (n & 0x3FF) | 0xDC00;
349 }
350
351 #define is_space(c) ((c) == '\t' || (c) == '\n' || (c) == '\r' || (c) == ' ')
352 #define is_digit(c) ((c) >= '0' && (c) <= '9')
353
354 static int parse_value (const char **sp, JsonNode **out, unsigned int depth);
355 static int parse_string (const char **sp, char **out);
356 static int parse_number (const char **sp, double *out);
357 static int parse_array (const char **sp, JsonNode **out, unsigned int depth);
358 static int parse_object (const char **sp, JsonNode **out, unsigned int depth);
359 static int parse_hex16 (const char **sp, uint16_t *out);
360
361 static int expect_literal (const char **sp, const char *str);
362 static void skip_space (const char **sp);
363
364 static void emit_value (SB *out, const JsonNode *node);
365 static void emit_value_indented (SB *out, const JsonNode *node, const char *space, int indent_level);
366 static void emit_string (SB *out, const char *str);
367 static void emit_number (SB *out, double num);
368 static void emit_array (SB *out, const JsonNode *array);
369 static void emit_array_indented (SB *out, const JsonNode *array, const char *space, int indent_level);
370 static void emit_object (SB *out, const JsonNode *object);
371 static void emit_object_indented (SB *out, const JsonNode *object, const char *space, int indent_level);
372
373 static int write_hex16(char *out, uint16_t val);
374
375 static JsonNode *mknode(JsonTag tag);
376 static void append_node(JsonNode *parent, JsonNode *child);
377 static void prepend_node(JsonNode *parent, JsonNode *child);
378 static void append_member(JsonNode *object, char *key, JsonNode *value);
379
380 /* Assertion-friendly validity checks */
381 static int tag_is_valid(unsigned int tag);
382 static int number_is_valid(const char *num);
383
json_set_oom(void (* oom)(void))384 void json_set_oom(void (*oom)(void)) {
385 json_oom = (oom != NULL ? oom : default_oom);
386 }
387
json_decode(const char * json)388 JsonNode *json_decode(const char *json)
389 {
390 const char *s = json;
391 JsonNode *ret;
392
393 skip_space(&s);
394 if (!parse_value(&s, &ret, 0))
395 return NULL;
396
397 skip_space(&s);
398 if (*s != 0) {
399 json_delete(ret);
400 return NULL;
401 }
402
403 return ret;
404 }
405
json_encode(const JsonNode * node)406 char *json_encode(const JsonNode *node)
407 {
408 return json_stringify(node, NULL);
409 }
410
json_encode_string(const char * str)411 char *json_encode_string(const char *str)
412 {
413 char *encoded_str;
414 SB sb;
415 sb_init(&sb);
416
417 emit_string(&sb, str);
418
419 encoded_str = json_strdup(sb_finish(&sb));
420 sb_free(&sb);
421 return encoded_str;
422 }
423
json_stringify(const JsonNode * node,const char * space)424 char *json_stringify(const JsonNode *node, const char *space)
425 {
426 char *str;
427 SB sb;
428 sb_init(&sb);
429
430 if (space != NULL)
431 emit_value_indented(&sb, node, space, 0);
432 else
433 emit_value(&sb, node);
434
435 str = json_strdup(sb_finish(&sb));
436 sb_free(&sb);
437 return str;
438 }
439
json_delete(JsonNode * node)440 void json_delete(JsonNode *node)
441 {
442 if (node != NULL) {
443 json_remove_from_parent(node);
444
445 switch (node->tag) {
446 case JSON_STRING:
447 free(node->string_);
448 break;
449 case JSON_ARRAY:
450 case JSON_OBJECT:
451 {
452 JsonNode *child, *next;
453 for (child = node->children.head; child != NULL; child = next) {
454 next = child->next;
455 json_delete(child);
456 }
457 break;
458 }
459 default:;
460 }
461
462 free(node);
463 }
464 }
465
json_validate(const char * json)466 int json_validate(const char *json)
467 {
468 const char *s = json;
469
470 skip_space(&s);
471 if (!parse_value(&s, NULL, 0))
472 return FALSE;
473
474 skip_space(&s);
475 if (*s != 0)
476 return FALSE;
477
478 return TRUE;
479 }
480
json_find_element(JsonNode * array,unsigned int idx)481 JsonNode *json_find_element(JsonNode *array, unsigned int idx)
482 {
483 JsonNode *element;
484 unsigned int i = 0;
485
486 if (array == NULL || array->tag != JSON_ARRAY)
487 return NULL;
488
489 json_foreach(element, array) {
490 if (i == idx)
491 return element;
492 i++;
493 }
494
495 return NULL;
496 }
497
json_find_member(JsonNode * object,const char * name)498 JsonNode *json_find_member(JsonNode *object, const char *name)
499 {
500 JsonNode *member;
501
502 if (object == NULL || object->tag != JSON_OBJECT)
503 return NULL;
504
505 json_foreach(member, object)
506 if (strcmp(member->key, name) == 0)
507 return member;
508
509 return NULL;
510 }
511
json_first_child(const JsonNode * node)512 JsonNode *json_first_child(const JsonNode *node)
513 {
514 if (node != NULL && (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT))
515 return node->children.head;
516 return NULL;
517 }
518
mknode(JsonTag tag)519 static JsonNode *mknode(JsonTag tag)
520 {
521 JsonNode *ret = (JsonNode*) calloc(1, sizeof(JsonNode));
522 if (ret == NULL) {
523 json_oom();
524 return NULL;
525 }
526 ret->tag = tag;
527 return ret;
528 }
529
json_mknull(void)530 JsonNode *json_mknull(void)
531 {
532 return mknode(JSON_NULL);
533 }
534
json_mkbool(int b)535 JsonNode *json_mkbool(int b)
536 {
537 JsonNode *ret = mknode(JSON_BOOL);
538 ret->bool_ = b;
539 return ret;
540 }
541
mkstring(char * s)542 static JsonNode *mkstring(char *s)
543 {
544 JsonNode *ret = mknode(JSON_STRING);
545 ret->string_ = s;
546 return ret;
547 }
548
json_mkstring(const char * s)549 JsonNode *json_mkstring(const char *s)
550 {
551 return mkstring(json_strdup(s));
552 }
553
json_mknumber(double n)554 JsonNode *json_mknumber(double n)
555 {
556 JsonNode *node = mknode(JSON_NUMBER);
557 node->number_ = n;
558 return node;
559 }
560
json_mkarray(void)561 JsonNode *json_mkarray(void)
562 {
563 return mknode(JSON_ARRAY);
564 }
565
json_mkobject(void)566 JsonNode *json_mkobject(void)
567 {
568 return mknode(JSON_OBJECT);
569 }
570
append_node(JsonNode * parent,JsonNode * child)571 static void append_node(JsonNode *parent, JsonNode *child)
572 {
573 child->parent = parent;
574 child->prev = parent->children.tail;
575 child->next = NULL;
576
577 if (parent->children.tail != NULL)
578 parent->children.tail->next = child;
579 else
580 parent->children.head = child;
581 parent->children.tail = child;
582 }
583
prepend_node(JsonNode * parent,JsonNode * child)584 static void prepend_node(JsonNode *parent, JsonNode *child)
585 {
586 child->parent = parent;
587 child->prev = NULL;
588 child->next = parent->children.head;
589
590 if (parent->children.head != NULL)
591 parent->children.head->prev = child;
592 else
593 parent->children.tail = child;
594 parent->children.head = child;
595 }
596
append_member(JsonNode * object,char * key,JsonNode * value)597 static void append_member(JsonNode *object, char *key, JsonNode *value)
598 {
599 value->key = key;
600 append_node(object, value);
601 }
602
json_append_element(JsonNode * array,JsonNode * element)603 void json_append_element(JsonNode *array, JsonNode *element)
604 {
605 assert(array->tag == JSON_ARRAY);
606 assert(element->parent == NULL);
607
608 append_node(array, element);
609 }
610
json_prepend_element(JsonNode * array,JsonNode * element)611 void json_prepend_element(JsonNode *array, JsonNode *element)
612 {
613 assert(array->tag == JSON_ARRAY);
614 assert(element->parent == NULL);
615
616 prepend_node(array, element);
617 }
618
json_append_member(JsonNode * object,const char * key,JsonNode * value)619 void json_append_member(JsonNode *object, const char *key, JsonNode *value)
620 {
621 assert(object->tag == JSON_OBJECT);
622 assert(value->parent == NULL);
623
624 append_member(object, json_strdup(key), value);
625 }
626
json_prepend_member(JsonNode * object,const char * key,JsonNode * value)627 void json_prepend_member(JsonNode *object, const char *key, JsonNode *value)
628 {
629 assert(object->tag == JSON_OBJECT);
630 assert(value->parent == NULL);
631
632 value->key = json_strdup(key);
633 prepend_node(object, value);
634 }
635
json_remove_from_parent(JsonNode * node)636 void json_remove_from_parent(JsonNode *node)
637 {
638 JsonNode *parent = node->parent;
639
640 if (parent != NULL) {
641 if (node->prev != NULL)
642 node->prev->next = node->next;
643 else
644 parent->children.head = node->next;
645 if (node->next != NULL)
646 node->next->prev = node->prev;
647 else
648 parent->children.tail = node->prev;
649
650 free(node->key);
651
652 node->parent = NULL;
653 node->prev = node->next = NULL;
654 node->key = NULL;
655 }
656 }
657
parse_value(const char ** sp,JsonNode ** out,unsigned int depth)658 static int parse_value(const char **sp, JsonNode **out, unsigned int depth)
659 {
660 const char *s = *sp;
661
662 if (depth > CCAN_JSON_MAX_DEPTH) {
663 return FALSE;
664 }
665
666 switch (*s) {
667 case 'n':
668 if (expect_literal(&s, "null")) {
669 if (out)
670 *out = json_mknull();
671 *sp = s;
672 return TRUE;
673 }
674 return FALSE;
675
676 case 'f':
677 if (expect_literal(&s, "false")) {
678 if (out)
679 *out = json_mkbool(FALSE);
680 *sp = s;
681 return TRUE;
682 }
683 return FALSE;
684
685 case 't':
686 if (expect_literal(&s, "true")) {
687 if (out)
688 *out = json_mkbool(TRUE);
689 *sp = s;
690 return TRUE;
691 }
692 return FALSE;
693
694 case '"': {
695 char *str;
696 if (parse_string(&s, out ? &str : NULL)) {
697 if (out)
698 *out = mkstring(str);
699 *sp = s;
700 return TRUE;
701 }
702 return FALSE;
703 }
704
705 case '[':
706 if (parse_array(&s, out, depth)) {
707 *sp = s;
708 return TRUE;
709 }
710 return FALSE;
711
712 case '{':
713 if (parse_object(&s, out, depth)) {
714 *sp = s;
715 return TRUE;
716 }
717 return FALSE;
718
719 default: {
720 double num;
721 if (parse_number(&s, out ? &num : NULL)) {
722 if (out)
723 *out = json_mknumber(num);
724 *sp = s;
725 return TRUE;
726 }
727 return FALSE;
728 }
729 }
730 }
731
parse_array(const char ** sp,JsonNode ** out,unsigned int depth)732 static int parse_array(const char **sp, JsonNode **out, unsigned int depth)
733 {
734 const char *s = *sp;
735 JsonNode *ret = out ? json_mkarray() : NULL;
736 JsonNode *element;
737
738 depth++;
739 if (*s++ != '[')
740 goto failure;
741 skip_space(&s);
742
743 if (*s == ']') {
744 s++;
745 goto success;
746 }
747
748 for (;;) {
749 if (!parse_value(&s, out ? &element : NULL, depth))
750 goto failure;
751 skip_space(&s);
752
753 if (out)
754 json_append_element(ret, element);
755
756 if (*s == ']') {
757 s++;
758 goto success;
759 }
760
761 if (*s++ != ',')
762 goto failure;
763 skip_space(&s);
764 }
765
766 success:
767 depth--;
768 *sp = s;
769 if (out)
770 *out = ret;
771 return TRUE;
772
773 failure:
774 depth--;
775 json_delete(ret);
776 return FALSE;
777 }
778
parse_object(const char ** sp,JsonNode ** out,unsigned int depth)779 static int parse_object(const char **sp, JsonNode **out, unsigned int depth)
780 {
781 const char *s = *sp;
782 JsonNode *ret = out ? json_mkobject() : NULL;
783 char *key;
784 JsonNode *value;
785
786 depth++;
787 if (*s++ != '{')
788 goto failure;
789 skip_space(&s);
790
791 if (*s == '}') {
792 s++;
793 goto success;
794 }
795
796 for (;;) {
797 if (!parse_string(&s, out ? &key : NULL))
798 goto failure;
799 skip_space(&s);
800
801 if (*s++ != ':')
802 goto failure_free_key;
803 skip_space(&s);
804
805 if (!parse_value(&s, out ? &value : NULL, depth))
806 goto failure_free_key;
807 skip_space(&s);
808
809 if (out)
810 append_member(ret, key, value);
811
812 if (*s == '}') {
813 s++;
814 goto success;
815 }
816
817 if (*s++ != ',')
818 goto failure;
819 skip_space(&s);
820 }
821
822 success:
823 depth--;
824 *sp = s;
825 if (out)
826 *out = ret;
827 return TRUE;
828
829 failure_free_key:
830 if (out)
831 free(key);
832 failure:
833 depth--;
834 json_delete(ret);
835 return FALSE;
836 }
837
parse_string(const char ** sp,char ** out)838 int parse_string(const char **sp, char **out)
839 {
840 const char *s = *sp;
841 SB sb;
842 char throwaway_buffer[4];
843 /* enough space for a UTF-8 character */
844 char *b;
845
846 if (*s++ != '"')
847 return FALSE;
848
849 if (out) {
850 sb_init(&sb);
851 sb_need(&sb, 4);
852 b = sb.cur;
853 } else {
854 b = throwaway_buffer;
855 }
856
857 while (*s != '"') {
858 unsigned char c = *s++;
859
860 /* Parse next character, and write it to b. */
861 if (c == '\\') {
862 c = *s++;
863 switch (c) {
864 case '"':
865 case '\\':
866 case '/':
867 *b++ = c;
868 break;
869 case 'b':
870 *b++ = '\b';
871 break;
872 case 'f':
873 *b++ = '\f';
874 break;
875 case 'n':
876 *b++ = '\n';
877 break;
878 case 'r':
879 *b++ = '\r';
880 break;
881 case 't':
882 *b++ = '\t';
883 break;
884 case 'u':
885 {
886 uint16_t uc, lc;
887 uint32_t unicode;
888
889 if (!parse_hex16(&s, &uc))
890 goto failed;
891
892 if (uc >= 0xD800 && uc <= 0xDFFF) {
893 /* Handle UTF-16 surrogate pair. */
894 if (*s++ != '\\' || *s++ != 'u' || !parse_hex16(&s, &lc))
895 goto failed; /* Incomplete surrogate pair. */
896 if (!from_surrogate_pair(uc, lc, &unicode))
897 goto failed; /* Invalid surrogate pair. */
898 } else if (uc == 0) {
899 /* Disallow "\u0000". */
900 goto failed;
901 } else {
902 unicode = uc;
903 }
904
905 b += utf8_write_char(unicode, b);
906 break;
907 }
908 default:
909 /* Invalid escape */
910 goto failed;
911 }
912 } else if (c <= 0x1F) {
913 /* Control characters are not allowed in string literals. */
914 goto failed;
915 } else {
916 /* Validate and echo a UTF-8 character. */
917 int len;
918
919 s--;
920 len = utf8_validate_cz(s);
921 if (len == 0)
922 goto failed; /* Invalid UTF-8 character. */
923
924 while (len--)
925 *b++ = *s++;
926 }
927
928 /*
929 * Update sb to know about the new bytes,
930 * and set up b to write another character.
931 */
932 if (out) {
933 sb.cur = b;
934 sb_need(&sb, 4);
935 b = sb.cur;
936 } else {
937 b = throwaway_buffer;
938 }
939 }
940 s++;
941
942 if (out)
943 *out = sb_finish(&sb);
944 *sp = s;
945 return TRUE;
946
947 failed:
948 if (out)
949 sb_free(&sb);
950 return FALSE;
951 }
952
953 /*
954 * The JSON spec says that a number shall follow this precise pattern
955 * (spaces and quotes added for readability):
956 * '-'? (0 | [1-9][0-9]*) ('.' [0-9]+)? ([Ee] [+-]? [0-9]+)?
957 *
958 * However, some JSON parsers are more liberal. For instance, PHP accepts
959 * '.5' and '1.'. JSON.parse accepts '+3'.
960 *
961 * This function takes the strict approach.
962 */
parse_number(const char ** sp,double * out)963 int parse_number(const char **sp, double *out)
964 {
965 const char *s = *sp;
966
967 /* '-'? */
968 if (*s == '-')
969 s++;
970
971 /* (0 | [1-9][0-9]*) */
972 if (*s == '0') {
973 s++;
974 } else {
975 if (!is_digit(*s))
976 return FALSE;
977 do {
978 s++;
979 } while (is_digit(*s));
980 }
981
982 /* ('.' [0-9]+)? */
983 if (*s == '.') {
984 s++;
985 if (!is_digit(*s))
986 return FALSE;
987 do {
988 s++;
989 } while (is_digit(*s));
990 }
991
992 /* ([Ee] [+-]? [0-9]+)? */
993 if (*s == 'E' || *s == 'e') {
994 s++;
995 if (*s == '+' || *s == '-')
996 s++;
997 if (!is_digit(*s))
998 return FALSE;
999 do {
1000 s++;
1001 } while (is_digit(*s));
1002 }
1003
1004 if (out)
1005 *out = strtod(*sp, NULL);
1006
1007 *sp = s;
1008 return TRUE;
1009 }
1010
skip_space(const char ** sp)1011 static void skip_space(const char **sp)
1012 {
1013 const char *s = *sp;
1014 while (is_space(*s))
1015 s++;
1016 *sp = s;
1017 }
1018
emit_value(SB * out,const JsonNode * node)1019 static void emit_value(SB *out, const JsonNode *node)
1020 {
1021 assert(tag_is_valid(node->tag));
1022 switch (node->tag) {
1023 case JSON_NULL:
1024 sb_puts(out, "null");
1025 break;
1026 case JSON_BOOL:
1027 sb_puts(out, node->bool_ ? "true" : "false");
1028 break;
1029 case JSON_STRING:
1030 emit_string(out, node->string_);
1031 break;
1032 case JSON_NUMBER:
1033 emit_number(out, node->number_);
1034 break;
1035 case JSON_ARRAY:
1036 emit_array(out, node);
1037 break;
1038 case JSON_OBJECT:
1039 emit_object(out, node);
1040 break;
1041 default:
1042 assert(FALSE);
1043 }
1044 }
1045
emit_value_indented(SB * out,const JsonNode * node,const char * space,int indent_level)1046 void emit_value_indented(SB *out, const JsonNode *node, const char *space, int indent_level)
1047 {
1048 assert(tag_is_valid(node->tag));
1049 switch (node->tag) {
1050 case JSON_NULL:
1051 sb_puts(out, "null");
1052 break;
1053 case JSON_BOOL:
1054 sb_puts(out, node->bool_ ? "true" : "false");
1055 break;
1056 case JSON_STRING:
1057 emit_string(out, node->string_);
1058 break;
1059 case JSON_NUMBER:
1060 emit_number(out, node->number_);
1061 break;
1062 case JSON_ARRAY:
1063 emit_array_indented(out, node, space, indent_level);
1064 break;
1065 case JSON_OBJECT:
1066 emit_object_indented(out, node, space, indent_level);
1067 break;
1068 default:
1069 assert(FALSE);
1070 }
1071 }
1072
emit_array(SB * out,const JsonNode * array)1073 static void emit_array(SB *out, const JsonNode *array)
1074 {
1075 const JsonNode *element;
1076
1077 sb_putc(out, '[');
1078 json_foreach(element, array) {
1079 emit_value(out, element);
1080 if (element->next != NULL)
1081 sb_putc(out, ',');
1082 }
1083 sb_putc(out, ']');
1084 }
1085
emit_array_indented(SB * out,const JsonNode * array,const char * space,int indent_level)1086 static void emit_array_indented(SB *out, const JsonNode *array, const char *space, int indent_level)
1087 {
1088 const JsonNode *element = array->children.head;
1089 int i;
1090
1091 if (element == NULL) {
1092 sb_puts(out, "[]");
1093 return;
1094 }
1095
1096 sb_puts(out, "[\n");
1097 while (element != NULL) {
1098 for (i = 0; i < indent_level + 1; i++)
1099 sb_puts(out, space);
1100 emit_value_indented(out, element, space, indent_level + 1);
1101
1102 element = element->next;
1103 sb_puts(out, element != NULL ? ",\n" : "\n");
1104 }
1105 for (i = 0; i < indent_level; i++)
1106 sb_puts(out, space);
1107 sb_putc(out, ']');
1108 }
1109
emit_object(SB * out,const JsonNode * object)1110 static void emit_object(SB *out, const JsonNode *object)
1111 {
1112 const JsonNode *member;
1113
1114 sb_putc(out, '{');
1115 json_foreach(member, object) {
1116 emit_string(out, member->key);
1117 sb_putc(out, ':');
1118 emit_value(out, member);
1119 if (member->next != NULL)
1120 sb_putc(out, ',');
1121 }
1122 sb_putc(out, '}');
1123 }
1124
emit_object_indented(SB * out,const JsonNode * object,const char * space,int indent_level)1125 static void emit_object_indented(SB *out, const JsonNode *object, const char *space, int indent_level)
1126 {
1127 const JsonNode *member = object->children.head;
1128 int i;
1129
1130 if (member == NULL) {
1131 sb_puts(out, "{}");
1132 return;
1133 }
1134
1135 sb_puts(out, "{\n");
1136 while (member != NULL) {
1137 for (i = 0; i < indent_level + 1; i++)
1138 sb_puts(out, space);
1139 emit_string(out, member->key);
1140 sb_puts(out, ": ");
1141 emit_value_indented(out, member, space, indent_level + 1);
1142
1143 member = member->next;
1144 sb_puts(out, member != NULL ? ",\n" : "\n");
1145 }
1146 for (i = 0; i < indent_level; i++)
1147 sb_puts(out, space);
1148 sb_putc(out, '}');
1149 }
1150
emit_string(SB * out,const char * str)1151 void emit_string(SB *out, const char *str)
1152 {
1153 const char *s = str;
1154 char *b;
1155
1156 assert(utf8_validate(str));
1157
1158 /*
1159 * 14 bytes is enough space to write up to two
1160 * \uXXXX escapes and two quotation marks.
1161 */
1162 sb_need(out, 14);
1163 b = out->cur;
1164
1165 *b++ = '"';
1166 while (*s != 0) {
1167 unsigned char c = *s++;
1168
1169 /* Encode the next character, and write it to b. */
1170 switch (c) {
1171 case '"':
1172 *b++ = '\\';
1173 *b++ = '"';
1174 break;
1175 case '\\':
1176 *b++ = '\\';
1177 *b++ = '\\';
1178 break;
1179 case '\b':
1180 *b++ = '\\';
1181 *b++ = 'b';
1182 break;
1183 case '\f':
1184 *b++ = '\\';
1185 *b++ = 'f';
1186 break;
1187 case '\n':
1188 *b++ = '\\';
1189 *b++ = 'n';
1190 break;
1191 case '\r':
1192 *b++ = '\\';
1193 *b++ = 'r';
1194 break;
1195 case '\t':
1196 *b++ = '\\';
1197 *b++ = 't';
1198 break;
1199 default: {
1200 int len;
1201
1202 s--;
1203 len = utf8_validate_cz(s);
1204
1205 if (len == 0) {
1206 /*
1207 * Handle invalid UTF-8 character gracefully in production
1208 * by writing a replacement character (U+FFFD)
1209 * and skipping a single byte.
1210 *
1211 * This should never happen when assertions are enabled
1212 * due to the assertion at the beginning of this function.
1213 */
1214 assert(FALSE);
1215 *b++ = 0xEF;
1216 *b++ = 0xBF;
1217 *b++ = 0xBD;
1218 s++;
1219 } else if (c < 0x1F) {
1220 /* Encode using \u.... */
1221 uint32_t unicode;
1222
1223 s += utf8_read_char(s, &unicode);
1224
1225 if (unicode <= 0xFFFF) {
1226 *b++ = '\\';
1227 *b++ = 'u';
1228 b += write_hex16(b, unicode);
1229 } else {
1230 /* Produce a surrogate pair. */
1231 uint16_t uc, lc;
1232 assert(unicode <= 0x10FFFF);
1233 to_surrogate_pair(unicode, &uc, &lc);
1234 *b++ = '\\';
1235 *b++ = 'u';
1236 b += write_hex16(b, uc);
1237 *b++ = '\\';
1238 *b++ = 'u';
1239 b += write_hex16(b, lc);
1240 }
1241 } else {
1242 /* Write the character directly. */
1243 while (len--)
1244 *b++ = *s++;
1245 }
1246
1247 break;
1248 }
1249 }
1250
1251 /*
1252 * Update *out to know about the new bytes,
1253 * and set up b to write another encoded character.
1254 */
1255 out->cur = b;
1256 sb_need(out, 14);
1257 b = out->cur;
1258 }
1259 *b++ = '"';
1260
1261 out->cur = b;
1262 }
1263
emit_number(SB * out,double num)1264 static void emit_number(SB *out, double num)
1265 {
1266 /*
1267 * This isn't exactly how JavaScript renders numbers,
1268 * but it should produce valid JSON for reasonable numbers
1269 * preserve precision well enough, and avoid some oddities
1270 * like 0.3 -> 0.299999999999999988898 .
1271 */
1272 char buf[64];
1273 snprintf(buf, sizeof(buf)-1, "%.16g", num);
1274 buf[sizeof(buf)-1] = '\0';
1275
1276 if (number_is_valid(buf))
1277 sb_puts(out, buf);
1278 else
1279 sb_puts(out, "null");
1280 }
1281
tag_is_valid(unsigned int tag)1282 static int tag_is_valid(unsigned int tag)
1283 {
1284 return (/* tag >= JSON_NULL && */ tag <= JSON_OBJECT);
1285 }
1286
number_is_valid(const char * num)1287 static int number_is_valid(const char *num)
1288 {
1289 return (parse_number(&num, NULL) && *num == '\0');
1290 }
1291
expect_literal(const char ** sp,const char * str)1292 static int expect_literal(const char **sp, const char *str)
1293 {
1294 const char *s = *sp;
1295
1296 while (*str != '\0')
1297 if (*s++ != *str++)
1298 return FALSE;
1299
1300 *sp = s;
1301 return TRUE;
1302 }
1303
1304 /*
1305 * Parses exactly 4 hex characters (capital or lowercase).
1306 * Fails if any input chars are not [0-9A-Fa-f].
1307 */
parse_hex16(const char ** sp,uint16_t * out)1308 static int parse_hex16(const char **sp, uint16_t *out)
1309 {
1310 const char *s = *sp;
1311 uint16_t ret = 0;
1312 uint16_t i;
1313 uint16_t tmp;
1314 char c;
1315
1316 for (i = 0; i < 4; i++) {
1317 c = *s++;
1318 if (c >= '0' && c <= '9')
1319 tmp = c - '0';
1320 else if (c >= 'A' && c <= 'F')
1321 tmp = c - 'A' + 10;
1322 else if (c >= 'a' && c <= 'f')
1323 tmp = c - 'a' + 10;
1324 else
1325 return FALSE;
1326
1327 ret <<= 4;
1328 ret += tmp;
1329 }
1330
1331 if (out)
1332 *out = ret;
1333 *sp = s;
1334 return TRUE;
1335 }
1336
1337 /*
1338 * Encodes a 16-bit number into hexadecimal,
1339 * writing exactly 4 hex chars.
1340 */
write_hex16(char * out,uint16_t val)1341 static int write_hex16(char *out, uint16_t val)
1342 {
1343 const char *hex = "0123456789ABCDEF";
1344
1345 *out++ = hex[(val >> 12) & 0xF];
1346 *out++ = hex[(val >> 8) & 0xF];
1347 *out++ = hex[(val >> 4) & 0xF];
1348 *out++ = hex[ val & 0xF];
1349
1350 return 4;
1351 }
1352
json_check(const JsonNode * node,char errmsg[256])1353 int json_check(const JsonNode *node, char errmsg[256])
1354 {
1355 #define problem(...) do { \
1356 if (errmsg != NULL) \
1357 snprintf(errmsg, 256, __VA_ARGS__); \
1358 return FALSE; \
1359 } while (0)
1360
1361 if (node->key != NULL && !utf8_validate(node->key))
1362 problem("key contains invalid UTF-8");
1363
1364 if (!tag_is_valid(node->tag))
1365 problem("tag is invalid (%u)", node->tag);
1366
1367 if (node->tag == JSON_BOOL) {
1368 if (node->bool_ != FALSE && node->bool_ != TRUE)
1369 problem("bool_ is neither false (%d) nor true (%d)", (int)FALSE, (int)TRUE);
1370 } else if (node->tag == JSON_STRING) {
1371 if (node->string_ == NULL)
1372 problem("string_ is NULL");
1373 if (!utf8_validate(node->string_))
1374 problem("string_ contains invalid UTF-8");
1375 } else if (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT) {
1376 JsonNode *head = node->children.head;
1377 JsonNode *tail = node->children.tail;
1378
1379 if (head == NULL || tail == NULL) {
1380 if (head != NULL)
1381 problem("tail is NULL, but head is not");
1382 if (tail != NULL)
1383 problem("head is NULL, but tail is not");
1384 } else {
1385 JsonNode *child;
1386 JsonNode *last = NULL;
1387
1388 if (head->prev != NULL)
1389 problem("First child's prev pointer is not NULL");
1390
1391 for (child = head; child != NULL; last = child, child = child->next) {
1392 if (child == node)
1393 problem("node is its own child");
1394 if (child->next == child)
1395 problem("child->next == child (cycle)");
1396 if (child->next == head)
1397 problem("child->next == head (cycle)");
1398
1399 if (child->parent != node)
1400 problem("child does not point back to parent");
1401 if (child->next != NULL && child->next->prev != child)
1402 problem("child->next does not point back to child");
1403
1404 if (node->tag == JSON_ARRAY && child->key != NULL)
1405 problem("Array element's key is not NULL");
1406 if (node->tag == JSON_OBJECT && child->key == NULL)
1407 problem("Object member's key is NULL");
1408
1409 if (!json_check(child, errmsg))
1410 return FALSE;
1411 }
1412
1413 if (last != tail)
1414 problem("tail does not match pointer found by starting at head and following next links");
1415 }
1416 }
1417
1418 return TRUE;
1419
1420 #undef problem
1421 }
1422