1 /*
2 Copyright (C) 2011 Joseph A. Adams (joeyadams3.14159@gmail.com)
3 All rights reserved.
4
5 Permission is hereby granted, free of charge, to any person obtaining a copy
6 of this software and associated documentation files (the "Software"), to deal
7 in the Software without restriction, including without limitation the rights
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 copies of the Software, and to permit persons to whom the Software is
10 furnished to do so, subject to the following conditions:
11
12 The above copyright notice and this permission notice shall be included in
13 all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 THE SOFTWARE.
22 */
23
24 #include "json.h"
25
26 #include <assert.h>
27 #include <stdint.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31
32 #define out_of_memory() do { \
33 fprintf(stderr, "Out of memory.\n"); \
34 exit(EXIT_FAILURE); \
35 } while (0)
36
37 /* Sadly, strdup is not portable. */
json_strdup(const char * str)38 static char *json_strdup(const char *str)
39 {
40 char *ret = (char*) malloc(strlen(str) + 1);
41 if (ret == NULL)
42 out_of_memory();
43 strcpy(ret, str);
44 return ret;
45 }
46
47 /* String buffer */
48
49 typedef struct
50 {
51 char *cur;
52 char *end;
53 char *start;
54 } SB;
55
sb_init(SB * sb)56 static void sb_init(SB *sb)
57 {
58 sb->start = (char*) malloc(17);
59 if (sb->start == NULL)
60 out_of_memory();
61 sb->cur = sb->start;
62 sb->end = sb->start + 16;
63 }
64
65 /* sb and need may be evaluated multiple times. */
66 #define sb_need(sb, need) do { \
67 if ((sb)->end - (sb)->cur < (need)) \
68 sb_grow(sb, need); \
69 } while (0)
70
sb_grow(SB * sb,int need)71 static void sb_grow(SB *sb, int need)
72 {
73 size_t length = sb->cur - sb->start;
74 size_t alloc = sb->end - sb->start;
75
76 do {
77 alloc *= 2;
78 } while (alloc < length + need);
79
80 sb->start = (char*) realloc(sb->start, alloc + 1);
81 if (sb->start == NULL)
82 out_of_memory();
83 sb->cur = sb->start + length;
84 sb->end = sb->start + alloc;
85 }
86
sb_put(SB * sb,const char * bytes,int count)87 static void sb_put(SB *sb, const char *bytes, int count)
88 {
89 sb_need(sb, count);
90 memcpy(sb->cur, bytes, count);
91 sb->cur += count;
92 }
93
94 #define sb_putc(sb, c) do { \
95 if ((sb)->cur >= (sb)->end) \
96 sb_grow(sb, 1); \
97 *(sb)->cur++ = (c); \
98 } while (0)
99
sb_puts(SB * sb,const char * str)100 static void sb_puts(SB *sb, const char *str)
101 {
102 sb_put(sb, str, strlen(str));
103 }
104
sb_finish(SB * sb)105 static char *sb_finish(SB *sb)
106 {
107 *sb->cur = 0;
108 assert(sb->start <= sb->cur && strlen(sb->start) == (size_t)(sb->cur - sb->start));
109 return sb->start;
110 }
111
sb_free(SB * sb)112 static void sb_free(SB *sb)
113 {
114 free(sb->start);
115 }
116
117 /*
118 * Unicode helper functions
119 *
120 * These are taken from the ccan/charset module and customized a bit.
121 * Putting them here means the compiler can (choose to) inline them,
122 * and it keeps ccan/json from having a dependency.
123 */
124
125 /*
126 * Type for Unicode codepoints.
127 * We need our own because wchar_t might be 16 bits.
128 */
129 typedef uint32_t uchar_t;
130
131 /*
132 * Validate a single UTF-8 character starting at @s.
133 * The string must be null-terminated.
134 *
135 * If it's valid, return its length (1 thru 4).
136 * If it's invalid or clipped, return 0.
137 *
138 * This function implements the syntax given in RFC3629, which is
139 * the same as that given in The Unicode Standard, Version 6.0.
140 *
141 * It has the following properties:
142 *
143 * * All codepoints U+0000..U+10FFFF may be encoded,
144 * except for U+D800..U+DFFF, which are reserved
145 * for UTF-16 surrogate pair encoding.
146 * * UTF-8 byte sequences longer than 4 bytes are not permitted,
147 * as they exceed the range of Unicode.
148 * * The sixty-six Unicode "non-characters" are permitted
149 * (namely, U+FDD0..U+FDEF, U+xxFFFE, and U+xxFFFF).
150 */
utf8_validate_cz(const char * s)151 static int utf8_validate_cz(const char *s)
152 {
153 unsigned char c = *s++;
154
155 if (c <= 0x7F) { /* 00..7F */
156 return 1;
157 } else if (c <= 0xC1) { /* 80..C1 */
158 /* Disallow overlong 2-byte sequence. */
159 return 0;
160 } else if (c <= 0xDF) { /* C2..DF */
161 /* Make sure subsequent byte is in the range 0x80..0xBF. */
162 if (((unsigned char)*s++ & 0xC0) != 0x80)
163 return 0;
164
165 return 2;
166 } else if (c <= 0xEF) { /* E0..EF */
167 /* Disallow overlong 3-byte sequence. */
168 if (c == 0xE0 && (unsigned char)*s < 0xA0)
169 return 0;
170
171 /* Disallow U+D800..U+DFFF. */
172 if (c == 0xED && (unsigned char)*s > 0x9F)
173 return 0;
174
175 /* Make sure subsequent bytes are in the range 0x80..0xBF. */
176 if (((unsigned char)*s++ & 0xC0) != 0x80)
177 return 0;
178 if (((unsigned char)*s++ & 0xC0) != 0x80)
179 return 0;
180
181 return 3;
182 } else if (c <= 0xF4) { /* F0..F4 */
183 /* Disallow overlong 4-byte sequence. */
184 if (c == 0xF0 && (unsigned char)*s < 0x90)
185 return 0;
186
187 /* Disallow codepoints beyond U+10FFFF. */
188 if (c == 0xF4 && (unsigned char)*s > 0x8F)
189 return 0;
190
191 /* Make sure subsequent bytes are in the range 0x80..0xBF. */
192 if (((unsigned char)*s++ & 0xC0) != 0x80)
193 return 0;
194 if (((unsigned char)*s++ & 0xC0) != 0x80)
195 return 0;
196 if (((unsigned char)*s++ & 0xC0) != 0x80)
197 return 0;
198
199 return 4;
200 } else { /* F5..FF */
201 return 0;
202 }
203 }
204
205 /* Validate a null-terminated UTF-8 string. */
utf8_validate(const char * s)206 static bool utf8_validate(const char *s)
207 {
208 int len;
209
210 for (; *s != 0; s += len) {
211 len = utf8_validate_cz(s);
212 if (len == 0)
213 return false;
214 }
215
216 return true;
217 }
218
219 /*
220 * Read a single UTF-8 character starting at @s,
221 * returning the length, in bytes, of the character read.
222 *
223 * This function assumes input is valid UTF-8,
224 * and that there are enough characters in front of @s.
225 */
utf8_read_char(const char * s,uchar_t * out)226 static int utf8_read_char(const char *s, uchar_t *out)
227 {
228 const unsigned char *c = (const unsigned char*) s;
229
230 assert(utf8_validate_cz(s));
231
232 if (c[0] <= 0x7F) {
233 /* 00..7F */
234 *out = c[0];
235 return 1;
236 } else if (c[0] <= 0xDF) {
237 /* C2..DF (unless input is invalid) */
238 *out = ((uchar_t)c[0] & 0x1F) << 6 |
239 ((uchar_t)c[1] & 0x3F);
240 return 2;
241 } else if (c[0] <= 0xEF) {
242 /* E0..EF */
243 *out = ((uchar_t)c[0] & 0xF) << 12 |
244 ((uchar_t)c[1] & 0x3F) << 6 |
245 ((uchar_t)c[2] & 0x3F);
246 return 3;
247 } else {
248 /* F0..F4 (unless input is invalid) */
249 *out = ((uchar_t)c[0] & 0x7) << 18 |
250 ((uchar_t)c[1] & 0x3F) << 12 |
251 ((uchar_t)c[2] & 0x3F) << 6 |
252 ((uchar_t)c[3] & 0x3F);
253 return 4;
254 }
255 }
256
257 /*
258 * Write a single UTF-8 character to @s,
259 * returning the length, in bytes, of the character written.
260 *
261 * @unicode must be U+0000..U+10FFFF, but not U+D800..U+DFFF.
262 *
263 * This function will write up to 4 bytes to @out.
264 */
utf8_write_char(uchar_t unicode,char * out)265 static int utf8_write_char(uchar_t unicode, char *out)
266 {
267 unsigned char *o = (unsigned char*) out;
268
269 assert(unicode <= 0x10FFFF && !(unicode >= 0xD800 && unicode <= 0xDFFF));
270
271 if (unicode <= 0x7F) {
272 /* U+0000..U+007F */
273 *o++ = unicode;
274 return 1;
275 } else if (unicode <= 0x7FF) {
276 /* U+0080..U+07FF */
277 *o++ = 0xC0 | unicode >> 6;
278 *o++ = 0x80 | (unicode & 0x3F);
279 return 2;
280 } else if (unicode <= 0xFFFF) {
281 /* U+0800..U+FFFF */
282 *o++ = 0xE0 | unicode >> 12;
283 *o++ = 0x80 | (unicode >> 6 & 0x3F);
284 *o++ = 0x80 | (unicode & 0x3F);
285 return 3;
286 } else {
287 /* U+10000..U+10FFFF */
288 *o++ = 0xF0 | unicode >> 18;
289 *o++ = 0x80 | (unicode >> 12 & 0x3F);
290 *o++ = 0x80 | (unicode >> 6 & 0x3F);
291 *o++ = 0x80 | (unicode & 0x3F);
292 return 4;
293 }
294 }
295
296 /*
297 * Compute the Unicode codepoint of a UTF-16 surrogate pair.
298 *
299 * @uc should be 0xD800..0xDBFF, and @lc should be 0xDC00..0xDFFF.
300 * If they aren't, this function returns false.
301 */
from_surrogate_pair(uint16_t uc,uint16_t lc,uchar_t * unicode)302 static bool from_surrogate_pair(uint16_t uc, uint16_t lc, uchar_t *unicode)
303 {
304 if (uc >= 0xD800 && uc <= 0xDBFF && lc >= 0xDC00 && lc <= 0xDFFF) {
305 *unicode = 0x10000 + ((((uchar_t)uc & 0x3FF) << 10) | (lc & 0x3FF));
306 return true;
307 } else {
308 return false;
309 }
310 }
311
312 /*
313 * Construct a UTF-16 surrogate pair given a Unicode codepoint.
314 *
315 * @unicode must be U+10000..U+10FFFF.
316 */
to_surrogate_pair(uchar_t unicode,uint16_t * uc,uint16_t * lc)317 static void to_surrogate_pair(uchar_t unicode, uint16_t *uc, uint16_t *lc)
318 {
319 uchar_t n;
320
321 assert(unicode >= 0x10000 && unicode <= 0x10FFFF);
322
323 n = unicode - 0x10000;
324 *uc = ((n >> 10) & 0x3FF) | 0xD800;
325 *lc = (n & 0x3FF) | 0xDC00;
326 }
327
328 #define is_space(c) ((c) == '\t' || (c) == '\n' || (c) == '\r' || (c) == ' ')
329 #define is_digit(c) ((c) >= '0' && (c) <= '9')
330
331 static bool parse_value (const char **sp, JsonNode **out);
332 static bool parse_string (const char **sp, char **out);
333 static bool parse_number (const char **sp, double *out);
334 static bool parse_array (const char **sp, JsonNode **out);
335 static bool parse_object (const char **sp, JsonNode **out);
336 static bool parse_hex16 (const char **sp, uint16_t *out);
337
338 static bool expect_literal (const char **sp, const char *str);
339 static void skip_space (const char **sp);
340
341 static void emit_value (SB *out, const JsonNode *node);
342 static void emit_value_indented (SB *out, const JsonNode *node, const char *space, int indent_level);
343 static void emit_string (SB *out, const char *str);
344 static void emit_number (SB *out, double num);
345 static void emit_array (SB *out, const JsonNode *array);
346 static void emit_array_indented (SB *out, const JsonNode *array, const char *space, int indent_level);
347 static void emit_object (SB *out, const JsonNode *object);
348 static void emit_object_indented (SB *out, const JsonNode *object, const char *space, int indent_level);
349
350 static int write_hex16(char *out, uint16_t val);
351
352 static JsonNode *mknode(JsonTag tag);
353 static void append_node(JsonNode *parent, JsonNode *child);
354 static void prepend_node(JsonNode *parent, JsonNode *child);
355 static void append_member(JsonNode *object, char *key, JsonNode *value);
356
357 /* Assertion-friendly validity checks */
358 static bool tag_is_valid(unsigned int tag);
359 static bool number_is_valid(const char *num);
360
json_decode(const char * json)361 JsonNode *json_decode(const char *json)
362 {
363 const char *s = json;
364 JsonNode *ret;
365
366 skip_space(&s);
367 if (!parse_value(&s, &ret))
368 return NULL;
369
370 skip_space(&s);
371 if (*s != 0) {
372 json_delete(ret);
373 return NULL;
374 }
375
376 return ret;
377 }
378
json_encode(const JsonNode * node)379 char *json_encode(const JsonNode *node)
380 {
381 return json_stringify(node, NULL);
382 }
383
json_encode_string(const char * str)384 char *json_encode_string(const char *str)
385 {
386 SB sb;
387 sb_init(&sb);
388
389 emit_string(&sb, str);
390
391 return sb_finish(&sb);
392 }
393
json_stringify(const JsonNode * node,const char * space)394 char *json_stringify(const JsonNode *node, const char *space)
395 {
396 SB sb;
397 sb_init(&sb);
398
399 if (space != NULL)
400 emit_value_indented(&sb, node, space, 0);
401 else
402 emit_value(&sb, node);
403
404 return sb_finish(&sb);
405 }
406
json_delete(JsonNode * node)407 void json_delete(JsonNode *node)
408 {
409 if (node != NULL) {
410 json_remove_from_parent(node);
411
412 switch (node->tag) {
413 case JSON_STRING:
414 free(node->string_);
415 break;
416 case JSON_ARRAY:
417 case JSON_OBJECT:
418 {
419 JsonNode *child, *next;
420 for (child = node->children.head; child != NULL; child = next) {
421 next = child->next;
422 json_delete(child);
423 }
424 break;
425 }
426 default:;
427 }
428
429 free(node);
430 }
431 }
432
json_validate(const char * json)433 bool json_validate(const char *json)
434 {
435 const char *s = json;
436
437 skip_space(&s);
438 if (!parse_value(&s, NULL))
439 return false;
440
441 skip_space(&s);
442 if (*s != 0)
443 return false;
444
445 return true;
446 }
447
json_find_element(JsonNode * array,int index)448 JsonNode *json_find_element(JsonNode *array, int index)
449 {
450 JsonNode *element;
451 int i = 0;
452
453 if (array == NULL || array->tag != JSON_ARRAY)
454 return NULL;
455
456 json_foreach(element, array) {
457 if (i == index)
458 return element;
459 i++;
460 }
461
462 return NULL;
463 }
464
json_find_member(JsonNode * object,const char * name)465 JsonNode *json_find_member(JsonNode *object, const char *name)
466 {
467 JsonNode *member;
468
469 if (object == NULL || object->tag != JSON_OBJECT)
470 return NULL;
471
472 json_foreach(member, object)
473 if (strcmp(member->key, name) == 0)
474 return member;
475
476 return NULL;
477 }
478
json_first_child(const JsonNode * node)479 JsonNode *json_first_child(const JsonNode *node)
480 {
481 if (node != NULL && (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT))
482 return node->children.head;
483 return NULL;
484 }
485
mknode(JsonTag tag)486 static JsonNode *mknode(JsonTag tag)
487 {
488 JsonNode *ret = (JsonNode*) calloc(1, sizeof(JsonNode));
489 if (ret == NULL)
490 out_of_memory();
491 ret->tag = tag;
492 return ret;
493 }
494
json_mknull(void)495 JsonNode *json_mknull(void)
496 {
497 return mknode(JSON_NULL);
498 }
499
json_mkbool(bool b)500 JsonNode *json_mkbool(bool b)
501 {
502 JsonNode *ret = mknode(JSON_BOOL);
503 ret->bool_ = b;
504 return ret;
505 }
506
mkstring(char * s)507 static JsonNode *mkstring(char *s)
508 {
509 JsonNode *ret = mknode(JSON_STRING);
510 ret->string_ = s;
511 return ret;
512 }
513
json_mkstring(const char * s)514 JsonNode *json_mkstring(const char *s)
515 {
516 return mkstring(json_strdup(s));
517 }
518
json_mknumber(double n)519 JsonNode *json_mknumber(double n)
520 {
521 JsonNode *node = mknode(JSON_NUMBER);
522 node->number_ = n;
523 return node;
524 }
525
json_mkarray(void)526 JsonNode *json_mkarray(void)
527 {
528 return mknode(JSON_ARRAY);
529 }
530
json_mkobject(void)531 JsonNode *json_mkobject(void)
532 {
533 return mknode(JSON_OBJECT);
534 }
535
append_node(JsonNode * parent,JsonNode * child)536 static void append_node(JsonNode *parent, JsonNode *child)
537 {
538 child->parent = parent;
539 child->prev = parent->children.tail;
540 child->next = NULL;
541
542 if (parent->children.tail != NULL)
543 parent->children.tail->next = child;
544 else
545 parent->children.head = child;
546 parent->children.tail = child;
547 }
548
prepend_node(JsonNode * parent,JsonNode * child)549 static void prepend_node(JsonNode *parent, JsonNode *child)
550 {
551 child->parent = parent;
552 child->prev = NULL;
553 child->next = parent->children.head;
554
555 if (parent->children.head != NULL)
556 parent->children.head->prev = child;
557 else
558 parent->children.tail = child;
559 parent->children.head = child;
560 }
561
append_member(JsonNode * object,char * key,JsonNode * value)562 static void append_member(JsonNode *object, char *key, JsonNode *value)
563 {
564 value->key = key;
565 append_node(object, value);
566 }
567
json_append_element(JsonNode * array,JsonNode * element)568 void json_append_element(JsonNode *array, JsonNode *element)
569 {
570 assert(array->tag == JSON_ARRAY);
571 assert(element->parent == NULL);
572
573 append_node(array, element);
574 }
575
json_prepend_element(JsonNode * array,JsonNode * element)576 void json_prepend_element(JsonNode *array, JsonNode *element)
577 {
578 assert(array->tag == JSON_ARRAY);
579 assert(element->parent == NULL);
580
581 prepend_node(array, element);
582 }
583
json_append_member(JsonNode * object,const char * key,JsonNode * value)584 void json_append_member(JsonNode *object, const char *key, JsonNode *value)
585 {
586 assert(object->tag == JSON_OBJECT);
587 assert(value->parent == NULL);
588
589 append_member(object, json_strdup(key), value);
590 }
591
json_prepend_member(JsonNode * object,const char * key,JsonNode * value)592 void json_prepend_member(JsonNode *object, const char *key, JsonNode *value)
593 {
594 assert(object->tag == JSON_OBJECT);
595 assert(value->parent == NULL);
596
597 value->key = json_strdup(key);
598 prepend_node(object, value);
599 }
600
json_remove_from_parent(JsonNode * node)601 void json_remove_from_parent(JsonNode *node)
602 {
603 JsonNode *parent = node->parent;
604
605 if (parent != NULL) {
606 if (node->prev != NULL)
607 node->prev->next = node->next;
608 else
609 parent->children.head = node->next;
610 if (node->next != NULL)
611 node->next->prev = node->prev;
612 else
613 parent->children.tail = node->prev;
614
615 free(node->key);
616
617 node->parent = NULL;
618 node->prev = node->next = NULL;
619 node->key = NULL;
620 }
621 }
622
parse_value(const char ** sp,JsonNode ** out)623 static bool parse_value(const char **sp, JsonNode **out)
624 {
625 const char *s = *sp;
626
627 switch (*s) {
628 case 'n':
629 if (expect_literal(&s, "null")) {
630 if (out)
631 *out = json_mknull();
632 *sp = s;
633 return true;
634 }
635 return false;
636
637 case 'f':
638 if (expect_literal(&s, "false")) {
639 if (out)
640 *out = json_mkbool(false);
641 *sp = s;
642 return true;
643 }
644 return false;
645
646 case 't':
647 if (expect_literal(&s, "true")) {
648 if (out)
649 *out = json_mkbool(true);
650 *sp = s;
651 return true;
652 }
653 return false;
654
655 case '"': {
656 char *str;
657 if (parse_string(&s, out ? &str : NULL)) {
658 if (out)
659 *out = mkstring(str);
660 *sp = s;
661 return true;
662 }
663 return false;
664 }
665
666 case '[':
667 if (parse_array(&s, out)) {
668 *sp = s;
669 return true;
670 }
671 return false;
672
673 case '{':
674 if (parse_object(&s, out)) {
675 *sp = s;
676 return true;
677 }
678 return false;
679
680 default: {
681 double num;
682 if (parse_number(&s, out ? &num : NULL)) {
683 if (out)
684 *out = json_mknumber(num);
685 *sp = s;
686 return true;
687 }
688 return false;
689 }
690 }
691 }
692
parse_array(const char ** sp,JsonNode ** out)693 static bool parse_array(const char **sp, JsonNode **out)
694 {
695 const char *s = *sp;
696 JsonNode *ret = out ? json_mkarray() : NULL;
697 JsonNode *element;
698
699 if (*s++ != '[')
700 goto failure;
701 skip_space(&s);
702
703 if (*s == ']') {
704 s++;
705 goto success;
706 }
707
708 for (;;) {
709 if (!parse_value(&s, out ? &element : NULL))
710 goto failure;
711 skip_space(&s);
712
713 if (out)
714 json_append_element(ret, element);
715
716 if (*s == ']') {
717 s++;
718 goto success;
719 }
720
721 if (*s++ != ',')
722 goto failure;
723 skip_space(&s);
724 }
725
726 success:
727 *sp = s;
728 if (out)
729 *out = ret;
730 return true;
731
732 failure:
733 json_delete(ret);
734 return false;
735 }
736
parse_object(const char ** sp,JsonNode ** out)737 static bool parse_object(const char **sp, JsonNode **out)
738 {
739 const char *s = *sp;
740 JsonNode *ret = out ? json_mkobject() : NULL;
741 char *key;
742 JsonNode *value;
743
744 if (*s++ != '{')
745 goto failure;
746 skip_space(&s);
747
748 if (*s == '}') {
749 s++;
750 goto success;
751 }
752
753 for (;;) {
754 if (!parse_string(&s, out ? &key : NULL))
755 goto failure;
756 skip_space(&s);
757
758 if (*s++ != ':')
759 goto failure_free_key;
760 skip_space(&s);
761
762 if (!parse_value(&s, out ? &value : NULL))
763 goto failure_free_key;
764 skip_space(&s);
765
766 if (out)
767 append_member(ret, key, value);
768
769 if (*s == '}') {
770 s++;
771 goto success;
772 }
773
774 if (*s++ != ',')
775 goto failure;
776 skip_space(&s);
777 }
778
779 success:
780 *sp = s;
781 if (out)
782 *out = ret;
783 return true;
784
785 failure_free_key:
786 if (out)
787 free(key);
788 failure:
789 json_delete(ret);
790 return false;
791 }
792
parse_string(const char ** sp,char ** out)793 bool parse_string(const char **sp, char **out)
794 {
795 const char *s = *sp;
796 SB sb;
797 char throwaway_buffer[4];
798 /* enough space for a UTF-8 character */
799 char *b;
800
801 if (*s++ != '"')
802 return false;
803
804 if (out) {
805 sb_init(&sb);
806 sb_need(&sb, 4);
807 b = sb.cur;
808 } else {
809 b = throwaway_buffer;
810 }
811
812 while (*s != '"') {
813 unsigned char c = *s++;
814
815 /* Parse next character, and write it to b. */
816 if (c == '\\') {
817 c = *s++;
818 switch (c) {
819 case '"':
820 case '\\':
821 case '/':
822 *b++ = c;
823 break;
824 case 'b':
825 *b++ = '\b';
826 break;
827 case 'f':
828 *b++ = '\f';
829 break;
830 case 'n':
831 *b++ = '\n';
832 break;
833 case 'r':
834 *b++ = '\r';
835 break;
836 case 't':
837 *b++ = '\t';
838 break;
839 case 'u':
840 {
841 uint16_t uc, lc;
842 uchar_t unicode;
843
844 if (!parse_hex16(&s, &uc))
845 goto failed;
846
847 if (uc >= 0xD800 && uc <= 0xDFFF) {
848 /* Handle UTF-16 surrogate pair. */
849 if (*s++ != '\\' || *s++ != 'u' || !parse_hex16(&s, &lc))
850 goto failed; /* Incomplete surrogate pair. */
851 if (!from_surrogate_pair(uc, lc, &unicode))
852 goto failed; /* Invalid surrogate pair. */
853 } else if (uc == 0) {
854 /* Disallow "\u0000". */
855 goto failed;
856 } else {
857 unicode = uc;
858 }
859
860 b += utf8_write_char(unicode, b);
861 break;
862 }
863 default:
864 /* Invalid escape */
865 goto failed;
866 }
867 } else if (c <= 0x1F) {
868 /* Control characters are not allowed in string literals. */
869 goto failed;
870 } else {
871 /* Validate and echo a UTF-8 character. */
872 int len;
873
874 s--;
875 len = utf8_validate_cz(s);
876 if (len == 0)
877 goto failed; /* Invalid UTF-8 character. */
878
879 while (len--)
880 *b++ = *s++;
881 }
882
883 /*
884 * Update sb to know about the new bytes,
885 * and set up b to write another character.
886 */
887 if (out) {
888 sb.cur = b;
889 sb_need(&sb, 4);
890 b = sb.cur;
891 } else {
892 b = throwaway_buffer;
893 }
894 }
895 s++;
896
897 if (out)
898 *out = sb_finish(&sb);
899 *sp = s;
900 return true;
901
902 failed:
903 if (out)
904 sb_free(&sb);
905 return false;
906 }
907
908 /*
909 * The JSON spec says that a number shall follow this precise pattern
910 * (spaces and quotes added for readability):
911 * '-'? (0 | [1-9][0-9]*) ('.' [0-9]+)? ([Ee] [+-]? [0-9]+)?
912 *
913 * However, some JSON parsers are more liberal. For instance, PHP accepts
914 * '.5' and '1.'. JSON.parse accepts '+3'.
915 *
916 * This function takes the strict approach.
917 */
parse_number(const char ** sp,double * out)918 bool parse_number(const char **sp, double *out)
919 {
920 const char *s = *sp;
921
922 /* '-'? */
923 if (*s == '-')
924 s++;
925
926 /* (0 | [1-9][0-9]*) */
927 if (*s == '0') {
928 s++;
929 } else {
930 if (!is_digit(*s))
931 return false;
932 do {
933 s++;
934 } while (is_digit(*s));
935 }
936
937 /* ('.' [0-9]+)? */
938 if (*s == '.') {
939 s++;
940 if (!is_digit(*s))
941 return false;
942 do {
943 s++;
944 } while (is_digit(*s));
945 }
946
947 /* ([Ee] [+-]? [0-9]+)? */
948 if (*s == 'E' || *s == 'e') {
949 s++;
950 if (*s == '+' || *s == '-')
951 s++;
952 if (!is_digit(*s))
953 return false;
954 do {
955 s++;
956 } while (is_digit(*s));
957 }
958
959 if (out)
960 *out = strtod(*sp, NULL);
961
962 *sp = s;
963 return true;
964 }
965
skip_space(const char ** sp)966 static void skip_space(const char **sp)
967 {
968 const char *s = *sp;
969 while (is_space(*s))
970 s++;
971 *sp = s;
972 }
973
emit_value(SB * out,const JsonNode * node)974 static void emit_value(SB *out, const JsonNode *node)
975 {
976 assert(tag_is_valid(node->tag));
977 switch (node->tag) {
978 case JSON_NULL:
979 sb_puts(out, "null");
980 break;
981 case JSON_BOOL:
982 sb_puts(out, node->bool_ ? "true" : "false");
983 break;
984 case JSON_STRING:
985 emit_string(out, node->string_);
986 break;
987 case JSON_NUMBER:
988 emit_number(out, node->number_);
989 break;
990 case JSON_ARRAY:
991 emit_array(out, node);
992 break;
993 case JSON_OBJECT:
994 emit_object(out, node);
995 break;
996 default:
997 assert(false);
998 }
999 }
1000
emit_value_indented(SB * out,const JsonNode * node,const char * space,int indent_level)1001 void emit_value_indented(SB *out, const JsonNode *node, const char *space, int indent_level)
1002 {
1003 assert(tag_is_valid(node->tag));
1004 switch (node->tag) {
1005 case JSON_NULL:
1006 sb_puts(out, "null");
1007 break;
1008 case JSON_BOOL:
1009 sb_puts(out, node->bool_ ? "true" : "false");
1010 break;
1011 case JSON_STRING:
1012 emit_string(out, node->string_);
1013 break;
1014 case JSON_NUMBER:
1015 emit_number(out, node->number_);
1016 break;
1017 case JSON_ARRAY:
1018 emit_array_indented(out, node, space, indent_level);
1019 break;
1020 case JSON_OBJECT:
1021 emit_object_indented(out, node, space, indent_level);
1022 break;
1023 default:
1024 assert(false);
1025 }
1026 }
1027
emit_array(SB * out,const JsonNode * array)1028 static void emit_array(SB *out, const JsonNode *array)
1029 {
1030 const JsonNode *element;
1031
1032 sb_putc(out, '[');
1033 json_foreach(element, array) {
1034 emit_value(out, element);
1035 if (element->next != NULL)
1036 sb_putc(out, ',');
1037 }
1038 sb_putc(out, ']');
1039 }
1040
emit_array_indented(SB * out,const JsonNode * array,const char * space,int indent_level)1041 static void emit_array_indented(SB *out, const JsonNode *array, const char *space, int indent_level)
1042 {
1043 const JsonNode *element = array->children.head;
1044 int i;
1045
1046 if (element == NULL) {
1047 sb_puts(out, "[]");
1048 return;
1049 }
1050
1051 sb_puts(out, "[\n");
1052 while (element != NULL) {
1053 for (i = 0; i < indent_level + 1; i++)
1054 sb_puts(out, space);
1055 emit_value_indented(out, element, space, indent_level + 1);
1056
1057 element = element->next;
1058 sb_puts(out, element != NULL ? ",\n" : "\n");
1059 }
1060 for (i = 0; i < indent_level; i++)
1061 sb_puts(out, space);
1062 sb_putc(out, ']');
1063 }
1064
emit_object(SB * out,const JsonNode * object)1065 static void emit_object(SB *out, const JsonNode *object)
1066 {
1067 const JsonNode *member;
1068
1069 sb_putc(out, '{');
1070 json_foreach(member, object) {
1071 emit_string(out, member->key);
1072 sb_putc(out, ':');
1073 emit_value(out, member);
1074 if (member->next != NULL)
1075 sb_putc(out, ',');
1076 }
1077 sb_putc(out, '}');
1078 }
1079
emit_object_indented(SB * out,const JsonNode * object,const char * space,int indent_level)1080 static void emit_object_indented(SB *out, const JsonNode *object, const char *space, int indent_level)
1081 {
1082 const JsonNode *member = object->children.head;
1083 int i;
1084
1085 if (member == NULL) {
1086 sb_puts(out, "{}");
1087 return;
1088 }
1089
1090 sb_puts(out, "{\n");
1091 while (member != NULL) {
1092 for (i = 0; i < indent_level + 1; i++)
1093 sb_puts(out, space);
1094 emit_string(out, member->key);
1095 sb_puts(out, ": ");
1096 emit_value_indented(out, member, space, indent_level + 1);
1097
1098 member = member->next;
1099 sb_puts(out, member != NULL ? ",\n" : "\n");
1100 }
1101 for (i = 0; i < indent_level; i++)
1102 sb_puts(out, space);
1103 sb_putc(out, '}');
1104 }
1105
emit_string(SB * out,const char * str)1106 void emit_string(SB *out, const char *str)
1107 {
1108 bool escape_unicode = false;
1109 const char *s = str;
1110 char *b;
1111
1112 assert(utf8_validate(str));
1113
1114 /*
1115 * 14 bytes is enough space to write up to two
1116 * \uXXXX escapes and two quotation marks.
1117 */
1118 sb_need(out, 14);
1119 b = out->cur;
1120
1121 *b++ = '"';
1122 while (*s != 0) {
1123 unsigned char c = *s++;
1124
1125 /* Encode the next character, and write it to b. */
1126 switch (c) {
1127 case '"':
1128 *b++ = '\\';
1129 *b++ = '"';
1130 break;
1131 case '\\':
1132 *b++ = '\\';
1133 *b++ = '\\';
1134 break;
1135 case '\b':
1136 *b++ = '\\';
1137 *b++ = 'b';
1138 break;
1139 case '\f':
1140 *b++ = '\\';
1141 *b++ = 'f';
1142 break;
1143 case '\n':
1144 *b++ = '\\';
1145 *b++ = 'n';
1146 break;
1147 case '\r':
1148 *b++ = '\\';
1149 *b++ = 'r';
1150 break;
1151 case '\t':
1152 *b++ = '\\';
1153 *b++ = 't';
1154 break;
1155 default: {
1156 int len;
1157
1158 s--;
1159 len = utf8_validate_cz(s);
1160
1161 if (len == 0) {
1162 /*
1163 * Handle invalid UTF-8 character gracefully in production
1164 * by writing a replacement character (U+FFFD)
1165 * and skipping a single byte.
1166 *
1167 * This should never happen when assertions are enabled
1168 * due to the assertion at the beginning of this function.
1169 */
1170 assert(false);
1171 if (escape_unicode) {
1172 strcpy(b, "\\uFFFD");
1173 b += 6;
1174 } else {
1175 *b++ = 0xEF;
1176 *b++ = 0xBF;
1177 *b++ = 0xBD;
1178 }
1179 s++;
1180 } else if (c < 0x1F || (c >= 0x80 && escape_unicode)) {
1181 /* Encode using \u.... */
1182 uint32_t unicode;
1183
1184 s += utf8_read_char(s, &unicode);
1185
1186 if (unicode <= 0xFFFF) {
1187 *b++ = '\\';
1188 *b++ = 'u';
1189 b += write_hex16(b, unicode);
1190 } else {
1191 /* Produce a surrogate pair. */
1192 uint16_t uc, lc;
1193 assert(unicode <= 0x10FFFF);
1194 to_surrogate_pair(unicode, &uc, &lc);
1195 *b++ = '\\';
1196 *b++ = 'u';
1197 b += write_hex16(b, uc);
1198 *b++ = '\\';
1199 *b++ = 'u';
1200 b += write_hex16(b, lc);
1201 }
1202 } else {
1203 /* Write the character directly. */
1204 while (len--)
1205 *b++ = *s++;
1206 }
1207
1208 break;
1209 }
1210 }
1211
1212 /*
1213 * Update *out to know about the new bytes,
1214 * and set up b to write another encoded character.
1215 */
1216 out->cur = b;
1217 sb_need(out, 14);
1218 b = out->cur;
1219 }
1220 *b++ = '"';
1221
1222 out->cur = b;
1223 }
1224
emit_number(SB * out,double num)1225 static void emit_number(SB *out, double num)
1226 {
1227 /*
1228 * This isn't exactly how JavaScript renders numbers,
1229 * but it should produce valid JSON for reasonable numbers
1230 * preserve precision well enough, and avoid some oddities
1231 * like 0.3 -> 0.299999999999999988898 .
1232 */
1233 char buf[64];
1234 sprintf(buf, "%g", num);
1235
1236 if (number_is_valid(buf))
1237 sb_puts(out, buf);
1238 else
1239 sb_puts(out, "null");
1240 }
1241
tag_is_valid(unsigned int tag)1242 static bool tag_is_valid(unsigned int tag)
1243 {
1244 return (/* tag >= JSON_NULL && */ tag <= JSON_OBJECT);
1245 }
1246
number_is_valid(const char * num)1247 static bool number_is_valid(const char *num)
1248 {
1249 return (parse_number(&num, NULL) && *num == '\0');
1250 }
1251
expect_literal(const char ** sp,const char * str)1252 static bool expect_literal(const char **sp, const char *str)
1253 {
1254 const char *s = *sp;
1255
1256 while (*str != '\0')
1257 if (*s++ != *str++)
1258 return false;
1259
1260 *sp = s;
1261 return true;
1262 }
1263
1264 /*
1265 * Parses exactly 4 hex characters (capital or lowercase).
1266 * Fails if any input chars are not [0-9A-Fa-f].
1267 */
parse_hex16(const char ** sp,uint16_t * out)1268 static bool parse_hex16(const char **sp, uint16_t *out)
1269 {
1270 const char *s = *sp;
1271 uint16_t ret = 0;
1272 uint16_t i;
1273 uint16_t tmp;
1274 char c;
1275
1276 for (i = 0; i < 4; i++) {
1277 c = *s++;
1278 if (c >= '0' && c <= '9')
1279 tmp = c - '0';
1280 else if (c >= 'A' && c <= 'F')
1281 tmp = c - 'A' + 10;
1282 else if (c >= 'a' && c <= 'f')
1283 tmp = c - 'a' + 10;
1284 else
1285 return false;
1286
1287 ret <<= 4;
1288 ret += tmp;
1289 }
1290
1291 if (out)
1292 *out = ret;
1293 *sp = s;
1294 return true;
1295 }
1296
1297 /*
1298 * Encodes a 16-bit number into hexadecimal,
1299 * writing exactly 4 hex chars.
1300 */
write_hex16(char * out,uint16_t val)1301 static int write_hex16(char *out, uint16_t val)
1302 {
1303 const char *hex = "0123456789ABCDEF";
1304
1305 *out++ = hex[(val >> 12) & 0xF];
1306 *out++ = hex[(val >> 8) & 0xF];
1307 *out++ = hex[(val >> 4) & 0xF];
1308 *out++ = hex[ val & 0xF];
1309
1310 return 4;
1311 }
1312
json_check(const JsonNode * node,char errmsg[256])1313 bool json_check(const JsonNode *node, char errmsg[256])
1314 {
1315 #define problem(...) do { \
1316 if (errmsg != NULL) \
1317 snprintf(errmsg, 256, __VA_ARGS__); \
1318 return false; \
1319 } while (0)
1320
1321 if (node->key != NULL && !utf8_validate(node->key))
1322 problem("key contains invalid UTF-8");
1323
1324 if (!tag_is_valid(node->tag))
1325 problem("tag is invalid (%u)", node->tag);
1326
1327 if (node->tag == JSON_BOOL) {
1328 if (node->bool_ != false && node->bool_ != true)
1329 problem("bool_ is neither false (%d) nor true (%d)", (int)false, (int)true);
1330 } else if (node->tag == JSON_STRING) {
1331 if (node->string_ == NULL)
1332 problem("string_ is NULL");
1333 if (!utf8_validate(node->string_))
1334 problem("string_ contains invalid UTF-8");
1335 } else if (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT) {
1336 JsonNode *head = node->children.head;
1337 JsonNode *tail = node->children.tail;
1338
1339 if (head == NULL || tail == NULL) {
1340 if (head != NULL)
1341 problem("tail is NULL, but head is not");
1342 if (tail != NULL)
1343 problem("head is NULL, but tail is not");
1344 } else {
1345 JsonNode *child;
1346 JsonNode *last = NULL;
1347
1348 if (head->prev != NULL)
1349 problem("First child's prev pointer is not NULL");
1350
1351 for (child = head; child != NULL; last = child, child = child->next) {
1352 if (child == node)
1353 problem("node is its own child");
1354 if (child->next == child)
1355 problem("child->next == child (cycle)");
1356 if (child->next == head)
1357 problem("child->next == head (cycle)");
1358
1359 if (child->parent != node)
1360 problem("child does not point back to parent");
1361 if (child->next != NULL && child->next->prev != child)
1362 problem("child->next does not point back to child");
1363
1364 if (node->tag == JSON_ARRAY && child->key != NULL)
1365 problem("Array element's key is not NULL");
1366 if (node->tag == JSON_OBJECT && child->key == NULL)
1367 problem("Object member's key is NULL");
1368
1369 if (!json_check(child, errmsg))
1370 return false;
1371 }
1372
1373 if (last != tail)
1374 problem("tail does not match pointer found by starting at head and following next links");
1375 }
1376 }
1377
1378 return true;
1379
1380 #undef problem
1381 }
1382