1 /*
2 * Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
3 *
4 * Jansson is free software; you can redistribute it and/or modify
5 * it under the terms of the MIT license. See LICENSE for details.
6 */
7
8 #ifndef _GNU_SOURCE
9 #define _GNU_SOURCE
10 #endif
11
12 #include <errno.h>
13 #include <limits.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <assert.h>
18
19 #include "jansson.h"
20 #include "jansson_private.h"
21 #include "strbuffer.h"
22 #include "utf.h"
23
/* Stream states, stored in stream_t.state. Once the state is no longer
   STREAM_STATE_OK, stream_get() returns the state value itself, so the
   two non-OK values double as "no character" return codes. */
#define STREAM_STATE_OK 0
#define STREAM_STATE_EOF -1
#define STREAM_STATE_ERROR -2

/* Token types stored in lex_t.token. The structural tokens
   '{' '}' '[' ']' ':' ',' are represented by their own character value
   (see lex_scan()), which is why the value-carrying and keyword token
   types are numbered from 256 upwards. */
#define TOKEN_INVALID -1
#define TOKEN_EOF 0
#define TOKEN_STRING 256
#define TOKEN_INTEGER 257
#define TOKEN_REAL 258
#define TOKEN_TRUE 259
#define TOKEN_FALSE 260
#define TOKEN_NULL 261

/* Locale independent versions of isxxx() functions.
   These are macros that evaluate their argument more than once, so the
   argument must not have side effects. */
#define l_isupper(c) ('A' <= (c) && (c) <= 'Z')
#define l_islower(c) ('a' <= (c) && (c) <= 'z')
#define l_isalpha(c) (l_isupper(c) || l_islower(c))
#define l_isdigit(c) ('0' <= (c) && (c) <= '9')
#define l_isxdigit(c) \
    (l_isdigit(c) || ('A' <= (c) && (c) <= 'F') || ('a' <= (c) && (c) <= 'f'))
44
/* Input callback type: read one byte from the source identified by
   `data`, convert it to unsigned char, then to int, and return it.
   Return EOF on end of input. This corresponds to the behaviour of
   fgetc(). */
typedef int (*get_func)(void *data);
49
/* Byte stream with one-character buffering and position tracking. */
typedef struct {
    get_func get;            /* callback used to fetch the next byte */
    void *data;              /* opaque argument passed to `get` */
    char buffer[5];          /* bytes of the current character, NUL-terminated
                                (stream_get() stores one byte or a whole
                                multi-byte UTF-8 sequence here) */
    size_t buffer_pos;       /* index in `buffer` of the next byte to return */
    int state;               /* one of the STREAM_STATE_* values */
    int line;                /* current line; stream_init() starts it at 1 */
    int column, last_column; /* current column, and the column saved when a
                                newline was read so stream_unget() can
                                restore it */
    size_t position;         /* number of bytes handed out so far */
} stream_t;
60
/* Lexer state: the input stream plus the most recently scanned token. */
typedef struct {
    stream_t stream;        /* underlying byte stream */
    strbuffer_t saved_text; /* raw source text of the current token; cleared
                               at the start of every lex_scan() */
    size_t flags;           /* decoding flags given to lex_init() */
    size_t depth;           /* current nesting depth, maintained by
                               parse_value() */
    int token;              /* TOKEN_* value or a structural character */
    union {
        struct {
            char *val;      /* decoded string; freed by lex_free_string()
                               unless ownership has been taken over */
            size_t len;     /* length of `val` in bytes, excluding the
                               terminating NUL */
        } string;           /* valid when token == TOKEN_STRING */
        json_int_t integer; /* valid when token == TOKEN_INTEGER */
        double real;        /* valid when token == TOKEN_REAL */
    } value;
} lex_t;
76
/* Map a pointer to the `stream` member of a lex_t back to the lex_t
   that contains it. */
#define stream_to_lex(stream) container_of(stream, lex_t, stream)
78
79
80 /*** error reporting ***/
81
error_set(json_error_t * error,const lex_t * lex,const char * msg,...)82 static void error_set(json_error_t *error, const lex_t *lex,
83 const char *msg, ...)
84 {
85 va_list ap;
86 char msg_text[JSON_ERROR_TEXT_LENGTH];
87 char msg_with_context[JSON_ERROR_TEXT_LENGTH];
88
89 int line = -1, col = -1;
90 size_t pos = 0;
91 const char *result = msg_text;
92
93 if(!error)
94 return;
95
96 va_start(ap, msg);
97 vsnprintf(msg_text, JSON_ERROR_TEXT_LENGTH, msg, ap);
98 msg_text[JSON_ERROR_TEXT_LENGTH - 1] = '\0';
99 va_end(ap);
100
101 if(lex)
102 {
103 const char *saved_text = strbuffer_value(&lex->saved_text);
104
105 line = lex->stream.line;
106 col = lex->stream.column;
107 pos = lex->stream.position;
108
109 if(saved_text && saved_text[0])
110 {
111 if(lex->saved_text.length <= 20) {
112 snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH,
113 "%s near '%s'", msg_text, saved_text);
114 msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0';
115 result = msg_with_context;
116 }
117 }
118 else
119 {
120 if(lex->stream.state == STREAM_STATE_ERROR) {
121 /* No context for UTF-8 decoding errors */
122 result = msg_text;
123 }
124 else {
125 snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH,
126 "%s near end of file", msg_text);
127 msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0';
128 result = msg_with_context;
129 }
130 }
131 }
132
133 jsonp_error_set(error, line, col, pos, "%s", result);
134 }
135
136
137 /*** lexical analyzer ***/
138
139 static void
stream_init(stream_t * stream,get_func get,void * data)140 stream_init(stream_t *stream, get_func get, void *data)
141 {
142 stream->get = get;
143 stream->data = data;
144 stream->buffer[0] = '\0';
145 stream->buffer_pos = 0;
146
147 stream->state = STREAM_STATE_OK;
148 stream->line = 1;
149 stream->column = 0;
150 stream->position = 0;
151 }
152
/* Return the next byte of the input, or STREAM_STATE_EOF /
 * STREAM_STATE_ERROR.
 *
 * Input is fetched one whole character at a time: a single byte below
 * 0x80, or a complete multi-byte UTF-8 sequence, which is validated
 * before any of its bytes is handed out. The bytes of the buffered
 * character are then returned one per call. Line, column and position
 * are updated for every byte returned.
 *
 * On a UTF-8 decoding failure the stream enters STREAM_STATE_ERROR and
 * the error is reported through `error`; on end of input it enters
 * STREAM_STATE_EOF. Either state is sticky.
 */
static int stream_get(stream_t *stream, json_error_t *error)
{
    int c;

    /* once EOF or an error has been seen, keep reporting it */
    if(stream->state != STREAM_STATE_OK)
        return stream->state;

    /* the buffered character has been used up: fetch the next one */
    if(!stream->buffer[stream->buffer_pos])
    {
        c = stream->get(stream->data);
        if(c == EOF) {
            stream->state = STREAM_STATE_EOF;
            return STREAM_STATE_EOF;
        }

        stream->buffer[0] = c;
        stream->buffer_pos = 0;

        if(0x80 <= c && c <= 0xFF)
        {
            /* multi-byte UTF-8 sequence */
            size_t i, count;

            /* a zero count means the byte cannot start a sequence */
            count = utf8_check_first(c);
            if(!count)
                goto out;

            assert(count >= 2);

            /* read the remaining bytes; the result of get() is stored
               without an EOF check, so a truncated sequence is left for
               utf8_check_full() below to reject */
            for(i = 1; i < count; i++)
                stream->buffer[i] = stream->get(stream->data);

            if(!utf8_check_full(stream->buffer, count, NULL))
                goto out;

            stream->buffer[count] = '\0';
        }
        else
            stream->buffer[1] = '\0';
    }

    /* NOTE(review): buffer is plain char, so the value of c for bytes
       >= 0x80 depends on the signedness of char -- confirm callers only
       compare against ASCII values and the STREAM_STATE_* codes */
    c = stream->buffer[stream->buffer_pos++];

    stream->position++;
    if(c == '\n') {
        stream->line++;
        /* remember the column so that stream_unget() can restore it */
        stream->last_column = stream->column;
        stream->column = 0;
    }
    else if(utf8_check_first(c)) {
        /* track the Unicode character column, so increment only if
           this is the first character of a UTF-8 sequence */
        stream->column++;
    }

    return c;

out:
    /* c still holds the first byte of the offending sequence */
    stream->state = STREAM_STATE_ERROR;
    error_set(error, stream_to_lex(stream), "unable to decode byte 0x%x", c);
    return STREAM_STATE_ERROR;
}
215
/* Push back the byte `c` most recently returned by stream_get().
 *
 * STREAM_STATE_EOF and STREAM_STATE_ERROR are ignored. Otherwise the
 * position, line and column counters are rewound and the buffer cursor
 * steps back one byte; `c` must be the byte found there.
 */
static void stream_unget(stream_t *stream, int c)
{
    switch(c) {
        case STREAM_STATE_EOF:
        case STREAM_STATE_ERROR:
            /* nothing was consumed, so there is nothing to push back */
            return;
        default:
            break;
    }

    stream->position--;

    if(c != '\n') {
        /* only the first byte of a UTF-8 sequence advanced the column */
        if(utf8_check_first(c))
            stream->column--;
    }
    else {
        stream->line--;
        stream->column = stream->last_column;
    }

    assert(stream->buffer_pos > 0);
    stream->buffer_pos--;
    assert(stream->buffer[stream->buffer_pos] == c);
}
233
234
lex_get(lex_t * lex,json_error_t * error)235 static int lex_get(lex_t *lex, json_error_t *error)
236 {
237 return stream_get(&lex->stream, error);
238 }
239
/* Append the byte `c` to the saved text of the current token. */
static void lex_save(lex_t *lex, int c)
{
    strbuffer_t *text = &lex->saved_text;

    strbuffer_append_byte(text, c);
}
244
lex_get_save(lex_t * lex,json_error_t * error)245 static int lex_get_save(lex_t *lex, json_error_t *error)
246 {
247 int c = stream_get(&lex->stream, error);
248 if(c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR)
249 lex_save(lex, c);
250 return c;
251 }
252
/* Push the byte `c` back to the lexer's stream; the saved token text
   is left untouched. */
static void lex_unget(lex_t *lex, int c)
{
    stream_t *input = &lex->stream;

    stream_unget(input, c);
}
257
/* Undo a lex_get_save(): push `c` back to the stream and drop the last
 * byte of the saved token text. STREAM_STATE_EOF and STREAM_STATE_ERROR
 * are ignored, mirroring lex_get_save(), which never saves them.
 */
static void lex_unget_unsave(lex_t *lex, int c)
{
    if(c == STREAM_STATE_EOF || c == STREAM_STATE_ERROR)
        return;

    stream_unget(&lex->stream, c);

    /* The popped byte is only needed by the assertion. It is declared
       only in debug builds, because a set-but-unused variable is
       reported by GCC and warnings are treated as errors. */
#ifdef NDEBUG
    strbuffer_pop(&lex->saved_text);
#else
    {
        char popped = strbuffer_pop(&lex->saved_text);
        assert(c == popped);
    }
#endif
}
276
/* Move every byte still waiting in the stream's character buffer into
   the saved token text, advancing the buffer cursor and the byte
   position for each one. Line and column are not touched. */
static void lex_save_cached(lex_t *lex)
{
    stream_t *input = &lex->stream;
    char pending;

    while((pending = input->buffer[input->buffer_pos]) != '\0') {
        lex_save(lex, pending);
        input->buffer_pos++;
        input->position++;
    }
}
286
/* Release the decoded string held by the lexer and reset the string
   value to the empty state (NULL pointer, zero length). */
static void lex_free_string(lex_t *lex)
{
    char *owned = lex->value.string.val;

    lex->value.string.val = NULL;
    lex->value.string.len = 0;
    jsonp_free(owned);
}
293
/* Decode the four hexadecimal digits of a \uXXXX escape.
 *
 * `str` must point at the 'u' of the escape. The four characters after
 * it are read as hex digits (either case) and combined, most
 * significant digit first.
 *
 * Returns the decoded value (0..0xFFFF), or -1 if any of the four
 * characters is not a hexadecimal digit. Only 0-9, a-f and A-F are
 * accepted; other letters are rejected rather than being folded into a
 * bogus value. Scanning stops at the first invalid character, so a
 * string that ends early is rejected without reading past its NUL.
 */
static int32_t decode_unicode_escape(const char *str)
{
    int i;
    int32_t value = 0;

    assert(str[0] == 'u');

    for(i = 1; i <= 4; i++) {
        char c = str[i];
        int digit;

        if('0' <= c && c <= '9')
            digit = c - '0';
        else if('a' <= c && c <= 'f')
            digit = c - 'a' + 10;
        else if('A' <= c && c <= 'F')
            digit = c - 'A' + 10;
        else
            return -1;

        value = (value << 4) + digit;
    }

    return value;
}
317
/* Scan a JSON string token. The opening '"' has already been read and
 * saved by the caller (lex_scan()).
 *
 * Works in two passes:
 *   1. read and save raw input up to the closing '"', rejecting control
 *      characters, premature end of input and malformed escapes;
 *   2. decode the saved text (escapes, \uXXXX, surrogate pairs) into a
 *      newly allocated buffer.
 *
 * On success lex->token is TOKEN_STRING and lex->value.string holds the
 * decoded, NUL-terminated text and its length. On any failure
 * lex->token stays TOKEN_INVALID and the string value is released.
 */
static void lex_scan_string(lex_t *lex, json_error_t *error)
{
    int c;
    const char *p;
    char *t;
    int i;

    lex->value.string.val = NULL;
    lex->token = TOKEN_INVALID;

    c = lex_get_save(lex, error);

    /* pass 1: collect the raw text and validate its structure */
    while(c != '"') {
        if(c == STREAM_STATE_ERROR)
            goto out;

        else if(c == STREAM_STATE_EOF) {
            error_set(error, lex, "premature end of input");
            goto out;
        }

        else if(0 <= c && c <= 0x1F) {
            /* control character */
            lex_unget_unsave(lex, c);
            if(c == '\n')
                error_set(error, lex, "unexpected newline");
            else
                error_set(error, lex, "control character 0x%x", c);
            goto out;
        }

        else if(c == '\\') {
            c = lex_get_save(lex, error);
            if(c == 'u') {
                /* require exactly four hex digits; afterwards c holds
                   the character following them */
                c = lex_get_save(lex, error);
                for(i = 0; i < 4; i++) {
                    if(!l_isxdigit(c)) {
                        error_set(error, lex, "invalid escape");
                        goto out;
                    }
                    c = lex_get_save(lex, error);
                }
            }
            else if(c == '"' || c == '\\' || c == '/' || c == 'b' ||
                    c == 'f' || c == 'n' || c == 'r' || c == 't')
                c = lex_get_save(lex, error);
            else {
                error_set(error, lex, "invalid escape");
                goto out;
            }
        }
        else
            c = lex_get_save(lex, error);
    }

    /* the actual value is at most of the same length as the source
       string, because:
         - shortcut escapes (e.g. "\t") (length 2) are converted to 1 byte
         - a single \uXXXX escape (length 6) is converted to at most 3 bytes
         - two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair
           are converted to 4 bytes
    */
    t = jsonp_malloc(lex->saved_text.length + 1);
    if(!t) {
        /* this is not very nice, since TOKEN_INVALID is returned */
        goto out;
    }
    /* stored right away so that the out: path frees it on failure */
    lex->value.string.val = t;

    /* + 1 to skip the " */
    p = strbuffer_value(&lex->saved_text) + 1;

    /* pass 2: decode; p reads the raw text, t writes the result */
    while(*p != '"') {
        if(*p == '\\') {
            p++;
            if(*p == 'u') {
                size_t length;
                int32_t value;

                value = decode_unicode_escape(p);
                if(value < 0) {
                    error_set(error, lex, "invalid Unicode escape '%.6s'", p - 1);
                    goto out;
                }
                /* skip 'u' and the four hex digits */
                p += 5;

                if(0xD800 <= value && value <= 0xDBFF) {
                    /* surrogate pair */
                    if(*p == '\\' && *(p + 1) == 'u') {
                        int32_t value2 = decode_unicode_escape(++p);
                        if(value2 < 0) {
                            error_set(error, lex, "invalid Unicode escape '%.6s'", p - 1);
                            goto out;
                        }
                        p += 5;

                        if(0xDC00 <= value2 && value2 <= 0xDFFF) {
                            /* valid second surrogate */
                            value =
                                ((value - 0xD800) << 10) +
                                (value2 - 0xDC00) +
                                0x10000;
                        }
                        else {
                            /* invalid second surrogate */
                            error_set(error, lex,
                                      "invalid Unicode '\\u%04X\\u%04X'",
                                      value, value2);
                            goto out;
                        }
                    }
                    else {
                        /* no second surrogate */
                        error_set(error, lex, "invalid Unicode '\\u%04X'",
                                  value);
                        goto out;
                    }
                }
                else if(0xDC00 <= value && value <= 0xDFFF) {
                    /* a low surrogate without a preceding high one */
                    error_set(error, lex, "invalid Unicode '\\u%04X'", value);
                    goto out;
                }

                if(utf8_encode(value, t, &length))
                    assert(0);
                t += length;
            }
            else {
                /* single-character escape, validated in pass 1 */
                switch(*p) {
                    case '"': case '\\': case '/':
                        *t = *p; break;
                    case 'b': *t = '\b'; break;
                    case 'f': *t = '\f'; break;
                    case 'n': *t = '\n'; break;
                    case 'r': *t = '\r'; break;
                    case 't': *t = '\t'; break;
                    default: assert(0);
                }
                t++;
                p++;
            }
        }
        else
            *(t++) = *(p++);
    }
    *t = '\0';
    lex->value.string.len = t - lex->value.string.val;
    lex->token = TOKEN_STRING;
    return;

out:
    lex_free_string(lex);
}
471
/* Pick the string-to-integer conversion function used by
   lex_scan_number(): a 64-bit variant when JSON_INTEGER_IS_LONG_LONG is
   set (_strtoi64 for Microsoft Visual Studio, strtoll otherwise), else
   strtol. Under CMake this block is skipped, so json_strtoint is
   presumably defined by the build configuration -- TODO confirm. */
#ifndef JANSSON_USING_CMAKE /* disabled if using cmake */
#if JSON_INTEGER_IS_LONG_LONG
#ifdef _MSC_VER  /* Microsoft Visual Studio */
#define json_strtoint     _strtoi64
#else
#define json_strtoint     strtoll
#endif
#else
#define json_strtoint     strtol
#endif
#endif
483
/* Scan a JSON number. `c` is its first character ('-' or a digit),
 * which the caller (lex_scan()) has already read and saved.
 *
 * Returns 0 and sets lex->token to TOKEN_INTEGER or TOKEN_REAL, with the
 * value in lex->value, on success. Returns -1 with lex->token left as
 * TOKEN_INVALID on failure. Range errors are reported through `error`;
 * plain syntax errors (leading zeros, missing digits) are not reported
 * here.
 */
static int lex_scan_number(lex_t *lex, int c, json_error_t *error)
{
    const char *saved_text;
    char *end;
    double doubleval;

    lex->token = TOKEN_INVALID;

    if(c == '-')
        c = lex_get_save(lex, error);

    if(c == '0') {
        /* a leading zero must not be followed by another digit */
        c = lex_get_save(lex, error);
        if(l_isdigit(c)) {
            lex_unget_unsave(lex, c);
            goto out;
        }
    }
    else if(l_isdigit(c)) {
        do
            c = lex_get_save(lex, error);
        while(l_isdigit(c));
    }
    else {
        /* '-' not followed by a digit */
        lex_unget_unsave(lex, c);
        goto out;
    }

    /* no fraction or exponent follows: this is an integer, unless the
       caller asked for every number to be decoded as a real */
    if(!(lex->flags & JSON_DECODE_INT_AS_REAL) &&
       c != '.' && c != 'E' && c != 'e')
    {
        json_int_t intval;

        /* c is the first character after the number; give it back */
        lex_unget_unsave(lex, c);

        saved_text = strbuffer_value(&lex->saved_text);

        errno = 0;
        intval = json_strtoint(saved_text, &end, 10);
        if(errno == ERANGE) {
            if(intval < 0)
                error_set(error, lex, "too big negative integer");
            else
                error_set(error, lex, "too big integer");
            goto out;
        }

        assert(end == saved_text + lex->saved_text.length);

        lex->token = TOKEN_INTEGER;
        lex->value.integer = intval;
        return 0;
    }

    if(c == '.') {
        /* at least one digit is required after the decimal point; the
           character is only saved once it is known to be a digit */
        c = lex_get(lex, error);
        if(!l_isdigit(c)) {
            lex_unget(lex, c);
            goto out;
        }
        lex_save(lex, c);

        do
            c = lex_get_save(lex, error);
        while(l_isdigit(c));
    }

    if(c == 'E' || c == 'e') {
        /* exponent: optional sign, then at least one digit */
        c = lex_get_save(lex, error);
        if(c == '+' || c == '-')
            c = lex_get_save(lex, error);

        if(!l_isdigit(c)) {
            lex_unget_unsave(lex, c);
            goto out;
        }

        do
            c = lex_get_save(lex, error);
        while(l_isdigit(c));
    }

    /* c is the first character after the number; give it back */
    lex_unget_unsave(lex, c);

    if(jsonp_strtod(&lex->saved_text, &doubleval)) {
        error_set(error, lex, "real number overflow");
        goto out;
    }

    lex->token = TOKEN_REAL;
    lex->value.real = doubleval;
    return 0;

out:
    return -1;
}
580
lex_scan(lex_t * lex,json_error_t * error)581 static int lex_scan(lex_t *lex, json_error_t *error)
582 {
583 int c;
584
585 strbuffer_clear(&lex->saved_text);
586
587 if(lex->token == TOKEN_STRING)
588 lex_free_string(lex);
589
590 do
591 c = lex_get(lex, error);
592 while(c == ' ' || c == '\t' || c == '\n' || c == '\r');
593
594 if(c == STREAM_STATE_EOF) {
595 lex->token = TOKEN_EOF;
596 goto out;
597 }
598
599 if(c == STREAM_STATE_ERROR) {
600 lex->token = TOKEN_INVALID;
601 goto out;
602 }
603
604 lex_save(lex, c);
605
606 if(c == '{' || c == '}' || c == '[' || c == ']' || c == ':' || c == ',')
607 lex->token = c;
608
609 else if(c == '"')
610 lex_scan_string(lex, error);
611
612 else if(l_isdigit(c) || c == '-') {
613 if(lex_scan_number(lex, c, error))
614 goto out;
615 }
616
617 else if(l_isalpha(c)) {
618 /* eat up the whole identifier for clearer error messages */
619 const char *saved_text;
620
621 do
622 c = lex_get_save(lex, error);
623 while(l_isalpha(c));
624 lex_unget_unsave(lex, c);
625
626 saved_text = strbuffer_value(&lex->saved_text);
627
628 if(strcmp(saved_text, "true") == 0)
629 lex->token = TOKEN_TRUE;
630 else if(strcmp(saved_text, "false") == 0)
631 lex->token = TOKEN_FALSE;
632 else if(strcmp(saved_text, "null") == 0)
633 lex->token = TOKEN_NULL;
634 else
635 lex->token = TOKEN_INVALID;
636 }
637
638 else {
639 /* save the rest of the input UTF-8 sequence to get an error
640 message of valid UTF-8 */
641 lex_save_cached(lex);
642 lex->token = TOKEN_INVALID;
643 }
644
645 out:
646 return lex->token;
647 }
648
/* Take ownership of the current string token's decoded value.
 *
 * If the current token is a string, its buffer is returned, its length
 * is written to *out_len, and the lexer forgets the buffer so it will
 * not free it. Otherwise NULL is returned and *out_len is left
 * untouched.
 */
static char *lex_steal_string(lex_t *lex, size_t *out_len)
{
    char *stolen;

    if(lex->token != TOKEN_STRING)
        return NULL;

    stolen = lex->value.string.val;
    *out_len = lex->value.string.len;

    lex->value.string.val = NULL;
    lex->value.string.len = 0;

    return stolen;
}
660
/* Initialize a lexer that reads bytes through `get(data)` and decodes
   according to `flags`. Returns 0 on success, or -1 if the saved-text
   buffer could not be initialized. */
static int lex_init(lex_t *lex, get_func get, size_t flags, void *data)
{
    stream_init(&lex->stream, get, data);

    if(strbuffer_init(&lex->saved_text) != 0)
        return -1;

    lex->token = TOKEN_INVALID;
    lex->flags = flags;
    return 0;
}
671
/* Release everything the lexer owns: the decoded string of a pending
   string token, if any, and the saved-text buffer. */
static void lex_close(lex_t *lex)
{
    int holds_string = (lex->token == TOKEN_STRING);

    if(holds_string)
        lex_free_string(lex);

    strbuffer_close(&lex->saved_text);
}
678
679
680 /*** parser ***/
681
682 static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error);
683
parse_object(lex_t * lex,size_t flags,json_error_t * error)684 static json_t *parse_object(lex_t *lex, size_t flags, json_error_t *error)
685 {
686 json_t *object = json_object();
687 if(!object)
688 return NULL;
689
690 lex_scan(lex, error);
691 if(lex->token == '}')
692 return object;
693
694 while(1) {
695 char *key;
696 size_t len;
697 json_t *value;
698
699 if(lex->token != TOKEN_STRING) {
700 error_set(error, lex, "string or '}' expected");
701 goto error;
702 }
703
704 key = lex_steal_string(lex, &len);
705 if(!key)
706 return NULL;
707 if (memchr(key, '\0', len)) {
708 jsonp_free(key);
709 error_set(error, lex, "NUL byte in object key not supported");
710 goto error;
711 }
712
713 if(flags & JSON_REJECT_DUPLICATES) {
714 if(json_object_get(object, key)) {
715 jsonp_free(key);
716 error_set(error, lex, "duplicate object key");
717 goto error;
718 }
719 }
720
721 lex_scan(lex, error);
722 if(lex->token != ':') {
723 jsonp_free(key);
724 error_set(error, lex, "':' expected");
725 goto error;
726 }
727
728 lex_scan(lex, error);
729 value = parse_value(lex, flags, error);
730 if(!value) {
731 jsonp_free(key);
732 goto error;
733 }
734
735 if(json_object_set_nocheck(object, key, value)) {
736 jsonp_free(key);
737 json_decref(value);
738 goto error;
739 }
740
741 json_decref(value);
742 jsonp_free(key);
743
744 lex_scan(lex, error);
745 if(lex->token != ',')
746 break;
747
748 lex_scan(lex, error);
749 }
750
751 if(lex->token != '}') {
752 error_set(error, lex, "'}' expected");
753 goto error;
754 }
755
756 return object;
757
758 error:
759 json_decref(object);
760 return NULL;
761 }
762
parse_array(lex_t * lex,size_t flags,json_error_t * error)763 static json_t *parse_array(lex_t *lex, size_t flags, json_error_t *error)
764 {
765 json_t *array = json_array();
766 if(!array)
767 return NULL;
768
769 lex_scan(lex, error);
770 if(lex->token == ']')
771 return array;
772
773 while(lex->token) {
774 json_t *elem = parse_value(lex, flags, error);
775 if(!elem)
776 goto error;
777
778 if(json_array_append(array, elem)) {
779 json_decref(elem);
780 goto error;
781 }
782 json_decref(elem);
783
784 lex_scan(lex, error);
785 if(lex->token != ',')
786 break;
787
788 lex_scan(lex, error);
789 }
790
791 if(lex->token != ']') {
792 error_set(error, lex, "']' expected");
793 goto error;
794 }
795
796 return array;
797
798 error:
799 json_decref(array);
800 return NULL;
801 }
802
parse_value(lex_t * lex,size_t flags,json_error_t * error)803 static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error)
804 {
805 json_t *json;
806
807 lex->depth++;
808 if(lex->depth > JSON_PARSER_MAX_DEPTH) {
809 error_set(error, lex, "maximum parsing depth reached");
810 return NULL;
811 }
812
813 switch(lex->token) {
814 case TOKEN_STRING: {
815 const char *value = lex->value.string.val;
816 size_t len = lex->value.string.len;
817
818 if(!(flags & JSON_ALLOW_NUL)) {
819 if(memchr(value, '\0', len)) {
820 error_set(error, lex, "\\u0000 is not allowed without JSON_ALLOW_NUL");
821 return NULL;
822 }
823 }
824
825 json = jsonp_stringn_nocheck_own(value, len);
826 if(json) {
827 lex->value.string.val = NULL;
828 lex->value.string.len = 0;
829 }
830 break;
831 }
832
833 case TOKEN_INTEGER: {
834 json = json_integer(lex->value.integer);
835 break;
836 }
837
838 case TOKEN_REAL: {
839 json = json_real(lex->value.real);
840 break;
841 }
842
843 case TOKEN_TRUE:
844 json = json_true();
845 break;
846
847 case TOKEN_FALSE:
848 json = json_false();
849 break;
850
851 case TOKEN_NULL:
852 json = json_null();
853 break;
854
855 case '{':
856 json = parse_object(lex, flags, error);
857 break;
858
859 case '[':
860 json = parse_array(lex, flags, error);
861 break;
862
863 case TOKEN_INVALID:
864 error_set(error, lex, "invalid token");
865 return NULL;
866
867 default:
868 error_set(error, lex, "unexpected token");
869 return NULL;
870 }
871
872 if(!json)
873 return NULL;
874
875 lex->depth--;
876 return json;
877 }
878
parse_json(lex_t * lex,size_t flags,json_error_t * error)879 static json_t *parse_json(lex_t *lex, size_t flags, json_error_t *error)
880 {
881 json_t *result;
882
883 lex->depth = 0;
884
885 lex_scan(lex, error);
886 if(!(flags & JSON_DECODE_ANY)) {
887 if(lex->token != '[' && lex->token != '{') {
888 error_set(error, lex, "'[' or '{' expected");
889 return NULL;
890 }
891 }
892
893 result = parse_value(lex, flags, error);
894 if(!result)
895 return NULL;
896
897 if(!(flags & JSON_DISABLE_EOF_CHECK)) {
898 lex_scan(lex, error);
899 if(lex->token != TOKEN_EOF) {
900 error_set(error, lex, "end of file expected");
901 json_decref(result);
902 return NULL;
903 }
904 }
905
906 if(error) {
907 /* Save the position even though there was no error */
908 error->position = (int)lex->stream.position;
909 }
910
911 return result;
912 }
913
/* Input source for json_loads(): a NUL-terminated string and the index
   of the next byte to read. */
typedef struct
{
    const char *data;
    size_t pos;
} string_data_t;

/* get_func reading from a string_data_t. Returns the next byte as an
   unsigned char value and advances the cursor, or returns EOF, without
   advancing, once the terminating NUL is reached. */
static int string_get(void *data)
{
    string_data_t *src = (string_data_t *)data;
    unsigned char byte = (unsigned char)src->data[src->pos];

    if(byte == 0)
        return EOF;

    src->pos++;
    return byte;
}
933
json_loads(const char * string,size_t flags,json_error_t * error)934 json_t *json_loads(const char *string, size_t flags, json_error_t *error)
935 {
936 lex_t lex;
937 json_t *result;
938 string_data_t stream_data;
939
940 jsonp_error_init(error, "<string>");
941
942 if (string == NULL) {
943 error_set(error, NULL, "wrong arguments");
944 return NULL;
945 }
946
947 stream_data.data = string;
948 stream_data.pos = 0;
949
950 if(lex_init(&lex, string_get, flags, (void *)&stream_data))
951 return NULL;
952
953 result = parse_json(&lex, flags, error);
954
955 lex_close(&lex);
956 return result;
957 }
958
/* Input source for json_loadb(): a byte buffer of known length and the
   index of the next byte to read. */
typedef struct
{
    const char *data;
    size_t len;
    size_t pos;
} buffer_data_t;

/* get_func reading from a buffer_data_t. Returns the next byte as an
   unsigned char value and advances the cursor, or returns EOF once
   `len` bytes have been consumed. NUL bytes are ordinary data here. */
static int buffer_get(void *data)
{
    buffer_data_t *src = data;

    if(src->pos < src->len)
        return (unsigned char)src->data[src->pos++];

    return EOF;
}
977
json_loadb(const char * buffer,size_t buflen,size_t flags,json_error_t * error)978 json_t *json_loadb(const char *buffer, size_t buflen, size_t flags, json_error_t *error)
979 {
980 lex_t lex;
981 json_t *result;
982 buffer_data_t stream_data;
983
984 jsonp_error_init(error, "<buffer>");
985
986 if (buffer == NULL) {
987 error_set(error, NULL, "wrong arguments");
988 return NULL;
989 }
990
991 stream_data.data = buffer;
992 stream_data.pos = 0;
993 stream_data.len = buflen;
994
995 if(lex_init(&lex, buffer_get, flags, (void *)&stream_data))
996 return NULL;
997
998 result = parse_json(&lex, flags, error);
999
1000 lex_close(&lex);
1001 return result;
1002 }
1003
json_loadf(FILE * input,size_t flags,json_error_t * error)1004 json_t *json_loadf(FILE *input, size_t flags, json_error_t *error)
1005 {
1006 lex_t lex;
1007 const char *source;
1008 json_t *result;
1009
1010 if(input == stdin)
1011 source = "<stdin>";
1012 else
1013 source = "<stream>";
1014
1015 jsonp_error_init(error, source);
1016
1017 if (input == NULL) {
1018 error_set(error, NULL, "wrong arguments");
1019 return NULL;
1020 }
1021
1022 if(lex_init(&lex, (get_func)fgetc, flags, input))
1023 return NULL;
1024
1025 result = parse_json(&lex, flags, error);
1026
1027 lex_close(&lex);
1028 return result;
1029 }
1030
json_load_file(const char * path,size_t flags,json_error_t * error)1031 json_t *json_load_file(const char *path, size_t flags, json_error_t *error)
1032 {
1033 json_t *result;
1034 FILE *fp;
1035
1036 jsonp_error_init(error, path);
1037
1038 if (path == NULL) {
1039 error_set(error, NULL, "wrong arguments");
1040 return NULL;
1041 }
1042
1043 fp = fopen(path, "rb");
1044 if(!fp)
1045 {
1046 error_set(error, NULL, "unable to open %s: %s",
1047 path, strerror(errno));
1048 return NULL;
1049 }
1050
1051 result = json_loadf(fp, flags, error);
1052
1053 fclose(fp);
1054 return result;
1055 }
1056
1057 #define MAX_BUF_LEN 1024
1058
1059 typedef struct
1060 {
1061 char data[MAX_BUF_LEN];
1062 size_t len;
1063 size_t pos;
1064 json_load_callback_t callback;
1065 void *arg;
1066 } callback_data_t;
1067
callback_get(void * data)1068 static int callback_get(void *data)
1069 {
1070 char c;
1071 callback_data_t *stream = data;
1072
1073 if(stream->pos >= stream->len) {
1074 stream->pos = 0;
1075 stream->len = stream->callback(stream->data, MAX_BUF_LEN, stream->arg);
1076 if(stream->len == 0 || stream->len == (size_t)-1)
1077 return EOF;
1078 }
1079
1080 c = stream->data[stream->pos];
1081 stream->pos++;
1082 return (unsigned char)c;
1083 }
1084
json_load_callback(json_load_callback_t callback,void * arg,size_t flags,json_error_t * error)1085 json_t *json_load_callback(json_load_callback_t callback, void *arg, size_t flags, json_error_t *error)
1086 {
1087 lex_t lex;
1088 json_t *result;
1089
1090 callback_data_t stream_data;
1091
1092 memset(&stream_data, 0, sizeof(stream_data));
1093 stream_data.callback = callback;
1094 stream_data.arg = arg;
1095
1096 jsonp_error_init(error, "<callback>");
1097
1098 if (callback == NULL) {
1099 error_set(error, NULL, "wrong arguments");
1100 return NULL;
1101 }
1102
1103 if(lex_init(&lex, (get_func)callback_get, flags, &stream_data))
1104 return NULL;
1105
1106 result = parse_json(&lex, flags, error);
1107
1108 lex_close(&lex);
1109 return result;
1110 }
1111