1 /*
2  * Copyright (c) 2009-2016 Petri Lehtinen <petri@digip.org>
3  *
4  * Jansson is free software; you can redistribute it and/or modify
5  * it under the terms of the MIT license. See LICENSE for details.
6  */
7 
8 #ifndef _GNU_SOURCE
9 #define _GNU_SOURCE
10 #endif
11 
12 #include <errno.h>
13 #include <limits.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <assert.h>
18 
19 #include "jansson.h"
20 #include "jansson_private.h"
21 #include "strbuffer.h"
22 #include "utf.h"
23 
/* Stream states. stream_get() returns the non-OK values once the
   stream can no longer be read. */
enum {
    STREAM_STATE_OK    =  0,
    STREAM_STATE_EOF   = -1,
    STREAM_STATE_ERROR = -2
};

/* Token types. Single-character tokens ('{', '[', ':', ...) are stored
   as their own character code, so the named tokens start at 256. */
enum {
    TOKEN_INVALID = -1,
    TOKEN_EOF     =  0,
    TOKEN_STRING  = 256,
    TOKEN_INTEGER = 257,
    TOKEN_REAL    = 258,
    TOKEN_TRUE    = 259,
    TOKEN_FALSE   = 260,
    TOKEN_NULL    = 261
};
36 
/* Locale independent versions of the isxxx() functions, built on a
   single closed-range test. The argument is evaluated more than once,
   so it must be free of side effects. */
#define l_inrange(c, lo, hi)  ((lo) <= (c) && (c) <= (hi))
#define l_isupper(c)  l_inrange(c, 'A', 'Z')
#define l_islower(c)  l_inrange(c, 'a', 'z')
#define l_isalpha(c)  (l_isupper(c) || l_islower(c))
#define l_isdigit(c)  l_inrange(c, '0', '9')
#define l_isxdigit(c) \
    (l_isdigit(c) || l_inrange(c, 'A', 'F') || l_inrange(c, 'a', 'f'))
44 
/* Byte source used by the lexer: returns the next input byte converted
   to unsigned char and then to int, or EOF when the input is
   exhausted. This is the same contract as fgetc(). */
typedef int (*get_func)(void *data);

/* Input stream: a byte source, a buffer holding the UTF-8 sequence
   currently being handed out, and the location bookkeeping used for
   error reporting. */
typedef struct {
    get_func get;       /* byte source */
    void *data;         /* opaque argument passed to get() */
    char buffer[5];     /* current UTF-8 sequence, NUL terminated */
    size_t buffer_pos;  /* index of the next unread byte in buffer */
    int state;          /* one of STREAM_STATE_* */
    int line;           /* current line, counted from 1 */
    int column;         /* column on the current line, in characters */
    int last_column;    /* column saved when a newline is read */
    size_t position;    /* number of bytes handed out so far */
} stream_t;
60 
61 typedef struct {
62     stream_t stream;
63     strbuffer_t saved_text;
64     size_t flags;
65     size_t depth;
66     int token;
67     union {
68         struct {
69             char *val;
70             size_t len;
71         } string;
72         json_int_t integer;
73         double real;
74     } value;
75 } lex_t;
76 
77 #define stream_to_lex(stream) container_of(stream, lex_t, stream)
78 
79 
80 /*** error reporting ***/
81 
error_set(json_error_t * error,const lex_t * lex,const char * msg,...)82 static void error_set(json_error_t *error, const lex_t *lex,
83                       const char *msg, ...)
84 {
85     va_list ap;
86     char msg_text[JSON_ERROR_TEXT_LENGTH];
87     char msg_with_context[JSON_ERROR_TEXT_LENGTH];
88 
89     int line = -1, col = -1;
90     size_t pos = 0;
91     const char *result = msg_text;
92 
93     if(!error)
94         return;
95 
96     va_start(ap, msg);
97     vsnprintf(msg_text, JSON_ERROR_TEXT_LENGTH, msg, ap);
98     msg_text[JSON_ERROR_TEXT_LENGTH - 1] = '\0';
99     va_end(ap);
100 
101     if(lex)
102     {
103         const char *saved_text = strbuffer_value(&lex->saved_text);
104 
105         line = lex->stream.line;
106         col = lex->stream.column;
107         pos = lex->stream.position;
108 
109         if(saved_text && saved_text[0])
110         {
111             if(lex->saved_text.length <= 20) {
112                 snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH,
113                          "%s near '%s'", msg_text, saved_text);
114                 msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0';
115                 result = msg_with_context;
116             }
117         }
118         else
119         {
120             if(lex->stream.state == STREAM_STATE_ERROR) {
121                 /* No context for UTF-8 decoding errors */
122                 result = msg_text;
123             }
124             else {
125                 snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH,
126                          "%s near end of file", msg_text);
127                 msg_with_context[JSON_ERROR_TEXT_LENGTH - 1] = '\0';
128                 result = msg_with_context;
129             }
130         }
131     }
132 
133     jsonp_error_set(error, line, col, pos, "%s", result);
134 }
135 
136 
137 /*** lexical analyzer ***/
138 
139 static void
stream_init(stream_t * stream,get_func get,void * data)140 stream_init(stream_t *stream, get_func get, void *data)
141 {
142     stream->get = get;
143     stream->data = data;
144     stream->buffer[0] = '\0';
145     stream->buffer_pos = 0;
146 
147     stream->state = STREAM_STATE_OK;
148     stream->line = 1;
149     stream->column = 0;
150     stream->position = 0;
151 }
152 
stream_get(stream_t * stream,json_error_t * error)153 static int stream_get(stream_t *stream, json_error_t *error)
154 {
155     int c;
156 
157     if(stream->state != STREAM_STATE_OK)
158         return stream->state;
159 
160     if(!stream->buffer[stream->buffer_pos])
161     {
162         c = stream->get(stream->data);
163         if(c == EOF) {
164             stream->state = STREAM_STATE_EOF;
165             return STREAM_STATE_EOF;
166         }
167 
168         stream->buffer[0] = c;
169         stream->buffer_pos = 0;
170 
171         if(0x80 <= c && c <= 0xFF)
172         {
173             /* multi-byte UTF-8 sequence */
174             size_t i, count;
175 
176             count = utf8_check_first(c);
177             if(!count)
178                 goto out;
179 
180             assert(count >= 2);
181 
182             for(i = 1; i < count; i++)
183                 stream->buffer[i] = stream->get(stream->data);
184 
185             if(!utf8_check_full(stream->buffer, count, NULL))
186                 goto out;
187 
188             stream->buffer[count] = '\0';
189         }
190         else
191             stream->buffer[1] = '\0';
192     }
193 
194     c = stream->buffer[stream->buffer_pos++];
195 
196     stream->position++;
197     if(c == '\n') {
198         stream->line++;
199         stream->last_column = stream->column;
200         stream->column = 0;
201     }
202     else if(utf8_check_first(c)) {
203         /* track the Unicode character column, so increment only if
204            this is the first character of a UTF-8 sequence */
205         stream->column++;
206     }
207 
208     return c;
209 
210 out:
211     stream->state = STREAM_STATE_ERROR;
212     error_set(error, stream_to_lex(stream), "unable to decode byte 0x%x", c);
213     return STREAM_STATE_ERROR;
214 }
215 
/* Push back `c`, which must be the byte most recently returned by
   stream_get(). Location bookkeeping is rewound accordingly. The EOF
   and error states are not bytes, so ungetting them does nothing. */
static void stream_unget(stream_t *stream, int c)
{
    switch(c) {
        case STREAM_STATE_EOF:
        case STREAM_STATE_ERROR:
            return;
        default:
            break;
    }

    stream->position--;

    if(c != '\n') {
        /* only the first byte of a sequence advanced the column */
        if(utf8_check_first(c))
            stream->column--;
    }
    else {
        stream->column = stream->last_column;
        stream->line--;
    }

    assert(stream->buffer_pos > 0);
    stream->buffer_pos -= 1;
    assert(stream->buffer[stream->buffer_pos] == c);
}
233 
234 
lex_get(lex_t * lex,json_error_t * error)235 static int lex_get(lex_t *lex, json_error_t *error)
236 {
237     return stream_get(&lex->stream, error);
238 }
239 
/* Append the byte `c` to the saved text of the current token. */
static void lex_save(lex_t *lex, int c)
{
    strbuffer_t *text = &lex->saved_text;

    strbuffer_append_byte(text, c);
}
244 
lex_get_save(lex_t * lex,json_error_t * error)245 static int lex_get_save(lex_t *lex, json_error_t *error)
246 {
247     int c = stream_get(&lex->stream, error);
248     if(c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR)
249         lex_save(lex, c);
250     return c;
251 }
252 
/* Push `c` back to the lexer's stream; the saved token text is left
   untouched. */
static void lex_unget(lex_t *lex, int c)
{
    stream_t *input = &lex->stream;

    stream_unget(input, c);
}
257 
/* Undo a lex_get_save(): push `c` back to the stream and drop the last
   byte of the saved token text. EOF and error states were never saved,
   so they are ignored. */
static void lex_unget_unsave(lex_t *lex, int c)
{
    char popped;

    if(c == STREAM_STATE_EOF || c == STREAM_STATE_ERROR)
        return;

    stream_unget(&lex->stream, c);
    popped = strbuffer_pop(&lex->saved_text);

    assert(c == popped);
    /* With assertions compiled out, `popped` would be set but never
       read, which warning-as-error builds reject. */
    (void)popped;
}
276 
/* Move whatever is still unread in the stream buffer (the rest of the
   current UTF-8 sequence) into the saved token text, advancing the
   buffer index and the byte position past it. */
static void lex_save_cached(lex_t *lex)
{
    stream_t *in = &lex->stream;

    for(; in->buffer[in->buffer_pos] != '\0'; in->buffer_pos++, in->position++)
        lex_save(lex, in->buffer[in->buffer_pos]);
}
286 
/* Release the string value held by the lexer and reset it to an empty
   state (NULL pointer, zero length). */
static void lex_free_string(lex_t *lex)
{
    char *owned = lex->value.string.val;

    lex->value.string.len = 0;
    lex->value.string.val = NULL;
    jsonp_free(owned);
}
293 
/* Decode the four hexadecimal digits of a \uXXXX escape.
 *
 * str must point at the 'u' of the escape. The digits are read left to
 * right and reading stops at the first character that is not a hex
 * digit, so a NUL terminator inside the four positions is safe.
 *
 * Returns the decoded 16-bit code unit (0..0xFFFF), or -1 if any of
 * the four characters is not one of 0-9, a-f, A-F.
 */
static int32_t decode_unicode_escape(const char *str)
{
    int32_t value = 0;
    int i;

    assert(str[0] == 'u');

    for(i = 1; i <= 4; i++) {
        const char c = str[i];
        int digit;

        /* Plain range checks keep this locale independent. Only a-f
           and A-F are accepted as letters: treating every letter as a
           digit would turn malformed input into a bogus code point
           instead of an error. */
        if('0' <= c && c <= '9')
            digit = c - '0';
        else if('a' <= c && c <= 'f')
            digit = c - 'a' + 10;
        else if('A' <= c && c <= 'F')
            digit = c - 'A' + 10;
        else
            return -1;

        value = (value << 4) + digit;
    }

    return value;
}
317 
lex_scan_string(lex_t * lex,json_error_t * error)318 static void lex_scan_string(lex_t *lex, json_error_t *error)
319 {
320     int c;
321     const char *p;
322     char *t;
323     int i;
324 
325     lex->value.string.val = NULL;
326     lex->token = TOKEN_INVALID;
327 
328     c = lex_get_save(lex, error);
329 
330     while(c != '"') {
331         if(c == STREAM_STATE_ERROR)
332             goto out;
333 
334         else if(c == STREAM_STATE_EOF) {
335             error_set(error, lex, "premature end of input");
336             goto out;
337         }
338 
339         else if(0 <= c && c <= 0x1F) {
340             /* control character */
341             lex_unget_unsave(lex, c);
342             if(c == '\n')
343                 error_set(error, lex, "unexpected newline");
344             else
345                 error_set(error, lex, "control character 0x%x", c);
346             goto out;
347         }
348 
349         else if(c == '\\') {
350             c = lex_get_save(lex, error);
351             if(c == 'u') {
352                 c = lex_get_save(lex, error);
353                 for(i = 0; i < 4; i++) {
354                     if(!l_isxdigit(c)) {
355                         error_set(error, lex, "invalid escape");
356                         goto out;
357                     }
358                     c = lex_get_save(lex, error);
359                 }
360             }
361             else if(c == '"' || c == '\\' || c == '/' || c == 'b' ||
362                     c == 'f' || c == 'n' || c == 'r' || c == 't')
363                 c = lex_get_save(lex, error);
364             else {
365                 error_set(error, lex, "invalid escape");
366                 goto out;
367             }
368         }
369         else
370             c = lex_get_save(lex, error);
371     }
372 
373     /* the actual value is at most of the same length as the source
374        string, because:
375          - shortcut escapes (e.g. "\t") (length 2) are converted to 1 byte
376          - a single \uXXXX escape (length 6) is converted to at most 3 bytes
377          - two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair
378            are converted to 4 bytes
379     */
380     t = jsonp_malloc(lex->saved_text.length + 1);
381     if(!t) {
382         /* this is not very nice, since TOKEN_INVALID is returned */
383         goto out;
384     }
385     lex->value.string.val = t;
386 
387     /* + 1 to skip the " */
388     p = strbuffer_value(&lex->saved_text) + 1;
389 
390     while(*p != '"') {
391         if(*p == '\\') {
392             p++;
393             if(*p == 'u') {
394                 size_t length;
395                 int32_t value;
396 
397                 value = decode_unicode_escape(p);
398                 if(value < 0) {
399                     error_set(error, lex, "invalid Unicode escape '%.6s'", p - 1);
400                     goto out;
401                 }
402                 p += 5;
403 
404                 if(0xD800 <= value && value <= 0xDBFF) {
405                     /* surrogate pair */
406                     if(*p == '\\' && *(p + 1) == 'u') {
407                         int32_t value2 = decode_unicode_escape(++p);
408                         if(value2 < 0) {
409                             error_set(error, lex, "invalid Unicode escape '%.6s'", p - 1);
410                             goto out;
411                         }
412                         p += 5;
413 
414                         if(0xDC00 <= value2 && value2 <= 0xDFFF) {
415                             /* valid second surrogate */
416                             value =
417                                 ((value - 0xD800) << 10) +
418                                 (value2 - 0xDC00) +
419                                 0x10000;
420                         }
421                         else {
422                             /* invalid second surrogate */
423                             error_set(error, lex,
424                                       "invalid Unicode '\\u%04X\\u%04X'",
425                                       value, value2);
426                             goto out;
427                         }
428                     }
429                     else {
430                         /* no second surrogate */
431                         error_set(error, lex, "invalid Unicode '\\u%04X'",
432                                   value);
433                         goto out;
434                     }
435                 }
436                 else if(0xDC00 <= value && value <= 0xDFFF) {
437                     error_set(error, lex, "invalid Unicode '\\u%04X'", value);
438                     goto out;
439                 }
440 
441                 if(utf8_encode(value, t, &length))
442                     assert(0);
443                 t += length;
444             }
445             else {
446                 switch(*p) {
447                     case '"': case '\\': case '/':
448                         *t = *p; break;
449                     case 'b': *t = '\b'; break;
450                     case 'f': *t = '\f'; break;
451                     case 'n': *t = '\n'; break;
452                     case 'r': *t = '\r'; break;
453                     case 't': *t = '\t'; break;
454                     default: assert(0);
455                 }
456                 t++;
457                 p++;
458             }
459         }
460         else
461             *(t++) = *(p++);
462     }
463     *t = '\0';
464     lex->value.string.len = t - lex->value.string.val;
465     lex->token = TOKEN_STRING;
466     return;
467 
468 out:
469     lex_free_string(lex);
470 }
471 
/* Pick the string-to-integer routine matching the width of json_int_t. */
#ifndef JANSSON_USING_CMAKE /* disabled if using cmake */
#if !JSON_INTEGER_IS_LONG_LONG
#define json_strtoint     strtol
#elif defined(_MSC_VER)  /* Microsoft Visual Studio */
#define json_strtoint     _strtoi64
#else
#define json_strtoint     strtoll
#endif
#endif
483 
lex_scan_number(lex_t * lex,int c,json_error_t * error)484 static int lex_scan_number(lex_t *lex, int c, json_error_t *error)
485 {
486     const char *saved_text;
487     char *end;
488     double doubleval;
489 
490     lex->token = TOKEN_INVALID;
491 
492     if(c == '-')
493         c = lex_get_save(lex, error);
494 
495     if(c == '0') {
496         c = lex_get_save(lex, error);
497         if(l_isdigit(c)) {
498             lex_unget_unsave(lex, c);
499             goto out;
500         }
501     }
502     else if(l_isdigit(c)) {
503         do
504             c = lex_get_save(lex, error);
505         while(l_isdigit(c));
506     }
507     else {
508         lex_unget_unsave(lex, c);
509         goto out;
510     }
511 
512     if(!(lex->flags & JSON_DECODE_INT_AS_REAL) &&
513        c != '.' && c != 'E' && c != 'e')
514     {
515         json_int_t intval;
516 
517         lex_unget_unsave(lex, c);
518 
519         saved_text = strbuffer_value(&lex->saved_text);
520 
521         errno = 0;
522         intval = json_strtoint(saved_text, &end, 10);
523         if(errno == ERANGE) {
524             if(intval < 0)
525                 error_set(error, lex, "too big negative integer");
526             else
527                 error_set(error, lex, "too big integer");
528             goto out;
529         }
530 
531         assert(end == saved_text + lex->saved_text.length);
532 
533         lex->token = TOKEN_INTEGER;
534         lex->value.integer = intval;
535         return 0;
536     }
537 
538     if(c == '.') {
539         c = lex_get(lex, error);
540         if(!l_isdigit(c)) {
541             lex_unget(lex, c);
542             goto out;
543         }
544         lex_save(lex, c);
545 
546         do
547             c = lex_get_save(lex, error);
548         while(l_isdigit(c));
549     }
550 
551     if(c == 'E' || c == 'e') {
552         c = lex_get_save(lex, error);
553         if(c == '+' || c == '-')
554             c = lex_get_save(lex, error);
555 
556         if(!l_isdigit(c)) {
557             lex_unget_unsave(lex, c);
558             goto out;
559         }
560 
561         do
562             c = lex_get_save(lex, error);
563         while(l_isdigit(c));
564     }
565 
566     lex_unget_unsave(lex, c);
567 
568     if(jsonp_strtod(&lex->saved_text, &doubleval)) {
569         error_set(error, lex, "real number overflow");
570         goto out;
571     }
572 
573     lex->token = TOKEN_REAL;
574     lex->value.real = doubleval;
575     return 0;
576 
577 out:
578     return -1;
579 }
580 
lex_scan(lex_t * lex,json_error_t * error)581 static int lex_scan(lex_t *lex, json_error_t *error)
582 {
583     int c;
584 
585     strbuffer_clear(&lex->saved_text);
586 
587     if(lex->token == TOKEN_STRING)
588         lex_free_string(lex);
589 
590     do
591         c = lex_get(lex, error);
592     while(c == ' ' || c == '\t' || c == '\n' || c == '\r');
593 
594     if(c == STREAM_STATE_EOF) {
595         lex->token = TOKEN_EOF;
596         goto out;
597     }
598 
599     if(c == STREAM_STATE_ERROR) {
600         lex->token = TOKEN_INVALID;
601         goto out;
602     }
603 
604     lex_save(lex, c);
605 
606     if(c == '{' || c == '}' || c == '[' || c == ']' || c == ':' || c == ',')
607         lex->token = c;
608 
609     else if(c == '"')
610         lex_scan_string(lex, error);
611 
612     else if(l_isdigit(c) || c == '-') {
613         if(lex_scan_number(lex, c, error))
614             goto out;
615     }
616 
617     else if(l_isalpha(c)) {
618         /* eat up the whole identifier for clearer error messages */
619         const char *saved_text;
620 
621         do
622             c = lex_get_save(lex, error);
623         while(l_isalpha(c));
624         lex_unget_unsave(lex, c);
625 
626         saved_text = strbuffer_value(&lex->saved_text);
627 
628         if(strcmp(saved_text, "true") == 0)
629             lex->token = TOKEN_TRUE;
630         else if(strcmp(saved_text, "false") == 0)
631             lex->token = TOKEN_FALSE;
632         else if(strcmp(saved_text, "null") == 0)
633             lex->token = TOKEN_NULL;
634         else
635             lex->token = TOKEN_INVALID;
636     }
637 
638     else {
639         /* save the rest of the input UTF-8 sequence to get an error
640            message of valid UTF-8 */
641         lex_save_cached(lex);
642         lex->token = TOKEN_INVALID;
643     }
644 
645 out:
646     return lex->token;
647 }
648 
/* Take ownership of the current string token's value.

   If the current token is a string, its buffer is returned, its length
   is stored in *out_len, and the lexer forgets about it; the caller
   must free the buffer. Otherwise NULL is returned and *out_len is
   left untouched. */
static char *lex_steal_string(lex_t *lex, size_t *out_len)
{
    char *stolen;

    if(lex->token != TOKEN_STRING)
        return NULL;

    stolen = lex->value.string.val;
    *out_len = lex->value.string.len;

    lex->value.string.len = 0;
    lex->value.string.val = NULL;

    return stolen;
}
660 
/* Initialize a lexer reading bytes from get(data) with the given
   decoding flags. Returns 0 on success and -1 if the saved-text buffer
   cannot be initialized. */
static int lex_init(lex_t *lex, get_func get, size_t flags, void *data)
{
    stream_init(&lex->stream, get, data);

    if(strbuffer_init(&lex->saved_text) != 0)
        return -1;

    lex->token = TOKEN_INVALID;
    lex->flags = flags;

    return 0;
}
671 
/* Release everything the lexer owns: the value of a pending string
   token, if any, and the saved-text buffer. */
static void lex_close(lex_t *lex)
{
    const int holds_string = (lex->token == TOKEN_STRING);

    if(holds_string)
        lex_free_string(lex);

    strbuffer_close(&lex->saved_text);
}
678 
679 
680 /*** parser ***/
681 
682 static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error);
683 
parse_object(lex_t * lex,size_t flags,json_error_t * error)684 static json_t *parse_object(lex_t *lex, size_t flags, json_error_t *error)
685 {
686     json_t *object = json_object();
687     if(!object)
688         return NULL;
689 
690     lex_scan(lex, error);
691     if(lex->token == '}')
692         return object;
693 
694     while(1) {
695         char *key;
696         size_t len;
697         json_t *value;
698 
699         if(lex->token != TOKEN_STRING) {
700             error_set(error, lex, "string or '}' expected");
701             goto error;
702         }
703 
704         key = lex_steal_string(lex, &len);
705         if(!key)
706             return NULL;
707         if (memchr(key, '\0', len)) {
708             jsonp_free(key);
709             error_set(error, lex, "NUL byte in object key not supported");
710             goto error;
711         }
712 
713         if(flags & JSON_REJECT_DUPLICATES) {
714             if(json_object_get(object, key)) {
715                 jsonp_free(key);
716                 error_set(error, lex, "duplicate object key");
717                 goto error;
718             }
719         }
720 
721         lex_scan(lex, error);
722         if(lex->token != ':') {
723             jsonp_free(key);
724             error_set(error, lex, "':' expected");
725             goto error;
726         }
727 
728         lex_scan(lex, error);
729         value = parse_value(lex, flags, error);
730         if(!value) {
731             jsonp_free(key);
732             goto error;
733         }
734 
735         if(json_object_set_nocheck(object, key, value)) {
736             jsonp_free(key);
737             json_decref(value);
738             goto error;
739         }
740 
741         json_decref(value);
742         jsonp_free(key);
743 
744         lex_scan(lex, error);
745         if(lex->token != ',')
746             break;
747 
748         lex_scan(lex, error);
749     }
750 
751     if(lex->token != '}') {
752         error_set(error, lex, "'}' expected");
753         goto error;
754     }
755 
756     return object;
757 
758 error:
759     json_decref(object);
760     return NULL;
761 }
762 
parse_array(lex_t * lex,size_t flags,json_error_t * error)763 static json_t *parse_array(lex_t *lex, size_t flags, json_error_t *error)
764 {
765     json_t *array = json_array();
766     if(!array)
767         return NULL;
768 
769     lex_scan(lex, error);
770     if(lex->token == ']')
771         return array;
772 
773     while(lex->token) {
774         json_t *elem = parse_value(lex, flags, error);
775         if(!elem)
776             goto error;
777 
778         if(json_array_append(array, elem)) {
779             json_decref(elem);
780             goto error;
781         }
782         json_decref(elem);
783 
784         lex_scan(lex, error);
785         if(lex->token != ',')
786             break;
787 
788         lex_scan(lex, error);
789     }
790 
791     if(lex->token != ']') {
792         error_set(error, lex, "']' expected");
793         goto error;
794     }
795 
796     return array;
797 
798 error:
799     json_decref(array);
800     return NULL;
801 }
802 
parse_value(lex_t * lex,size_t flags,json_error_t * error)803 static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error)
804 {
805     json_t *json;
806 
807     lex->depth++;
808     if(lex->depth > JSON_PARSER_MAX_DEPTH) {
809         error_set(error, lex, "maximum parsing depth reached");
810         return NULL;
811     }
812 
813     switch(lex->token) {
814         case TOKEN_STRING: {
815             const char *value = lex->value.string.val;
816             size_t len = lex->value.string.len;
817 
818             if(!(flags & JSON_ALLOW_NUL)) {
819                 if(memchr(value, '\0', len)) {
820                     error_set(error, lex, "\\u0000 is not allowed without JSON_ALLOW_NUL");
821                     return NULL;
822                 }
823             }
824 
825             json = jsonp_stringn_nocheck_own(value, len);
826             if(json) {
827                 lex->value.string.val = NULL;
828                 lex->value.string.len = 0;
829             }
830             break;
831         }
832 
833         case TOKEN_INTEGER: {
834             json = json_integer(lex->value.integer);
835             break;
836         }
837 
838         case TOKEN_REAL: {
839             json = json_real(lex->value.real);
840             break;
841         }
842 
843         case TOKEN_TRUE:
844             json = json_true();
845             break;
846 
847         case TOKEN_FALSE:
848             json = json_false();
849             break;
850 
851         case TOKEN_NULL:
852             json = json_null();
853             break;
854 
855         case '{':
856             json = parse_object(lex, flags, error);
857             break;
858 
859         case '[':
860             json = parse_array(lex, flags, error);
861             break;
862 
863         case TOKEN_INVALID:
864             error_set(error, lex, "invalid token");
865             return NULL;
866 
867         default:
868             error_set(error, lex, "unexpected token");
869             return NULL;
870     }
871 
872     if(!json)
873         return NULL;
874 
875     lex->depth--;
876     return json;
877 }
878 
parse_json(lex_t * lex,size_t flags,json_error_t * error)879 static json_t *parse_json(lex_t *lex, size_t flags, json_error_t *error)
880 {
881     json_t *result;
882 
883     lex->depth = 0;
884 
885     lex_scan(lex, error);
886     if(!(flags & JSON_DECODE_ANY)) {
887         if(lex->token != '[' && lex->token != '{') {
888             error_set(error, lex, "'[' or '{' expected");
889             return NULL;
890         }
891     }
892 
893     result = parse_value(lex, flags, error);
894     if(!result)
895         return NULL;
896 
897     if(!(flags & JSON_DISABLE_EOF_CHECK)) {
898         lex_scan(lex, error);
899         if(lex->token != TOKEN_EOF) {
900             error_set(error, lex, "end of file expected");
901             json_decref(result);
902             return NULL;
903         }
904     }
905 
906     if(error) {
907         /* Save the position even though there was no error */
908         error->position = (int)lex->stream.position;
909     }
910 
911     return result;
912 }
913 
/* Read cursor over a NUL terminated input string. */
typedef struct
{
    const char *data;  /* the string being read */
    size_t pos;        /* index of the next byte to return */
} string_data_t;

/* get_func for NUL terminated strings: returns the next byte as an
   unsigned char value and advances, or EOF at the terminator (without
   advancing, so EOF is returned again on later calls). */
static int string_get(void *data)
{
    string_data_t *cursor = data;
    const unsigned char byte = (unsigned char)cursor->data[cursor->pos];

    if(byte == 0)
        return EOF;

    cursor->pos++;
    return byte;
}
933 
json_loads(const char * string,size_t flags,json_error_t * error)934 json_t *json_loads(const char *string, size_t flags, json_error_t *error)
935 {
936     lex_t lex;
937     json_t *result;
938     string_data_t stream_data;
939 
940     jsonp_error_init(error, "<string>");
941 
942     if (string == NULL) {
943         error_set(error, NULL, "wrong arguments");
944         return NULL;
945     }
946 
947     stream_data.data = string;
948     stream_data.pos = 0;
949 
950     if(lex_init(&lex, string_get, flags, (void *)&stream_data))
951         return NULL;
952 
953     result = parse_json(&lex, flags, error);
954 
955     lex_close(&lex);
956     return result;
957 }
958 
/* Read cursor over a memory buffer of known length. */
typedef struct
{
    const char *data;  /* start of the buffer */
    size_t len;        /* number of bytes in the buffer */
    size_t pos;        /* index of the next byte to return */
} buffer_data_t;

/* get_func for length-delimited buffers: returns the next byte as an
   unsigned char value and advances, or EOF once `len` bytes have been
   returned. NUL bytes are returned like any other byte. */
static int buffer_get(void *data)
{
    buffer_data_t *cursor = data;
    unsigned char byte;

    if(cursor->pos < cursor->len) {
        byte = (unsigned char)cursor->data[cursor->pos++];
        return byte;
    }

    return EOF;
}
977 
json_loadb(const char * buffer,size_t buflen,size_t flags,json_error_t * error)978 json_t *json_loadb(const char *buffer, size_t buflen, size_t flags, json_error_t *error)
979 {
980     lex_t lex;
981     json_t *result;
982     buffer_data_t stream_data;
983 
984     jsonp_error_init(error, "<buffer>");
985 
986     if (buffer == NULL) {
987         error_set(error, NULL, "wrong arguments");
988         return NULL;
989     }
990 
991     stream_data.data = buffer;
992     stream_data.pos = 0;
993     stream_data.len = buflen;
994 
995     if(lex_init(&lex, buffer_get, flags, (void *)&stream_data))
996         return NULL;
997 
998     result = parse_json(&lex, flags, error);
999 
1000     lex_close(&lex);
1001     return result;
1002 }
1003 
json_loadf(FILE * input,size_t flags,json_error_t * error)1004 json_t *json_loadf(FILE *input, size_t flags, json_error_t *error)
1005 {
1006     lex_t lex;
1007     const char *source;
1008     json_t *result;
1009 
1010     if(input == stdin)
1011         source = "<stdin>";
1012     else
1013         source = "<stream>";
1014 
1015     jsonp_error_init(error, source);
1016 
1017     if (input == NULL) {
1018         error_set(error, NULL, "wrong arguments");
1019         return NULL;
1020     }
1021 
1022     if(lex_init(&lex, (get_func)fgetc, flags, input))
1023         return NULL;
1024 
1025     result = parse_json(&lex, flags, error);
1026 
1027     lex_close(&lex);
1028     return result;
1029 }
1030 
json_load_file(const char * path,size_t flags,json_error_t * error)1031 json_t *json_load_file(const char *path, size_t flags, json_error_t *error)
1032 {
1033     json_t *result;
1034     FILE *fp;
1035 
1036     jsonp_error_init(error, path);
1037 
1038     if (path == NULL) {
1039         error_set(error, NULL, "wrong arguments");
1040         return NULL;
1041     }
1042 
1043     fp = fopen(path, "rb");
1044     if(!fp)
1045     {
1046         error_set(error, NULL, "unable to open %s: %s",
1047                   path, strerror(errno));
1048         return NULL;
1049     }
1050 
1051     result = json_loadf(fp, flags, error);
1052 
1053     fclose(fp);
1054     return result;
1055 }
1056 
1057 #define MAX_BUF_LEN 1024
1058 
1059 typedef struct
1060 {
1061     char data[MAX_BUF_LEN];
1062     size_t len;
1063     size_t pos;
1064     json_load_callback_t callback;
1065     void *arg;
1066 } callback_data_t;
1067 
callback_get(void * data)1068 static int callback_get(void *data)
1069 {
1070     char c;
1071     callback_data_t *stream = data;
1072 
1073     if(stream->pos >= stream->len) {
1074         stream->pos = 0;
1075         stream->len = stream->callback(stream->data, MAX_BUF_LEN, stream->arg);
1076         if(stream->len == 0 || stream->len == (size_t)-1)
1077             return EOF;
1078     }
1079 
1080     c = stream->data[stream->pos];
1081     stream->pos++;
1082     return (unsigned char)c;
1083 }
1084 
json_load_callback(json_load_callback_t callback,void * arg,size_t flags,json_error_t * error)1085 json_t *json_load_callback(json_load_callback_t callback, void *arg, size_t flags, json_error_t *error)
1086 {
1087     lex_t lex;
1088     json_t *result;
1089 
1090     callback_data_t stream_data;
1091 
1092     memset(&stream_data, 0, sizeof(stream_data));
1093     stream_data.callback = callback;
1094     stream_data.arg = arg;
1095 
1096     jsonp_error_init(error, "<callback>");
1097 
1098     if (callback == NULL) {
1099         error_set(error, NULL, "wrong arguments");
1100         return NULL;
1101     }
1102 
1103     if(lex_init(&lex, (get_func)callback_get, flags, &stream_data))
1104         return NULL;
1105 
1106     result = parse_json(&lex, flags, error);
1107 
1108     lex_close(&lex);
1109     return result;
1110 }
1111