xref: /qemu/qobject/json-parser.c (revision 84a56f38)
1 /*
2  * JSON Parser
3  *
4  * Copyright IBM, Corp. 2009
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10  * See the COPYING.LIB file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/cutils.h"
16 #include "qemu/unicode.h"
17 #include "qapi/error.h"
18 #include "qemu-common.h"
19 #include "qapi/qmp/qbool.h"
20 #include "qapi/qmp/qdict.h"
21 #include "qapi/qmp/qlist.h"
22 #include "qapi/qmp/qnull.h"
23 #include "qapi/qmp/qnum.h"
24 #include "qapi/qmp/qstring.h"
25 #include "qapi/qmp/json-parser.h"
26 #include "qapi/qmp/json-lexer.h"
27 #include "qapi/qmp/json-streamer.h"
28 
29 typedef struct JSONParserContext
30 {
31     Error *err;
32     JSONToken *current;
33     GQueue *buf;
34 } JSONParserContext;
35 
/* Abort (via assert) when @cond is true. */
#define BUG_ON(cond) assert(!(cond))
37 
38 /**
39  * TODO
40  *
41  * 0) make errors meaningful again
42  * 1) add geometry information to tokens
43  * 3) should we return a parsed size?
44  * 4) deal with premature EOI
45  */
46 
47 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
48 
49 /**
50  * Error handler
51  */
52 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
53                                            JSONToken *token, const char *msg, ...)
54 {
55     va_list ap;
56     char message[1024];
57 
58     if (ctxt->err) {
59         return;
60     }
61     va_start(ap, msg);
62     vsnprintf(message, sizeof(message), msg, ap);
63     va_end(ap);
64     error_setg(&ctxt->err, "JSON parse error, %s", message);
65 }
66 
/*
 * Convert the four hexadecimal digits at @s to their numeric value.
 *
 * Both upper and lower case digits are accepted.  Scanning stops at
 * the first character that is not a hexadecimal digit (including the
 * terminating NUL), so @s may be shorter than four characters.
 *
 * Returns: the value in the range 0..0xFFFF, or -1 if any of the
 * first four characters is not a hexadecimal digit.
 */
static int cvt4hex(const char *s)
{
    int cp = 0;
    int i;

    for (i = 0; i < 4; i++) {
        char c = s[i];
        int digit;

        if (c >= '0' && c <= '9') {
            digit = c - '0';
        } else if (c >= 'a' && c <= 'f') {
            digit = 10 + c - 'a';
        } else if (c >= 'A' && c <= 'F') {
            digit = 10 + c - 'A';
        } else {
            return -1;
        }
        cp = (cp << 4) | digit;
    }
    return cp;
}
89 
90 /**
91  * parse_string(): Parse a JSON string
92  *
93  * From RFC 8259 "The JavaScript Object Notation (JSON) Data
94  * Interchange Format":
95  *
96  *    char = unescaped /
97  *        escape (
98  *            %x22 /          ; "    quotation mark  U+0022
99  *            %x5C /          ; \    reverse solidus U+005C
100  *            %x2F /          ; /    solidus         U+002F
101  *            %x62 /          ; b    backspace       U+0008
102  *            %x66 /          ; f    form feed       U+000C
103  *            %x6E /          ; n    line feed       U+000A
104  *            %x72 /          ; r    carriage return U+000D
105  *            %x74 /          ; t    tab             U+0009
106  *            %x75 4HEXDIG )  ; uXXXX                U+XXXX
107  *    escape = %x5C              ; \
108  *    quotation-mark = %x22      ; "
109  *    unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
110  *
111  * Extensions over RFC 8259:
112  * - Extra escape sequence in strings:
113  *   0x27 (apostrophe) is recognized after escape, too
114  * - Single-quoted strings:
115  *   Like double-quoted strings, except they're delimited by %x27
116  *   (apostrophe) instead of %x22 (quotation mark), and can't contain
117  *   unescaped apostrophe, but can contain unescaped quotation mark.
118  *
119  * Note:
120  * - Encoding is modified UTF-8.
121  * - Invalid Unicode characters are rejected.
122  * - Control characters \x00..\x1F are rejected by the lexer.
123  */
124 static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
125 {
126     const char *ptr = token->str;
127     QString *str;
128     char quote;
129     const char *beg;
130     int cp, trailing;
131     char *end;
132     ssize_t len;
133     char utf8_buf[5];
134 
135     assert(*ptr == '"' || *ptr == '\'');
136     quote = *ptr++;
137     str = qstring_new();
138 
139     while (*ptr != quote) {
140         assert(*ptr);
141         if (*ptr == '\\') {
142             beg = ptr++;
143             switch (*ptr++) {
144             case '"':
145                 qstring_append_chr(str, '"');
146                 break;
147             case '\'':
148                 qstring_append_chr(str, '\'');
149                 break;
150             case '\\':
151                 qstring_append_chr(str, '\\');
152                 break;
153             case '/':
154                 qstring_append_chr(str, '/');
155                 break;
156             case 'b':
157                 qstring_append_chr(str, '\b');
158                 break;
159             case 'f':
160                 qstring_append_chr(str, '\f');
161                 break;
162             case 'n':
163                 qstring_append_chr(str, '\n');
164                 break;
165             case 'r':
166                 qstring_append_chr(str, '\r');
167                 break;
168             case 't':
169                 qstring_append_chr(str, '\t');
170                 break;
171             case 'u':
172                 cp = cvt4hex(ptr);
173                 ptr += 4;
174 
175                 /* handle surrogate pairs */
176                 if (cp >= 0xD800 && cp <= 0xDBFF
177                     && ptr[0] == '\\' && ptr[1] == 'u') {
178                     /* leading surrogate followed by \u */
179                     cp = 0x10000 + ((cp & 0x3FF) << 10);
180                     trailing = cvt4hex(ptr + 2);
181                     if (trailing >= 0xDC00 && trailing <= 0xDFFF) {
182                         /* followed by trailing surrogate */
183                         cp |= trailing & 0x3FF;
184                         ptr += 6;
185                     } else {
186                         cp = -1; /* invalid */
187                     }
188                 }
189 
190                 if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
191                     parse_error(ctxt, token,
192                                 "%.*s is not a valid Unicode character",
193                                 (int)(ptr - beg), beg);
194                     goto out;
195                 }
196                 qstring_append(str, utf8_buf);
197                 break;
198             default:
199                 parse_error(ctxt, token, "invalid escape sequence in string");
200                 goto out;
201             }
202         } else {
203             cp = mod_utf8_codepoint(ptr, 6, &end);
204             if (cp < 0) {
205                 parse_error(ctxt, token, "invalid UTF-8 sequence in string");
206                 goto out;
207             }
208             ptr = end;
209             len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
210             assert(len >= 0);
211             qstring_append(str, utf8_buf);
212         }
213     }
214 
215     return str;
216 
217 out:
218     qobject_unref(str);
219     return NULL;
220 }
221 
222 /* Note: the token object returned by parser_context_peek_token or
223  * parser_context_pop_token is deleted as soon as parser_context_pop_token
224  * is called again.
225  */
226 static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
227 {
228     g_free(ctxt->current);
229     assert(!g_queue_is_empty(ctxt->buf));
230     ctxt->current = g_queue_pop_head(ctxt->buf);
231     return ctxt->current;
232 }
233 
234 static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
235 {
236     assert(!g_queue_is_empty(ctxt->buf));
237     return g_queue_peek_head(ctxt->buf);
238 }
239 
240 /**
241  * Parsing rules
242  */
243 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
244 {
245     QObject *value;
246     QString *key = NULL;
247     JSONToken *peek, *token;
248 
249     peek = parser_context_peek_token(ctxt);
250     if (peek == NULL) {
251         parse_error(ctxt, NULL, "premature EOI");
252         goto out;
253     }
254 
255     key = qobject_to(QString, parse_value(ctxt, ap));
256     if (!key) {
257         parse_error(ctxt, peek, "key is not a string in object");
258         goto out;
259     }
260 
261     token = parser_context_pop_token(ctxt);
262     if (token == NULL) {
263         parse_error(ctxt, NULL, "premature EOI");
264         goto out;
265     }
266 
267     if (token->type != JSON_COLON) {
268         parse_error(ctxt, token, "missing : in object pair");
269         goto out;
270     }
271 
272     value = parse_value(ctxt, ap);
273     if (value == NULL) {
274         parse_error(ctxt, token, "Missing value in dict");
275         goto out;
276     }
277 
278     qdict_put_obj(dict, qstring_get_str(key), value);
279 
280     qobject_unref(key);
281 
282     return 0;
283 
284 out:
285     qobject_unref(key);
286 
287     return -1;
288 }
289 
290 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
291 {
292     QDict *dict = NULL;
293     JSONToken *token, *peek;
294 
295     token = parser_context_pop_token(ctxt);
296     assert(token && token->type == JSON_LCURLY);
297 
298     dict = qdict_new();
299 
300     peek = parser_context_peek_token(ctxt);
301     if (peek == NULL) {
302         parse_error(ctxt, NULL, "premature EOI");
303         goto out;
304     }
305 
306     if (peek->type != JSON_RCURLY) {
307         if (parse_pair(ctxt, dict, ap) == -1) {
308             goto out;
309         }
310 
311         token = parser_context_pop_token(ctxt);
312         if (token == NULL) {
313             parse_error(ctxt, NULL, "premature EOI");
314             goto out;
315         }
316 
317         while (token->type != JSON_RCURLY) {
318             if (token->type != JSON_COMMA) {
319                 parse_error(ctxt, token, "expected separator in dict");
320                 goto out;
321             }
322 
323             if (parse_pair(ctxt, dict, ap) == -1) {
324                 goto out;
325             }
326 
327             token = parser_context_pop_token(ctxt);
328             if (token == NULL) {
329                 parse_error(ctxt, NULL, "premature EOI");
330                 goto out;
331             }
332         }
333     } else {
334         (void)parser_context_pop_token(ctxt);
335     }
336 
337     return QOBJECT(dict);
338 
339 out:
340     qobject_unref(dict);
341     return NULL;
342 }
343 
344 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
345 {
346     QList *list = NULL;
347     JSONToken *token, *peek;
348 
349     token = parser_context_pop_token(ctxt);
350     assert(token && token->type == JSON_LSQUARE);
351 
352     list = qlist_new();
353 
354     peek = parser_context_peek_token(ctxt);
355     if (peek == NULL) {
356         parse_error(ctxt, NULL, "premature EOI");
357         goto out;
358     }
359 
360     if (peek->type != JSON_RSQUARE) {
361         QObject *obj;
362 
363         obj = parse_value(ctxt, ap);
364         if (obj == NULL) {
365             parse_error(ctxt, token, "expecting value");
366             goto out;
367         }
368 
369         qlist_append_obj(list, obj);
370 
371         token = parser_context_pop_token(ctxt);
372         if (token == NULL) {
373             parse_error(ctxt, NULL, "premature EOI");
374             goto out;
375         }
376 
377         while (token->type != JSON_RSQUARE) {
378             if (token->type != JSON_COMMA) {
379                 parse_error(ctxt, token, "expected separator in list");
380                 goto out;
381             }
382 
383             obj = parse_value(ctxt, ap);
384             if (obj == NULL) {
385                 parse_error(ctxt, token, "expecting value");
386                 goto out;
387             }
388 
389             qlist_append_obj(list, obj);
390 
391             token = parser_context_pop_token(ctxt);
392             if (token == NULL) {
393                 parse_error(ctxt, NULL, "premature EOI");
394                 goto out;
395             }
396         }
397     } else {
398         (void)parser_context_pop_token(ctxt);
399     }
400 
401     return QOBJECT(list);
402 
403 out:
404     qobject_unref(list);
405     return NULL;
406 }
407 
408 static QObject *parse_keyword(JSONParserContext *ctxt)
409 {
410     JSONToken *token;
411 
412     token = parser_context_pop_token(ctxt);
413     assert(token && token->type == JSON_KEYWORD);
414 
415     if (!strcmp(token->str, "true")) {
416         return QOBJECT(qbool_from_bool(true));
417     } else if (!strcmp(token->str, "false")) {
418         return QOBJECT(qbool_from_bool(false));
419     } else if (!strcmp(token->str, "null")) {
420         return QOBJECT(qnull());
421     }
422     parse_error(ctxt, token, "invalid keyword '%s'", token->str);
423     return NULL;
424 }
425 
426 static QObject *parse_interpolation(JSONParserContext *ctxt, va_list *ap)
427 {
428     JSONToken *token;
429 
430     token = parser_context_pop_token(ctxt);
431     assert(token && token->type == JSON_INTERP);
432 
433     if (!strcmp(token->str, "%p")) {
434         return va_arg(*ap, QObject *);
435     } else if (!strcmp(token->str, "%i")) {
436         return QOBJECT(qbool_from_bool(va_arg(*ap, int)));
437     } else if (!strcmp(token->str, "%d")) {
438         return QOBJECT(qnum_from_int(va_arg(*ap, int)));
439     } else if (!strcmp(token->str, "%ld")) {
440         return QOBJECT(qnum_from_int(va_arg(*ap, long)));
441     } else if (!strcmp(token->str, "%lld") ||
442                !strcmp(token->str, "%I64d")) {
443         return QOBJECT(qnum_from_int(va_arg(*ap, long long)));
444     } else if (!strcmp(token->str, "%u")) {
445         return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned int)));
446     } else if (!strcmp(token->str, "%lu")) {
447         return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long)));
448     } else if (!strcmp(token->str, "%llu") ||
449                !strcmp(token->str, "%I64u")) {
450         return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long long)));
451     } else if (!strcmp(token->str, "%s")) {
452         return QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
453     } else if (!strcmp(token->str, "%f")) {
454         return QOBJECT(qnum_from_double(va_arg(*ap, double)));
455     }
456     return NULL;
457 }
458 
459 static QObject *parse_literal(JSONParserContext *ctxt)
460 {
461     JSONToken *token;
462 
463     token = parser_context_pop_token(ctxt);
464     assert(token);
465 
466     switch (token->type) {
467     case JSON_STRING:
468         return QOBJECT(parse_string(ctxt, token));
469     case JSON_INTEGER: {
470         /*
471          * Represent JSON_INTEGER as QNUM_I64 if possible, else as
472          * QNUM_U64, else as QNUM_DOUBLE.  Note that qemu_strtoi64()
473          * and qemu_strtou64() fail with ERANGE when it's not
474          * possible.
475          *
476          * qnum_get_int() will then work for any signed 64-bit
477          * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit
478          * integer, and qnum_get_double() both for any JSON_INTEGER
479          * and any JSON_FLOAT (with precision loss for integers beyond
480          * 53 bits)
481          */
482         int ret;
483         int64_t value;
484         uint64_t uvalue;
485 
486         ret = qemu_strtoi64(token->str, NULL, 10, &value);
487         if (!ret) {
488             return QOBJECT(qnum_from_int(value));
489         }
490         assert(ret == -ERANGE);
491 
492         if (token->str[0] != '-') {
493             ret = qemu_strtou64(token->str, NULL, 10, &uvalue);
494             if (!ret) {
495                 return QOBJECT(qnum_from_uint(uvalue));
496             }
497             assert(ret == -ERANGE);
498         }
499         /* fall through to JSON_FLOAT */
500     }
501     case JSON_FLOAT:
502         /* FIXME dependent on locale; a pervasive issue in QEMU */
503         /* FIXME our lexer matches RFC 7159 in forbidding Inf or NaN,
504          * but those might be useful extensions beyond JSON */
505         return QOBJECT(qnum_from_double(strtod(token->str, NULL)));
506     default:
507         abort();
508     }
509 }
510 
511 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
512 {
513     JSONToken *token;
514 
515     token = parser_context_peek_token(ctxt);
516     if (token == NULL) {
517         parse_error(ctxt, NULL, "premature EOI");
518         return NULL;
519     }
520 
521     switch (token->type) {
522     case JSON_LCURLY:
523         return parse_object(ctxt, ap);
524     case JSON_LSQUARE:
525         return parse_array(ctxt, ap);
526     case JSON_INTERP:
527         return parse_interpolation(ctxt, ap);
528     case JSON_INTEGER:
529     case JSON_FLOAT:
530     case JSON_STRING:
531         return parse_literal(ctxt);
532     case JSON_KEYWORD:
533         return parse_keyword(ctxt);
534     default:
535         parse_error(ctxt, token, "expecting value");
536         return NULL;
537     }
538 }
539 
540 QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp)
541 {
542     JSONParserContext ctxt = { .buf = tokens };
543     QObject *result;
544 
545     result = parse_value(&ctxt, ap);
546 
547     error_propagate(errp, ctxt.err);
548 
549     while (!g_queue_is_empty(ctxt.buf)) {
550         parser_context_pop_token(&ctxt);
551     }
552     g_free(ctxt.current);
553     g_queue_free(ctxt.buf);
554 
555     return result;
556 }
557