xref: /qemu/qobject/json-parser.c (revision 4b1c0cd7)
1 /*
2  * JSON Parser
3  *
4  * Copyright IBM, Corp. 2009
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10  * See the COPYING.LIB file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qemu/cutils.h"
16 #include "qemu/unicode.h"
17 #include "qapi/error.h"
18 #include "qemu-common.h"
19 #include "qapi/qmp/qbool.h"
20 #include "qapi/qmp/qdict.h"
21 #include "qapi/qmp/qlist.h"
22 #include "qapi/qmp/qnull.h"
23 #include "qapi/qmp/qnum.h"
24 #include "qapi/qmp/qstring.h"
25 #include "qapi/qmp/json-parser.h"
26 #include "qapi/qmp/json-lexer.h"
27 #include "qapi/qmp/json-streamer.h"
28 
29 typedef struct JSONParserContext
30 {
31     Error *err;
32     JSONToken *current;
33     GQueue *buf;
34 } JSONParserContext;
35 
/* Assert that @cond is false, i.e. flag @cond as a condition that must never hold. */
#define BUG_ON(cond) assert(!(cond))
37 
38 /**
39  * TODO
40  *
41  * 0) make errors meaningful again
42  * 1) add geometry information to tokens
43  * 3) should we return a parsed size?
44  * 4) deal with premature EOI
45  */
46 
47 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
48 
49 /**
50  * Error handler
51  */
52 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
53                                            JSONToken *token, const char *msg, ...)
54 {
55     va_list ap;
56     char message[1024];
57 
58     if (ctxt->err) {
59         return;
60     }
61     va_start(ap, msg);
62     vsnprintf(message, sizeof(message), msg, ap);
63     va_end(ap);
64     error_setg(&ctxt->err, "JSON parse error, %s", message);
65 }
66 
67 /**
68  * String helpers
69  *
70  * These helpers are used to unescape strings.
71  */
/*
 * Encode the 16-bit code unit @wchar as a NUL-terminated UTF-8 sequence
 * in @buffer.
 *
 * The encoding takes one byte up to 0x7F, two bytes up to 0x7FF and
 * three bytes otherwise; @buffer_length must leave room for those bytes
 * plus the terminating NUL (asserted).
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    size_t nbytes;

    if (wchar <= 0x007F) {
        nbytes = 1;
    } else if (wchar <= 0x07FF) {
        nbytes = 2;
    } else {
        nbytes = 3;
    }

    /* Need room for the encoded bytes and the terminator. */
    assert(buffer_length > nbytes);

    switch (nbytes) {
    case 1:
        buffer[0] = wchar & 0x7F;
        break;
    case 2:
        buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F);
        buffer[1] = 0x80 | (wchar & 0x3F);
        break;
    default:
        buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F);
        buffer[1] = 0x80 | ((wchar >> 6) & 0x3F);
        buffer[2] = 0x80 | (wchar & 0x3F);
        break;
    }
    buffer[nbytes] = 0;
}
94 
/*
 * Return the numeric value (0..15) of the hexadecimal digit @ch,
 * accepting both upper- and lower-case letters, or -1 if @ch is not a
 * hexadecimal digit.
 */
static int hex2decimal(char ch)
{
    if ('0' <= ch && ch <= '9') {
        return ch - '0';
    }

    switch (ch) {
    case 'a': case 'A':
        return 10;
    case 'b': case 'B':
        return 11;
    case 'c': case 'C':
        return 12;
    case 'd': case 'D':
        return 13;
    case 'e': case 'E':
        return 14;
    case 'f': case 'F':
        return 15;
    default:
        return -1;
    }
}
107 
108 /**
109  * parse_string(): Parse a json string and return a QObject
110  *
111  *  string
112  *      ""
113  *      " chars "
114  *  chars
115  *      char
116  *      char chars
117  *  char
118  *      any-Unicode-character-
119  *          except-"-or-\-or-
120  *          control-character
121  *      \"
122  *      \\
123  *      \/
124  *      \b
125  *      \f
126  *      \n
127  *      \r
128  *      \t
129  *      \u four-hex-digits
130  */
131 static QString *qstring_from_escaped_str(JSONParserContext *ctxt,
132                                          JSONToken *token)
133 {
134     const char *ptr = token->str;
135     QString *str;
136     char quote;
137     int cp;
138     char *end;
139     ssize_t len;
140     char utf8_buf[5];
141 
142     assert(*ptr == '"' || *ptr == '\'');
143     quote = *ptr++;
144     str = qstring_new();
145 
146     while (*ptr != quote) {
147         assert(*ptr);
148         if (*ptr == '\\') {
149             ptr++;
150             switch (*ptr++) {
151             case '"':
152                 qstring_append(str, "\"");
153                 break;
154             case '\'':
155                 qstring_append(str, "'");
156                 break;
157             case '\\':
158                 qstring_append(str, "\\");
159                 break;
160             case '/':
161                 qstring_append(str, "/");
162                 break;
163             case 'b':
164                 qstring_append(str, "\b");
165                 break;
166             case 'f':
167                 qstring_append(str, "\f");
168                 break;
169             case 'n':
170                 qstring_append(str, "\n");
171                 break;
172             case 'r':
173                 qstring_append(str, "\r");
174                 break;
175             case 't':
176                 qstring_append(str, "\t");
177                 break;
178             case 'u': {
179                 uint16_t unicode_char = 0;
180                 char utf8_char[4];
181                 int i = 0;
182 
183                 for (i = 0; i < 4; i++) {
184                     if (qemu_isxdigit(*ptr)) {
185                         unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
186                     } else {
187                         parse_error(ctxt, token,
188                                     "invalid hex escape sequence in string");
189                         goto out;
190                     }
191                     ptr++;
192                 }
193 
194                 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
195                 qstring_append(str, utf8_char);
196             }   break;
197             default:
198                 parse_error(ctxt, token, "invalid escape sequence in string");
199                 goto out;
200             }
201         } else {
202             cp = mod_utf8_codepoint(ptr, 6, &end);
203             if (cp < 0) {
204                 parse_error(ctxt, token, "invalid UTF-8 sequence in string");
205                 goto out;
206             }
207             ptr = end;
208             len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
209             assert(len >= 0);
210             qstring_append(str, utf8_buf);
211         }
212     }
213 
214     return str;
215 
216 out:
217     qobject_unref(str);
218     return NULL;
219 }
220 
221 /* Note: the token object returned by parser_context_peek_token or
222  * parser_context_pop_token is deleted as soon as parser_context_pop_token
223  * is called again.
224  */
225 static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
226 {
227     g_free(ctxt->current);
228     assert(!g_queue_is_empty(ctxt->buf));
229     ctxt->current = g_queue_pop_head(ctxt->buf);
230     return ctxt->current;
231 }
232 
233 static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
234 {
235     assert(!g_queue_is_empty(ctxt->buf));
236     return g_queue_peek_head(ctxt->buf);
237 }
238 
239 static JSONParserContext *parser_context_new(GQueue *tokens)
240 {
241     JSONParserContext *ctxt;
242 
243     if (!tokens) {
244         return NULL;
245     }
246 
247     ctxt = g_malloc0(sizeof(JSONParserContext));
248     ctxt->buf = tokens;
249 
250     return ctxt;
251 }
252 
253 /* to support error propagation, ctxt->err must be freed separately */
254 static void parser_context_free(JSONParserContext *ctxt)
255 {
256     if (ctxt) {
257         while (!g_queue_is_empty(ctxt->buf)) {
258             parser_context_pop_token(ctxt);
259         }
260         g_free(ctxt->current);
261         g_queue_free(ctxt->buf);
262         g_free(ctxt);
263     }
264 }
265 
266 /**
267  * Parsing rules
268  */
269 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
270 {
271     QObject *value;
272     QString *key = NULL;
273     JSONToken *peek, *token;
274 
275     peek = parser_context_peek_token(ctxt);
276     if (peek == NULL) {
277         parse_error(ctxt, NULL, "premature EOI");
278         goto out;
279     }
280 
281     key = qobject_to(QString, parse_value(ctxt, ap));
282     if (!key) {
283         parse_error(ctxt, peek, "key is not a string in object");
284         goto out;
285     }
286 
287     token = parser_context_pop_token(ctxt);
288     if (token == NULL) {
289         parse_error(ctxt, NULL, "premature EOI");
290         goto out;
291     }
292 
293     if (token->type != JSON_COLON) {
294         parse_error(ctxt, token, "missing : in object pair");
295         goto out;
296     }
297 
298     value = parse_value(ctxt, ap);
299     if (value == NULL) {
300         parse_error(ctxt, token, "Missing value in dict");
301         goto out;
302     }
303 
304     qdict_put_obj(dict, qstring_get_str(key), value);
305 
306     qobject_unref(key);
307 
308     return 0;
309 
310 out:
311     qobject_unref(key);
312 
313     return -1;
314 }
315 
316 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
317 {
318     QDict *dict = NULL;
319     JSONToken *token, *peek;
320 
321     token = parser_context_pop_token(ctxt);
322     assert(token && token->type == JSON_LCURLY);
323 
324     dict = qdict_new();
325 
326     peek = parser_context_peek_token(ctxt);
327     if (peek == NULL) {
328         parse_error(ctxt, NULL, "premature EOI");
329         goto out;
330     }
331 
332     if (peek->type != JSON_RCURLY) {
333         if (parse_pair(ctxt, dict, ap) == -1) {
334             goto out;
335         }
336 
337         token = parser_context_pop_token(ctxt);
338         if (token == NULL) {
339             parse_error(ctxt, NULL, "premature EOI");
340             goto out;
341         }
342 
343         while (token->type != JSON_RCURLY) {
344             if (token->type != JSON_COMMA) {
345                 parse_error(ctxt, token, "expected separator in dict");
346                 goto out;
347             }
348 
349             if (parse_pair(ctxt, dict, ap) == -1) {
350                 goto out;
351             }
352 
353             token = parser_context_pop_token(ctxt);
354             if (token == NULL) {
355                 parse_error(ctxt, NULL, "premature EOI");
356                 goto out;
357             }
358         }
359     } else {
360         (void)parser_context_pop_token(ctxt);
361     }
362 
363     return QOBJECT(dict);
364 
365 out:
366     qobject_unref(dict);
367     return NULL;
368 }
369 
370 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
371 {
372     QList *list = NULL;
373     JSONToken *token, *peek;
374 
375     token = parser_context_pop_token(ctxt);
376     assert(token && token->type == JSON_LSQUARE);
377 
378     list = qlist_new();
379 
380     peek = parser_context_peek_token(ctxt);
381     if (peek == NULL) {
382         parse_error(ctxt, NULL, "premature EOI");
383         goto out;
384     }
385 
386     if (peek->type != JSON_RSQUARE) {
387         QObject *obj;
388 
389         obj = parse_value(ctxt, ap);
390         if (obj == NULL) {
391             parse_error(ctxt, token, "expecting value");
392             goto out;
393         }
394 
395         qlist_append_obj(list, obj);
396 
397         token = parser_context_pop_token(ctxt);
398         if (token == NULL) {
399             parse_error(ctxt, NULL, "premature EOI");
400             goto out;
401         }
402 
403         while (token->type != JSON_RSQUARE) {
404             if (token->type != JSON_COMMA) {
405                 parse_error(ctxt, token, "expected separator in list");
406                 goto out;
407             }
408 
409             obj = parse_value(ctxt, ap);
410             if (obj == NULL) {
411                 parse_error(ctxt, token, "expecting value");
412                 goto out;
413             }
414 
415             qlist_append_obj(list, obj);
416 
417             token = parser_context_pop_token(ctxt);
418             if (token == NULL) {
419                 parse_error(ctxt, NULL, "premature EOI");
420                 goto out;
421             }
422         }
423     } else {
424         (void)parser_context_pop_token(ctxt);
425     }
426 
427     return QOBJECT(list);
428 
429 out:
430     qobject_unref(list);
431     return NULL;
432 }
433 
434 static QObject *parse_keyword(JSONParserContext *ctxt)
435 {
436     JSONToken *token;
437 
438     token = parser_context_pop_token(ctxt);
439     assert(token && token->type == JSON_KEYWORD);
440 
441     if (!strcmp(token->str, "true")) {
442         return QOBJECT(qbool_from_bool(true));
443     } else if (!strcmp(token->str, "false")) {
444         return QOBJECT(qbool_from_bool(false));
445     } else if (!strcmp(token->str, "null")) {
446         return QOBJECT(qnull());
447     }
448     parse_error(ctxt, token, "invalid keyword '%s'", token->str);
449     return NULL;
450 }
451 
452 static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
453 {
454     JSONToken *token;
455 
456     if (ap == NULL) {
457         return NULL;
458     }
459 
460     token = parser_context_pop_token(ctxt);
461     assert(token && token->type == JSON_ESCAPE);
462 
463     if (!strcmp(token->str, "%p")) {
464         return va_arg(*ap, QObject *);
465     } else if (!strcmp(token->str, "%i")) {
466         return QOBJECT(qbool_from_bool(va_arg(*ap, int)));
467     } else if (!strcmp(token->str, "%d")) {
468         return QOBJECT(qnum_from_int(va_arg(*ap, int)));
469     } else if (!strcmp(token->str, "%ld")) {
470         return QOBJECT(qnum_from_int(va_arg(*ap, long)));
471     } else if (!strcmp(token->str, "%lld") ||
472                !strcmp(token->str, "%I64d")) {
473         return QOBJECT(qnum_from_int(va_arg(*ap, long long)));
474     } else if (!strcmp(token->str, "%u")) {
475         return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned int)));
476     } else if (!strcmp(token->str, "%lu")) {
477         return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long)));
478     } else if (!strcmp(token->str, "%llu") ||
479                !strcmp(token->str, "%I64u")) {
480         return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long long)));
481     } else if (!strcmp(token->str, "%s")) {
482         return QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
483     } else if (!strcmp(token->str, "%f")) {
484         return QOBJECT(qnum_from_double(va_arg(*ap, double)));
485     }
486     return NULL;
487 }
488 
489 static QObject *parse_literal(JSONParserContext *ctxt)
490 {
491     JSONToken *token;
492 
493     token = parser_context_pop_token(ctxt);
494     assert(token);
495 
496     switch (token->type) {
497     case JSON_STRING:
498         return QOBJECT(qstring_from_escaped_str(ctxt, token));
499     case JSON_INTEGER: {
500         /*
501          * Represent JSON_INTEGER as QNUM_I64 if possible, else as
502          * QNUM_U64, else as QNUM_DOUBLE.  Note that qemu_strtoi64()
503          * and qemu_strtou64() fail with ERANGE when it's not
504          * possible.
505          *
506          * qnum_get_int() will then work for any signed 64-bit
507          * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit
508          * integer, and qnum_get_double() both for any JSON_INTEGER
509          * and any JSON_FLOAT (with precision loss for integers beyond
510          * 53 bits)
511          */
512         int ret;
513         int64_t value;
514         uint64_t uvalue;
515 
516         ret = qemu_strtoi64(token->str, NULL, 10, &value);
517         if (!ret) {
518             return QOBJECT(qnum_from_int(value));
519         }
520         assert(ret == -ERANGE);
521 
522         if (token->str[0] != '-') {
523             ret = qemu_strtou64(token->str, NULL, 10, &uvalue);
524             if (!ret) {
525                 return QOBJECT(qnum_from_uint(uvalue));
526             }
527             assert(ret == -ERANGE);
528         }
529         /* fall through to JSON_FLOAT */
530     }
531     case JSON_FLOAT:
532         /* FIXME dependent on locale; a pervasive issue in QEMU */
533         /* FIXME our lexer matches RFC 7159 in forbidding Inf or NaN,
534          * but those might be useful extensions beyond JSON */
535         return QOBJECT(qnum_from_double(strtod(token->str, NULL)));
536     default:
537         abort();
538     }
539 }
540 
541 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
542 {
543     JSONToken *token;
544 
545     token = parser_context_peek_token(ctxt);
546     if (token == NULL) {
547         parse_error(ctxt, NULL, "premature EOI");
548         return NULL;
549     }
550 
551     switch (token->type) {
552     case JSON_LCURLY:
553         return parse_object(ctxt, ap);
554     case JSON_LSQUARE:
555         return parse_array(ctxt, ap);
556     case JSON_ESCAPE:
557         return parse_escape(ctxt, ap);
558     case JSON_INTEGER:
559     case JSON_FLOAT:
560     case JSON_STRING:
561         return parse_literal(ctxt);
562     case JSON_KEYWORD:
563         return parse_keyword(ctxt);
564     default:
565         parse_error(ctxt, token, "expecting value");
566         return NULL;
567     }
568 }
569 
570 QObject *json_parser_parse(GQueue *tokens, va_list *ap)
571 {
572     return json_parser_parse_err(tokens, ap, NULL);
573 }
574 
575 QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp)
576 {
577     JSONParserContext *ctxt = parser_context_new(tokens);
578     QObject *result;
579 
580     if (!ctxt) {
581         return NULL;
582     }
583 
584     result = parse_value(ctxt, ap);
585 
586     error_propagate(errp, ctxt->err);
587 
588     parser_context_free(ctxt);
589 
590     return result;
591 }
592