xref: /qemu/qobject/json-parser.c (revision 6f061ea1)
1 /*
2  * JSON Parser
3  *
4  * Copyright IBM, Corp. 2009
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10  * See the COPYING.LIB file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 
16 #include "qapi/error.h"
17 #include "qemu-common.h"
18 #include "qapi/qmp/qstring.h"
19 #include "qapi/qmp/qint.h"
20 #include "qapi/qmp/qdict.h"
21 #include "qapi/qmp/qlist.h"
22 #include "qapi/qmp/qfloat.h"
23 #include "qapi/qmp/qbool.h"
24 #include "qapi/qmp/json-parser.h"
25 #include "qapi/qmp/json-lexer.h"
26 #include "qapi/qmp/json-streamer.h"
27 
28 typedef struct JSONParserContext
29 {
30     Error *err;
31     JSONToken *current;
32     GQueue *buf;
33 } JSONParserContext;
34 
/* Assert that @cond does NOT hold. */
#define BUG_ON(cond) assert(!(cond))
36 
37 /**
38  * TODO
39  *
40  * 0) make errors meaningful again
41  * 1) add geometry information to tokens
42  * 3) should we return a parsed size?
43  * 4) deal with premature EOI
44  */
45 
46 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
47 
48 /**
49  * Error handler
50  */
51 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
52                                            JSONToken *token, const char *msg, ...)
53 {
54     va_list ap;
55     char message[1024];
56     va_start(ap, msg);
57     vsnprintf(message, sizeof(message), msg, ap);
58     va_end(ap);
59     if (ctxt->err) {
60         error_free(ctxt->err);
61         ctxt->err = NULL;
62     }
63     error_setg(&ctxt->err, "JSON parse error, %s", message);
64 }
65 
66 /**
67  * String helpers
68  *
69  * These helpers are used to unescape strings.
70  */
/**
 * wchar_to_utf8(): Encode a 16-bit value as a NUL-terminated byte sequence
 *
 * @wchar: value to encode
 * @buffer: receives one to three encoded bytes followed by a NUL
 * @buffer_length: size of @buffer in bytes; asserted to be large enough
 *                 for the encoding of @wchar plus the terminator
 *
 * Values up to 0x7F take one byte, values up to 0x7FF two bytes, and
 * everything else three bytes.  No value is treated specially, so
 * surrogate code units are encoded like any other value.
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    size_t len = 0;

    if (wchar <= 0x007F) {
        assert(buffer_length >= 2);

        buffer[len++] = wchar & 0x7F;
    } else if (wchar <= 0x07FF) {
        assert(buffer_length >= 3);

        buffer[len++] = 0xC0 | ((wchar >> 6) & 0x1F);
        buffer[len++] = 0x80 | (wchar & 0x3F);
    } else {
        assert(buffer_length >= 4);

        buffer[len++] = 0xE0 | ((wchar >> 12) & 0x0F);
        buffer[len++] = 0x80 | ((wchar >> 6) & 0x3F);
        buffer[len++] = 0x80 | (wchar & 0x3F);
    }

    buffer[len] = 0;
}
93 
/**
 * hex2decimal(): Convert one hexadecimal digit to its numeric value
 *
 * @ch: character to convert; letters are accepted in either case
 *
 * Returns the value 0..15, or -1 if @ch is not a hexadecimal digit.
 */
static int hex2decimal(char ch)
{
    int value = -1;

    if ('0' <= ch && ch <= '9') {
        value = ch - '0';
    } else if ('a' <= ch && ch <= 'f') {
        value = ch - 'a' + 10;
    } else if ('A' <= ch && ch <= 'F') {
        value = ch - 'A' + 10;
    }

    return value;
}
106 
107 /**
108  * parse_string(): Parse a json string and return a QObject
109  *
110  *  string
111  *      ""
112  *      " chars "
113  *  chars
114  *      char
115  *      char chars
116  *  char
117  *      any-Unicode-character-
118  *          except-"-or-\-or-
119  *          control-character
120  *      \"
121  *      \\
122  *      \/
123  *      \b
124  *      \f
125  *      \n
126  *      \r
127  *      \t
128  *      \u four-hex-digits
129  */
130 static QString *qstring_from_escaped_str(JSONParserContext *ctxt,
131                                          JSONToken *token)
132 {
133     const char *ptr = token->str;
134     QString *str;
135     int double_quote = 1;
136 
137     if (*ptr == '"') {
138         double_quote = 1;
139     } else {
140         double_quote = 0;
141     }
142     ptr++;
143 
144     str = qstring_new();
145     while (*ptr &&
146            ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
147         if (*ptr == '\\') {
148             ptr++;
149 
150             switch (*ptr) {
151             case '"':
152                 qstring_append(str, "\"");
153                 ptr++;
154                 break;
155             case '\'':
156                 qstring_append(str, "'");
157                 ptr++;
158                 break;
159             case '\\':
160                 qstring_append(str, "\\");
161                 ptr++;
162                 break;
163             case '/':
164                 qstring_append(str, "/");
165                 ptr++;
166                 break;
167             case 'b':
168                 qstring_append(str, "\b");
169                 ptr++;
170                 break;
171             case 'f':
172                 qstring_append(str, "\f");
173                 ptr++;
174                 break;
175             case 'n':
176                 qstring_append(str, "\n");
177                 ptr++;
178                 break;
179             case 'r':
180                 qstring_append(str, "\r");
181                 ptr++;
182                 break;
183             case 't':
184                 qstring_append(str, "\t");
185                 ptr++;
186                 break;
187             case 'u': {
188                 uint16_t unicode_char = 0;
189                 char utf8_char[4];
190                 int i = 0;
191 
192                 ptr++;
193 
194                 for (i = 0; i < 4; i++) {
195                     if (qemu_isxdigit(*ptr)) {
196                         unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
197                     } else {
198                         parse_error(ctxt, token,
199                                     "invalid hex escape sequence in string");
200                         goto out;
201                     }
202                     ptr++;
203                 }
204 
205                 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
206                 qstring_append(str, utf8_char);
207             }   break;
208             default:
209                 parse_error(ctxt, token, "invalid escape sequence in string");
210                 goto out;
211             }
212         } else {
213             char dummy[2];
214 
215             dummy[0] = *ptr++;
216             dummy[1] = 0;
217 
218             qstring_append(str, dummy);
219         }
220     }
221 
222     return str;
223 
224 out:
225     QDECREF(str);
226     return NULL;
227 }
228 
229 /* Note: the token object returned by parser_context_peek_token or
230  * parser_context_pop_token is deleted as soon as parser_context_pop_token
231  * is called again.
232  */
233 static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
234 {
235     g_free(ctxt->current);
236     assert(!g_queue_is_empty(ctxt->buf));
237     ctxt->current = g_queue_pop_head(ctxt->buf);
238     return ctxt->current;
239 }
240 
241 static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
242 {
243     assert(!g_queue_is_empty(ctxt->buf));
244     return g_queue_peek_head(ctxt->buf);
245 }
246 
247 static JSONParserContext *parser_context_new(GQueue *tokens)
248 {
249     JSONParserContext *ctxt;
250 
251     if (!tokens) {
252         return NULL;
253     }
254 
255     ctxt = g_malloc0(sizeof(JSONParserContext));
256     ctxt->buf = tokens;
257 
258     return ctxt;
259 }
260 
261 /* to support error propagation, ctxt->err must be freed separately */
262 static void parser_context_free(JSONParserContext *ctxt)
263 {
264     if (ctxt) {
265         while (!g_queue_is_empty(ctxt->buf)) {
266             parser_context_pop_token(ctxt);
267         }
268         g_free(ctxt->current);
269         g_queue_free(ctxt->buf);
270         g_free(ctxt);
271     }
272 }
273 
274 /**
275  * Parsing rules
276  */
277 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
278 {
279     QObject *key = NULL, *value;
280     JSONToken *peek, *token;
281 
282     peek = parser_context_peek_token(ctxt);
283     if (peek == NULL) {
284         parse_error(ctxt, NULL, "premature EOI");
285         goto out;
286     }
287 
288     key = parse_value(ctxt, ap);
289     if (!key || qobject_type(key) != QTYPE_QSTRING) {
290         parse_error(ctxt, peek, "key is not a string in object");
291         goto out;
292     }
293 
294     token = parser_context_pop_token(ctxt);
295     if (token == NULL) {
296         parse_error(ctxt, NULL, "premature EOI");
297         goto out;
298     }
299 
300     if (token->type != JSON_COLON) {
301         parse_error(ctxt, token, "missing : in object pair");
302         goto out;
303     }
304 
305     value = parse_value(ctxt, ap);
306     if (value == NULL) {
307         parse_error(ctxt, token, "Missing value in dict");
308         goto out;
309     }
310 
311     qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
312 
313     qobject_decref(key);
314 
315     return 0;
316 
317 out:
318     qobject_decref(key);
319 
320     return -1;
321 }
322 
323 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
324 {
325     QDict *dict = NULL;
326     JSONToken *token, *peek;
327 
328     token = parser_context_pop_token(ctxt);
329     assert(token && token->type == JSON_LCURLY);
330 
331     dict = qdict_new();
332 
333     peek = parser_context_peek_token(ctxt);
334     if (peek == NULL) {
335         parse_error(ctxt, NULL, "premature EOI");
336         goto out;
337     }
338 
339     if (peek->type != JSON_RCURLY) {
340         if (parse_pair(ctxt, dict, ap) == -1) {
341             goto out;
342         }
343 
344         token = parser_context_pop_token(ctxt);
345         if (token == NULL) {
346             parse_error(ctxt, NULL, "premature EOI");
347             goto out;
348         }
349 
350         while (token->type != JSON_RCURLY) {
351             if (token->type != JSON_COMMA) {
352                 parse_error(ctxt, token, "expected separator in dict");
353                 goto out;
354             }
355 
356             if (parse_pair(ctxt, dict, ap) == -1) {
357                 goto out;
358             }
359 
360             token = parser_context_pop_token(ctxt);
361             if (token == NULL) {
362                 parse_error(ctxt, NULL, "premature EOI");
363                 goto out;
364             }
365         }
366     } else {
367         (void)parser_context_pop_token(ctxt);
368     }
369 
370     return QOBJECT(dict);
371 
372 out:
373     QDECREF(dict);
374     return NULL;
375 }
376 
377 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
378 {
379     QList *list = NULL;
380     JSONToken *token, *peek;
381 
382     token = parser_context_pop_token(ctxt);
383     assert(token && token->type == JSON_LSQUARE);
384 
385     list = qlist_new();
386 
387     peek = parser_context_peek_token(ctxt);
388     if (peek == NULL) {
389         parse_error(ctxt, NULL, "premature EOI");
390         goto out;
391     }
392 
393     if (peek->type != JSON_RSQUARE) {
394         QObject *obj;
395 
396         obj = parse_value(ctxt, ap);
397         if (obj == NULL) {
398             parse_error(ctxt, token, "expecting value");
399             goto out;
400         }
401 
402         qlist_append_obj(list, obj);
403 
404         token = parser_context_pop_token(ctxt);
405         if (token == NULL) {
406             parse_error(ctxt, NULL, "premature EOI");
407             goto out;
408         }
409 
410         while (token->type != JSON_RSQUARE) {
411             if (token->type != JSON_COMMA) {
412                 parse_error(ctxt, token, "expected separator in list");
413                 goto out;
414             }
415 
416             obj = parse_value(ctxt, ap);
417             if (obj == NULL) {
418                 parse_error(ctxt, token, "expecting value");
419                 goto out;
420             }
421 
422             qlist_append_obj(list, obj);
423 
424             token = parser_context_pop_token(ctxt);
425             if (token == NULL) {
426                 parse_error(ctxt, NULL, "premature EOI");
427                 goto out;
428             }
429         }
430     } else {
431         (void)parser_context_pop_token(ctxt);
432     }
433 
434     return QOBJECT(list);
435 
436 out:
437     QDECREF(list);
438     return NULL;
439 }
440 
441 static QObject *parse_keyword(JSONParserContext *ctxt)
442 {
443     JSONToken *token;
444 
445     token = parser_context_pop_token(ctxt);
446     assert(token && token->type == JSON_KEYWORD);
447 
448     if (!strcmp(token->str, "true")) {
449         return QOBJECT(qbool_from_bool(true));
450     } else if (!strcmp(token->str, "false")) {
451         return QOBJECT(qbool_from_bool(false));
452     } else if (!strcmp(token->str, "null")) {
453         return qnull();
454     }
455     parse_error(ctxt, token, "invalid keyword '%s'", token->str);
456     return NULL;
457 }
458 
459 static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
460 {
461     JSONToken *token;
462 
463     if (ap == NULL) {
464         return NULL;
465     }
466 
467     token = parser_context_pop_token(ctxt);
468     assert(token && token->type == JSON_ESCAPE);
469 
470     if (!strcmp(token->str, "%p")) {
471         return va_arg(*ap, QObject *);
472     } else if (!strcmp(token->str, "%i")) {
473         return QOBJECT(qbool_from_bool(va_arg(*ap, int)));
474     } else if (!strcmp(token->str, "%d")) {
475         return QOBJECT(qint_from_int(va_arg(*ap, int)));
476     } else if (!strcmp(token->str, "%ld")) {
477         return QOBJECT(qint_from_int(va_arg(*ap, long)));
478     } else if (!strcmp(token->str, "%lld") ||
479                !strcmp(token->str, "%I64d")) {
480         return QOBJECT(qint_from_int(va_arg(*ap, long long)));
481     } else if (!strcmp(token->str, "%s")) {
482         return QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
483     } else if (!strcmp(token->str, "%f")) {
484         return QOBJECT(qfloat_from_double(va_arg(*ap, double)));
485     }
486     return NULL;
487 }
488 
489 static QObject *parse_literal(JSONParserContext *ctxt)
490 {
491     JSONToken *token;
492 
493     token = parser_context_pop_token(ctxt);
494     assert(token);
495 
496     switch (token->type) {
497     case JSON_STRING:
498         return QOBJECT(qstring_from_escaped_str(ctxt, token));
499     case JSON_INTEGER: {
500         /* A possibility exists that this is a whole-valued float where the
501          * fractional part was left out due to being 0 (.0). It's not a big
502          * deal to treat these as ints in the parser, so long as users of the
503          * resulting QObject know to expect a QInt in place of a QFloat in
504          * cases like these.
505          *
506          * However, in some cases these values will overflow/underflow a
507          * QInt/int64 container, thus we should assume these are to be handled
508          * as QFloats/doubles rather than silently changing their values.
509          *
510          * strtoll() indicates these instances by setting errno to ERANGE
511          */
512         int64_t value;
513 
514         errno = 0; /* strtoll doesn't set errno on success */
515         value = strtoll(token->str, NULL, 10);
516         if (errno != ERANGE) {
517             return QOBJECT(qint_from_int(value));
518         }
519         /* fall through to JSON_FLOAT */
520     }
521     case JSON_FLOAT:
522         /* FIXME dependent on locale; a pervasive issue in QEMU */
523         /* FIXME our lexer matches RFC 7159 in forbidding Inf or NaN,
524          * but those might be useful extensions beyond JSON */
525         return QOBJECT(qfloat_from_double(strtod(token->str, NULL)));
526     default:
527         abort();
528     }
529 }
530 
531 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
532 {
533     JSONToken *token;
534 
535     token = parser_context_peek_token(ctxt);
536     if (token == NULL) {
537         parse_error(ctxt, NULL, "premature EOI");
538         return NULL;
539     }
540 
541     switch (token->type) {
542     case JSON_LCURLY:
543         return parse_object(ctxt, ap);
544     case JSON_LSQUARE:
545         return parse_array(ctxt, ap);
546     case JSON_ESCAPE:
547         return parse_escape(ctxt, ap);
548     case JSON_INTEGER:
549     case JSON_FLOAT:
550     case JSON_STRING:
551         return parse_literal(ctxt);
552     case JSON_KEYWORD:
553         return parse_keyword(ctxt);
554     default:
555         parse_error(ctxt, token, "expecting value");
556         return NULL;
557     }
558 }
559 
560 QObject *json_parser_parse(GQueue *tokens, va_list *ap)
561 {
562     return json_parser_parse_err(tokens, ap, NULL);
563 }
564 
565 QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp)
566 {
567     JSONParserContext *ctxt = parser_context_new(tokens);
568     QObject *result;
569 
570     if (!ctxt) {
571         return NULL;
572     }
573 
574     result = parse_value(ctxt, ap);
575 
576     error_propagate(errp, ctxt->err);
577 
578     parser_context_free(ctxt);
579 
580     return result;
581 }
582