xref: /qemu/qobject/json-parser.c (revision 85aad98a)
1 /*
2  * JSON Parser
3  *
4  * Copyright IBM, Corp. 2009
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10  * See the COPYING.LIB file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "qemu-common.h"
17 #include "qapi/qmp/types.h"
18 #include "qapi/qmp/json-parser.h"
19 #include "qapi/qmp/json-lexer.h"
20 #include "qapi/qmp/json-streamer.h"
21 
22 typedef struct JSONParserContext
23 {
24     Error *err;
25     JSONToken *current;
26     GQueue *buf;
27 } JSONParserContext;
28 
/* Assert that @cond does NOT hold; expands to assert(!(cond)). */
#define BUG_ON(cond) assert(!(cond))
30 
31 /**
32  * TODO
33  *
34  * 0) make errors meaningful again
35  * 1) add geometry information to tokens
36  * 3) should we return a parsed size?
37  * 4) deal with premature EOI
38  */
39 
/*
 * Forward declaration: parse_value() is defined near the end of this file
 * but is called by the pair/object/array parsers, which it in turn calls.
 */
static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
41 
42 /**
43  * Error handler
44  */
45 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
46                                            JSONToken *token, const char *msg, ...)
47 {
48     va_list ap;
49     char message[1024];
50     va_start(ap, msg);
51     vsnprintf(message, sizeof(message), msg, ap);
52     va_end(ap);
53     if (ctxt->err) {
54         error_free(ctxt->err);
55         ctxt->err = NULL;
56     }
57     error_setg(&ctxt->err, "JSON parse error, %s", message);
58 }
59 
60 /**
61  * String helpers
62  *
63  * These helpers are used to unescape strings.
64  */
/*
 * Encode the 16-bit code unit @wchar as a NUL-terminated UTF-8 sequence in
 * @buffer.  The encoding takes one, two or three bytes plus the terminator;
 * @buffer_length is asserted to be large enough for the form actually used.
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    size_t used = 0;

    if (wchar > 0x07FF) {
        /* three-byte form: 1110xxxx 10xxxxxx 10xxxxxx */
        assert(!(buffer_length < 4));

        buffer[used++] = 0xE0 | ((wchar >> 12) & 0x0F);
        buffer[used++] = 0x80 | ((wchar >> 6) & 0x3F);
        buffer[used++] = 0x80 | (wchar & 0x3F);
    } else if (wchar > 0x007F) {
        /* two-byte form: 110xxxxx 10xxxxxx */
        assert(!(buffer_length < 3));

        buffer[used++] = 0xC0 | ((wchar >> 6) & 0x1F);
        buffer[used++] = 0x80 | (wchar & 0x3F);
    } else {
        /* plain 7-bit character */
        assert(!(buffer_length < 2));

        buffer[used++] = wchar & 0x7F;
    }

    buffer[used] = 0;
}
87 
/*
 * Return the numeric value (0..15) of the hexadecimal digit @ch, accepting
 * both upper and lower case, or -1 if @ch is not a hexadecimal digit.
 */
static int hex2decimal(char ch)
{
    if ('0' <= ch && ch <= '9') {
        return ch - '0';
    }
    if ('a' <= ch && ch <= 'f') {
        return (ch - 'a') + 10;
    }
    if ('A' <= ch && ch <= 'F') {
        return (ch - 'A') + 10;
    }

    return -1;
}
100 
101 /**
102  * parse_string(): Parse a json string and return a QObject
103  *
104  *  string
105  *      ""
106  *      " chars "
107  *  chars
108  *      char
109  *      char chars
110  *  char
111  *      any-Unicode-character-
112  *          except-"-or-\-or-
113  *          control-character
114  *      \"
115  *      \\
116  *      \/
117  *      \b
118  *      \f
119  *      \n
120  *      \r
121  *      \t
122  *      \u four-hex-digits
123  */
124 static QString *qstring_from_escaped_str(JSONParserContext *ctxt,
125                                          JSONToken *token)
126 {
127     const char *ptr = token->str;
128     QString *str;
129     int double_quote = 1;
130 
131     if (*ptr == '"') {
132         double_quote = 1;
133     } else {
134         double_quote = 0;
135     }
136     ptr++;
137 
138     str = qstring_new();
139     while (*ptr &&
140            ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
141         if (*ptr == '\\') {
142             ptr++;
143 
144             switch (*ptr) {
145             case '"':
146                 qstring_append(str, "\"");
147                 ptr++;
148                 break;
149             case '\'':
150                 qstring_append(str, "'");
151                 ptr++;
152                 break;
153             case '\\':
154                 qstring_append(str, "\\");
155                 ptr++;
156                 break;
157             case '/':
158                 qstring_append(str, "/");
159                 ptr++;
160                 break;
161             case 'b':
162                 qstring_append(str, "\b");
163                 ptr++;
164                 break;
165             case 'f':
166                 qstring_append(str, "\f");
167                 ptr++;
168                 break;
169             case 'n':
170                 qstring_append(str, "\n");
171                 ptr++;
172                 break;
173             case 'r':
174                 qstring_append(str, "\r");
175                 ptr++;
176                 break;
177             case 't':
178                 qstring_append(str, "\t");
179                 ptr++;
180                 break;
181             case 'u': {
182                 uint16_t unicode_char = 0;
183                 char utf8_char[4];
184                 int i = 0;
185 
186                 ptr++;
187 
188                 for (i = 0; i < 4; i++) {
189                     if (qemu_isxdigit(*ptr)) {
190                         unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
191                     } else {
192                         parse_error(ctxt, token,
193                                     "invalid hex escape sequence in string");
194                         goto out;
195                     }
196                     ptr++;
197                 }
198 
199                 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
200                 qstring_append(str, utf8_char);
201             }   break;
202             default:
203                 parse_error(ctxt, token, "invalid escape sequence in string");
204                 goto out;
205             }
206         } else {
207             char dummy[2];
208 
209             dummy[0] = *ptr++;
210             dummy[1] = 0;
211 
212             qstring_append(str, dummy);
213         }
214     }
215 
216     return str;
217 
218 out:
219     QDECREF(str);
220     return NULL;
221 }
222 
223 /* Note: the token object returned by parser_context_peek_token or
224  * parser_context_pop_token is deleted as soon as parser_context_pop_token
225  * is called again.
226  */
227 static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
228 {
229     g_free(ctxt->current);
230     assert(!g_queue_is_empty(ctxt->buf));
231     ctxt->current = g_queue_pop_head(ctxt->buf);
232     return ctxt->current;
233 }
234 
235 static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
236 {
237     assert(!g_queue_is_empty(ctxt->buf));
238     return g_queue_peek_head(ctxt->buf);
239 }
240 
241 static JSONParserContext *parser_context_new(GQueue *tokens)
242 {
243     JSONParserContext *ctxt;
244 
245     if (!tokens) {
246         return NULL;
247     }
248 
249     ctxt = g_malloc0(sizeof(JSONParserContext));
250     ctxt->buf = tokens;
251 
252     return ctxt;
253 }
254 
255 /* to support error propagation, ctxt->err must be freed separately */
256 static void parser_context_free(JSONParserContext *ctxt)
257 {
258     if (ctxt) {
259         while (!g_queue_is_empty(ctxt->buf)) {
260             parser_context_pop_token(ctxt);
261         }
262         g_free(ctxt->current);
263         g_queue_free(ctxt->buf);
264         g_free(ctxt);
265     }
266 }
267 
268 /**
269  * Parsing rules
270  */
271 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
272 {
273     QObject *key = NULL, *value;
274     JSONToken *peek, *token;
275 
276     peek = parser_context_peek_token(ctxt);
277     if (peek == NULL) {
278         parse_error(ctxt, NULL, "premature EOI");
279         goto out;
280     }
281 
282     key = parse_value(ctxt, ap);
283     if (!key || qobject_type(key) != QTYPE_QSTRING) {
284         parse_error(ctxt, peek, "key is not a string in object");
285         goto out;
286     }
287 
288     token = parser_context_pop_token(ctxt);
289     if (token == NULL) {
290         parse_error(ctxt, NULL, "premature EOI");
291         goto out;
292     }
293 
294     if (token->type != JSON_COLON) {
295         parse_error(ctxt, token, "missing : in object pair");
296         goto out;
297     }
298 
299     value = parse_value(ctxt, ap);
300     if (value == NULL) {
301         parse_error(ctxt, token, "Missing value in dict");
302         goto out;
303     }
304 
305     qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
306 
307     qobject_decref(key);
308 
309     return 0;
310 
311 out:
312     qobject_decref(key);
313 
314     return -1;
315 }
316 
317 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
318 {
319     QDict *dict = NULL;
320     JSONToken *token, *peek;
321 
322     token = parser_context_pop_token(ctxt);
323     assert(token && token->type == JSON_LCURLY);
324 
325     dict = qdict_new();
326 
327     peek = parser_context_peek_token(ctxt);
328     if (peek == NULL) {
329         parse_error(ctxt, NULL, "premature EOI");
330         goto out;
331     }
332 
333     if (peek->type != JSON_RCURLY) {
334         if (parse_pair(ctxt, dict, ap) == -1) {
335             goto out;
336         }
337 
338         token = parser_context_pop_token(ctxt);
339         if (token == NULL) {
340             parse_error(ctxt, NULL, "premature EOI");
341             goto out;
342         }
343 
344         while (token->type != JSON_RCURLY) {
345             if (token->type != JSON_COMMA) {
346                 parse_error(ctxt, token, "expected separator in dict");
347                 goto out;
348             }
349 
350             if (parse_pair(ctxt, dict, ap) == -1) {
351                 goto out;
352             }
353 
354             token = parser_context_pop_token(ctxt);
355             if (token == NULL) {
356                 parse_error(ctxt, NULL, "premature EOI");
357                 goto out;
358             }
359         }
360     } else {
361         (void)parser_context_pop_token(ctxt);
362     }
363 
364     return QOBJECT(dict);
365 
366 out:
367     QDECREF(dict);
368     return NULL;
369 }
370 
371 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
372 {
373     QList *list = NULL;
374     JSONToken *token, *peek;
375 
376     token = parser_context_pop_token(ctxt);
377     assert(token && token->type == JSON_LSQUARE);
378 
379     list = qlist_new();
380 
381     peek = parser_context_peek_token(ctxt);
382     if (peek == NULL) {
383         parse_error(ctxt, NULL, "premature EOI");
384         goto out;
385     }
386 
387     if (peek->type != JSON_RSQUARE) {
388         QObject *obj;
389 
390         obj = parse_value(ctxt, ap);
391         if (obj == NULL) {
392             parse_error(ctxt, token, "expecting value");
393             goto out;
394         }
395 
396         qlist_append_obj(list, obj);
397 
398         token = parser_context_pop_token(ctxt);
399         if (token == NULL) {
400             parse_error(ctxt, NULL, "premature EOI");
401             goto out;
402         }
403 
404         while (token->type != JSON_RSQUARE) {
405             if (token->type != JSON_COMMA) {
406                 parse_error(ctxt, token, "expected separator in list");
407                 goto out;
408             }
409 
410             obj = parse_value(ctxt, ap);
411             if (obj == NULL) {
412                 parse_error(ctxt, token, "expecting value");
413                 goto out;
414             }
415 
416             qlist_append_obj(list, obj);
417 
418             token = parser_context_pop_token(ctxt);
419             if (token == NULL) {
420                 parse_error(ctxt, NULL, "premature EOI");
421                 goto out;
422             }
423         }
424     } else {
425         (void)parser_context_pop_token(ctxt);
426     }
427 
428     return QOBJECT(list);
429 
430 out:
431     QDECREF(list);
432     return NULL;
433 }
434 
435 static QObject *parse_keyword(JSONParserContext *ctxt)
436 {
437     JSONToken *token;
438 
439     token = parser_context_pop_token(ctxt);
440     assert(token && token->type == JSON_KEYWORD);
441 
442     if (!strcmp(token->str, "true")) {
443         return QOBJECT(qbool_from_bool(true));
444     } else if (!strcmp(token->str, "false")) {
445         return QOBJECT(qbool_from_bool(false));
446     } else if (!strcmp(token->str, "null")) {
447         return qnull();
448     }
449     parse_error(ctxt, token, "invalid keyword '%s'", token->str);
450     return NULL;
451 }
452 
453 static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
454 {
455     JSONToken *token;
456 
457     if (ap == NULL) {
458         return NULL;
459     }
460 
461     token = parser_context_pop_token(ctxt);
462     assert(token && token->type == JSON_ESCAPE);
463 
464     if (!strcmp(token->str, "%p")) {
465         return va_arg(*ap, QObject *);
466     } else if (!strcmp(token->str, "%i")) {
467         return QOBJECT(qbool_from_bool(va_arg(*ap, int)));
468     } else if (!strcmp(token->str, "%d")) {
469         return QOBJECT(qint_from_int(va_arg(*ap, int)));
470     } else if (!strcmp(token->str, "%ld")) {
471         return QOBJECT(qint_from_int(va_arg(*ap, long)));
472     } else if (!strcmp(token->str, "%lld") ||
473                !strcmp(token->str, "%I64d")) {
474         return QOBJECT(qint_from_int(va_arg(*ap, long long)));
475     } else if (!strcmp(token->str, "%s")) {
476         return QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
477     } else if (!strcmp(token->str, "%f")) {
478         return QOBJECT(qfloat_from_double(va_arg(*ap, double)));
479     }
480     return NULL;
481 }
482 
483 static QObject *parse_literal(JSONParserContext *ctxt)
484 {
485     JSONToken *token;
486 
487     token = parser_context_pop_token(ctxt);
488     assert(token);
489 
490     switch (token->type) {
491     case JSON_STRING:
492         return QOBJECT(qstring_from_escaped_str(ctxt, token));
493     case JSON_INTEGER: {
494         /* A possibility exists that this is a whole-valued float where the
495          * fractional part was left out due to being 0 (.0). It's not a big
496          * deal to treat these as ints in the parser, so long as users of the
497          * resulting QObject know to expect a QInt in place of a QFloat in
498          * cases like these.
499          *
500          * However, in some cases these values will overflow/underflow a
501          * QInt/int64 container, thus we should assume these are to be handled
502          * as QFloats/doubles rather than silently changing their values.
503          *
504          * strtoll() indicates these instances by setting errno to ERANGE
505          */
506         int64_t value;
507 
508         errno = 0; /* strtoll doesn't set errno on success */
509         value = strtoll(token->str, NULL, 10);
510         if (errno != ERANGE) {
511             return QOBJECT(qint_from_int(value));
512         }
513         /* fall through to JSON_FLOAT */
514     }
515     case JSON_FLOAT:
516         /* FIXME dependent on locale; a pervasive issue in QEMU */
517         /* FIXME our lexer matches RFC 7159 in forbidding Inf or NaN,
518          * but those might be useful extensions beyond JSON */
519         return QOBJECT(qfloat_from_double(strtod(token->str, NULL)));
520     default:
521         abort();
522     }
523 }
524 
525 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
526 {
527     JSONToken *token;
528 
529     token = parser_context_peek_token(ctxt);
530     if (token == NULL) {
531         parse_error(ctxt, NULL, "premature EOI");
532         return NULL;
533     }
534 
535     switch (token->type) {
536     case JSON_LCURLY:
537         return parse_object(ctxt, ap);
538     case JSON_LSQUARE:
539         return parse_array(ctxt, ap);
540     case JSON_ESCAPE:
541         return parse_escape(ctxt, ap);
542     case JSON_INTEGER:
543     case JSON_FLOAT:
544     case JSON_STRING:
545         return parse_literal(ctxt);
546     case JSON_KEYWORD:
547         return parse_keyword(ctxt);
548     default:
549         parse_error(ctxt, token, "expecting value");
550         return NULL;
551     }
552 }
553 
554 QObject *json_parser_parse(GQueue *tokens, va_list *ap)
555 {
556     return json_parser_parse_err(tokens, ap, NULL);
557 }
558 
559 QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp)
560 {
561     JSONParserContext *ctxt = parser_context_new(tokens);
562     QObject *result;
563 
564     if (!ctxt) {
565         return NULL;
566     }
567 
568     result = parse_value(ctxt, ap);
569 
570     error_propagate(errp, ctxt->err);
571 
572     parser_context_free(ctxt);
573 
574     return result;
575 }
576