xref: /qemu/qobject/json-parser.c (revision 95385fe9)
1 /*
2  * JSON Parser
3  *
4  * Copyright IBM, Corp. 2009
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10  * See the COPYING.LIB file in the top-level directory.
11  *
12  */
13 
14 #include <stdarg.h>
15 
16 #include "qemu-common.h"
17 #include "qapi/qmp/qstring.h"
18 #include "qapi/qmp/qint.h"
19 #include "qapi/qmp/qdict.h"
20 #include "qapi/qmp/qlist.h"
21 #include "qapi/qmp/qfloat.h"
22 #include "qapi/qmp/qbool.h"
23 #include "qapi/qmp/json-parser.h"
24 #include "qapi/qmp/json-lexer.h"
25 
26 typedef struct JSONParserContext
27 {
28     Error *err;
29     QObject *current;
30     GQueue *buf;
31 } JSONParserContext;
32 
33 #define BUG_ON(cond) assert(!(cond))
34 
35 /**
36  * TODO
37  *
38  * 0) make errors meaningful again
39  * 1) add geometry information to tokens
40  * 3) should we return a parsed size?
41  * 4) deal with premature EOI
42  */
43 
44 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
45 
46 /**
47  * Token manipulators
48  *
49  * tokens are dictionaries that contain a type, a string value, and geometry information
50  * about a token identified by the lexer.  These are routines that make working with
51  * these objects a bit easier.
52  */
53 static const char *token_get_value(QObject *obj)
54 {
55     return qdict_get_str(qobject_to_qdict(obj), "token");
56 }
57 
58 static JSONTokenType token_get_type(QObject *obj)
59 {
60     return qdict_get_int(qobject_to_qdict(obj), "type");
61 }
62 
63 /**
64  * Error handler
65  */
66 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
67                                            QObject *token, const char *msg, ...)
68 {
69     va_list ap;
70     char message[1024];
71     va_start(ap, msg);
72     vsnprintf(message, sizeof(message), msg, ap);
73     va_end(ap);
74     if (ctxt->err) {
75         error_free(ctxt->err);
76         ctxt->err = NULL;
77     }
78     error_setg(&ctxt->err, "JSON parse error, %s", message);
79 }
80 
81 /**
82  * String helpers
83  *
84  * These helpers are used to unescape strings.
85  */
/*
 * Encode the 16-bit code unit @wchar as a NUL-terminated UTF-8 sequence
 * in @buffer.
 *
 * One, two or three bytes are produced depending on the value, followed by
 * the terminating NUL; @buffer_length must have room for all of them or the
 * function asserts.
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    size_t n = 0;

    if (wchar <= 0x007F) {
        /* 0xxxxxxx */
        assert(!(buffer_length < 2));
        buffer[n++] = wchar & 0x7F;
    } else if (wchar <= 0x07FF) {
        /* 110xxxxx 10xxxxxx */
        assert(!(buffer_length < 3));
        buffer[n++] = 0xC0 | ((wchar >> 6) & 0x1F);
        buffer[n++] = 0x80 | (wchar & 0x3F);
    } else {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        assert(!(buffer_length < 4));
        buffer[n++] = 0xE0 | ((wchar >> 12) & 0x0F);
        buffer[n++] = 0x80 | ((wchar >> 6) & 0x3F);
        buffer[n++] = 0x80 | (wchar & 0x3F);
    }

    buffer[n] = 0;
}
108 
/*
 * Convert one hexadecimal digit (either case) to its numeric value.
 *
 * Returns 0..15, or -1 when @ch is not a hexadecimal digit.
 */
static int hex2decimal(char ch)
{
    int digit = -1;

    if (ch >= '0' && ch <= '9') {
        digit = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
        digit = ch - 'a' + 10;
    } else if (ch >= 'A' && ch <= 'F') {
        digit = ch - 'A' + 10;
    }

    return digit;
}
121 
122 /**
123  * parse_string(): Parse a json string and return a QObject
124  *
125  *  string
126  *      ""
127  *      " chars "
128  *  chars
129  *      char
130  *      char chars
131  *  char
132  *      any-Unicode-character-
133  *          except-"-or-\-or-
134  *          control-character
135  *      \"
136  *      \\
137  *      \/
138  *      \b
139  *      \f
140  *      \n
141  *      \r
142  *      \t
143  *      \u four-hex-digits
144  */
145 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
146 {
147     const char *ptr = token_get_value(token);
148     QString *str;
149     int double_quote = 1;
150 
151     if (*ptr == '"') {
152         double_quote = 1;
153     } else {
154         double_quote = 0;
155     }
156     ptr++;
157 
158     str = qstring_new();
159     while (*ptr &&
160            ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
161         if (*ptr == '\\') {
162             ptr++;
163 
164             switch (*ptr) {
165             case '"':
166                 qstring_append(str, "\"");
167                 ptr++;
168                 break;
169             case '\'':
170                 qstring_append(str, "'");
171                 ptr++;
172                 break;
173             case '\\':
174                 qstring_append(str, "\\");
175                 ptr++;
176                 break;
177             case '/':
178                 qstring_append(str, "/");
179                 ptr++;
180                 break;
181             case 'b':
182                 qstring_append(str, "\b");
183                 ptr++;
184                 break;
185             case 'f':
186                 qstring_append(str, "\f");
187                 ptr++;
188                 break;
189             case 'n':
190                 qstring_append(str, "\n");
191                 ptr++;
192                 break;
193             case 'r':
194                 qstring_append(str, "\r");
195                 ptr++;
196                 break;
197             case 't':
198                 qstring_append(str, "\t");
199                 ptr++;
200                 break;
201             case 'u': {
202                 uint16_t unicode_char = 0;
203                 char utf8_char[4];
204                 int i = 0;
205 
206                 ptr++;
207 
208                 for (i = 0; i < 4; i++) {
209                     if (qemu_isxdigit(*ptr)) {
210                         unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
211                     } else {
212                         parse_error(ctxt, token,
213                                     "invalid hex escape sequence in string");
214                         goto out;
215                     }
216                     ptr++;
217                 }
218 
219                 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
220                 qstring_append(str, utf8_char);
221             }   break;
222             default:
223                 parse_error(ctxt, token, "invalid escape sequence in string");
224                 goto out;
225             }
226         } else {
227             char dummy[2];
228 
229             dummy[0] = *ptr++;
230             dummy[1] = 0;
231 
232             qstring_append(str, dummy);
233         }
234     }
235 
236     return str;
237 
238 out:
239     QDECREF(str);
240     return NULL;
241 }
242 
243 /* Note: unless the token object returned by parser_context_peek_token
244  * or parser_context_pop_token is explicitly incref'd, it will be
245  * deleted as soon as parser_context_pop_token is called again.
246  */
247 static QObject *parser_context_pop_token(JSONParserContext *ctxt)
248 {
249     qobject_decref(ctxt->current);
250     assert(!g_queue_is_empty(ctxt->buf));
251     ctxt->current = g_queue_pop_head(ctxt->buf);
252     return ctxt->current;
253 }
254 
255 static QObject *parser_context_peek_token(JSONParserContext *ctxt)
256 {
257     assert(!g_queue_is_empty(ctxt->buf));
258     return g_queue_peek_head(ctxt->buf);
259 }
260 
261 static JSONParserContext *parser_context_new(GQueue *tokens)
262 {
263     JSONParserContext *ctxt;
264 
265     if (!tokens) {
266         return NULL;
267     }
268 
269     ctxt = g_malloc0(sizeof(JSONParserContext));
270     ctxt->buf = tokens;
271 
272     return ctxt;
273 }
274 
275 /* to support error propagation, ctxt->err must be freed separately */
276 static void parser_context_free(JSONParserContext *ctxt)
277 {
278     if (ctxt) {
279         while (!g_queue_is_empty(ctxt->buf)) {
280             parser_context_pop_token(ctxt);
281         }
282         qobject_decref(ctxt->current);
283         g_queue_free(ctxt->buf);
284         g_free(ctxt);
285     }
286 }
287 
288 /**
289  * Parsing rules
290  */
291 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
292 {
293     QObject *key = NULL, *token = NULL, *value, *peek;
294 
295     peek = parser_context_peek_token(ctxt);
296     if (peek == NULL) {
297         parse_error(ctxt, NULL, "premature EOI");
298         goto out;
299     }
300 
301     key = parse_value(ctxt, ap);
302     if (!key || qobject_type(key) != QTYPE_QSTRING) {
303         parse_error(ctxt, peek, "key is not a string in object");
304         goto out;
305     }
306 
307     token = parser_context_pop_token(ctxt);
308     if (token == NULL) {
309         parse_error(ctxt, NULL, "premature EOI");
310         goto out;
311     }
312 
313     if (token_get_type(token) != JSON_COLON) {
314         parse_error(ctxt, token, "missing : in object pair");
315         goto out;
316     }
317 
318     value = parse_value(ctxt, ap);
319     if (value == NULL) {
320         parse_error(ctxt, token, "Missing value in dict");
321         goto out;
322     }
323 
324     qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
325 
326     qobject_decref(key);
327 
328     return 0;
329 
330 out:
331     qobject_decref(key);
332 
333     return -1;
334 }
335 
336 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
337 {
338     QDict *dict = NULL;
339     QObject *token, *peek;
340 
341     token = parser_context_pop_token(ctxt);
342     assert(token && token_get_type(token) == JSON_LCURLY);
343 
344     dict = qdict_new();
345 
346     peek = parser_context_peek_token(ctxt);
347     if (peek == NULL) {
348         parse_error(ctxt, NULL, "premature EOI");
349         goto out;
350     }
351 
352     if (token_get_type(peek) != JSON_RCURLY) {
353         if (parse_pair(ctxt, dict, ap) == -1) {
354             goto out;
355         }
356 
357         token = parser_context_pop_token(ctxt);
358         if (token == NULL) {
359             parse_error(ctxt, NULL, "premature EOI");
360             goto out;
361         }
362 
363         while (token_get_type(token) != JSON_RCURLY) {
364             if (token_get_type(token) != JSON_COMMA) {
365                 parse_error(ctxt, token, "expected separator in dict");
366                 goto out;
367             }
368 
369             if (parse_pair(ctxt, dict, ap) == -1) {
370                 goto out;
371             }
372 
373             token = parser_context_pop_token(ctxt);
374             if (token == NULL) {
375                 parse_error(ctxt, NULL, "premature EOI");
376                 goto out;
377             }
378         }
379     } else {
380         (void)parser_context_pop_token(ctxt);
381     }
382 
383     return QOBJECT(dict);
384 
385 out:
386     QDECREF(dict);
387     return NULL;
388 }
389 
390 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
391 {
392     QList *list = NULL;
393     QObject *token, *peek;
394 
395     token = parser_context_pop_token(ctxt);
396     assert(token && token_get_type(token) == JSON_LSQUARE);
397 
398     list = qlist_new();
399 
400     peek = parser_context_peek_token(ctxt);
401     if (peek == NULL) {
402         parse_error(ctxt, NULL, "premature EOI");
403         goto out;
404     }
405 
406     if (token_get_type(peek) != JSON_RSQUARE) {
407         QObject *obj;
408 
409         obj = parse_value(ctxt, ap);
410         if (obj == NULL) {
411             parse_error(ctxt, token, "expecting value");
412             goto out;
413         }
414 
415         qlist_append_obj(list, obj);
416 
417         token = parser_context_pop_token(ctxt);
418         if (token == NULL) {
419             parse_error(ctxt, NULL, "premature EOI");
420             goto out;
421         }
422 
423         while (token_get_type(token) != JSON_RSQUARE) {
424             if (token_get_type(token) != JSON_COMMA) {
425                 parse_error(ctxt, token, "expected separator in list");
426                 goto out;
427             }
428 
429             obj = parse_value(ctxt, ap);
430             if (obj == NULL) {
431                 parse_error(ctxt, token, "expecting value");
432                 goto out;
433             }
434 
435             qlist_append_obj(list, obj);
436 
437             token = parser_context_pop_token(ctxt);
438             if (token == NULL) {
439                 parse_error(ctxt, NULL, "premature EOI");
440                 goto out;
441             }
442         }
443     } else {
444         (void)parser_context_pop_token(ctxt);
445     }
446 
447     return QOBJECT(list);
448 
449 out:
450     QDECREF(list);
451     return NULL;
452 }
453 
454 static QObject *parse_keyword(JSONParserContext *ctxt)
455 {
456     QObject *token;
457     const char *val;
458 
459     token = parser_context_pop_token(ctxt);
460     assert(token && token_get_type(token) == JSON_KEYWORD);
461     val = token_get_value(token);
462 
463     if (!strcmp(val, "true")) {
464         return QOBJECT(qbool_from_bool(true));
465     } else if (!strcmp(val, "false")) {
466         return QOBJECT(qbool_from_bool(false));
467     } else if (!strcmp(val, "null")) {
468         return qnull();
469     }
470     parse_error(ctxt, token, "invalid keyword '%s'", val);
471     return NULL;
472 }
473 
474 static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
475 {
476     QObject *token;
477     const char *val;
478 
479     if (ap == NULL) {
480         return NULL;
481     }
482 
483     token = parser_context_pop_token(ctxt);
484     assert(token && token_get_type(token) == JSON_ESCAPE);
485     val = token_get_value(token);
486 
487     if (!strcmp(val, "%p")) {
488         return va_arg(*ap, QObject *);
489     } else if (!strcmp(val, "%i")) {
490         return QOBJECT(qbool_from_bool(va_arg(*ap, int)));
491     } else if (!strcmp(val, "%d")) {
492         return QOBJECT(qint_from_int(va_arg(*ap, int)));
493     } else if (!strcmp(val, "%ld")) {
494         return QOBJECT(qint_from_int(va_arg(*ap, long)));
495     } else if (!strcmp(val, "%lld") ||
496                !strcmp(val, "%I64d")) {
497         return QOBJECT(qint_from_int(va_arg(*ap, long long)));
498     } else if (!strcmp(val, "%s")) {
499         return QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
500     } else if (!strcmp(val, "%f")) {
501         return QOBJECT(qfloat_from_double(va_arg(*ap, double)));
502     }
503     return NULL;
504 }
505 
506 static QObject *parse_literal(JSONParserContext *ctxt)
507 {
508     QObject *token;
509 
510     token = parser_context_pop_token(ctxt);
511     assert(token);
512 
513     switch (token_get_type(token)) {
514     case JSON_STRING:
515         return QOBJECT(qstring_from_escaped_str(ctxt, token));
516     case JSON_INTEGER: {
517         /* A possibility exists that this is a whole-valued float where the
518          * fractional part was left out due to being 0 (.0). It's not a big
519          * deal to treat these as ints in the parser, so long as users of the
520          * resulting QObject know to expect a QInt in place of a QFloat in
521          * cases like these.
522          *
523          * However, in some cases these values will overflow/underflow a
524          * QInt/int64 container, thus we should assume these are to be handled
525          * as QFloats/doubles rather than silently changing their values.
526          *
527          * strtoll() indicates these instances by setting errno to ERANGE
528          */
529         int64_t value;
530 
531         errno = 0; /* strtoll doesn't set errno on success */
532         value = strtoll(token_get_value(token), NULL, 10);
533         if (errno != ERANGE) {
534             return QOBJECT(qint_from_int(value));
535         }
536         /* fall through to JSON_FLOAT */
537     }
538     case JSON_FLOAT:
539         /* FIXME dependent on locale */
540         return QOBJECT(qfloat_from_double(strtod(token_get_value(token),
541                                                  NULL)));
542     default:
543         abort();
544     }
545 }
546 
547 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
548 {
549     QObject *token;
550 
551     token = parser_context_peek_token(ctxt);
552     if (token == NULL) {
553         parse_error(ctxt, NULL, "premature EOI");
554         return NULL;
555     }
556 
557     switch (token_get_type(token)) {
558     case JSON_LCURLY:
559         return parse_object(ctxt, ap);
560     case JSON_LSQUARE:
561         return parse_array(ctxt, ap);
562     case JSON_ESCAPE:
563         return parse_escape(ctxt, ap);
564     case JSON_INTEGER:
565     case JSON_FLOAT:
566     case JSON_STRING:
567         return parse_literal(ctxt);
568     case JSON_KEYWORD:
569         return parse_keyword(ctxt);
570     default:
571         parse_error(ctxt, token, "expecting value");
572         return NULL;
573     }
574 }
575 
576 QObject *json_parser_parse(GQueue *tokens, va_list *ap)
577 {
578     return json_parser_parse_err(tokens, ap, NULL);
579 }
580 
581 QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp)
582 {
583     JSONParserContext *ctxt = parser_context_new(tokens);
584     QObject *result;
585 
586     if (!ctxt) {
587         return NULL;
588     }
589 
590     result = parse_value(ctxt, ap);
591 
592     error_propagate(errp, ctxt->err);
593 
594     parser_context_free(ctxt);
595 
596     return result;
597 }
598