1 /*
2  * This file is part of the MicroPython project, http://micropython.org/
3  *
4  * The MIT License (MIT)
5  *
6  * Copyright (c) 2014-2019 Damien P. George
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a copy
9  * of this software and associated documentation files (the "Software"), to deal
10  * in the Software without restriction, including without limitation the rights
11  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12  * copies of the Software, and to permit persons to whom the Software is
13  * furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be included in
16  * all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24  * THE SOFTWARE.
25  */
26 
27 #include <stdio.h>
28 
29 #include "py/objlist.h"
30 #include "py/objstringio.h"
31 #include "py/parsenum.h"
32 #include "py/runtime.h"
33 #include "py/stream.h"
34 
35 #if MICROPY_PY_UJSON
36 
37 #if MICROPY_PY_UJSON_SEPARATORS
38 
// Output selector for the shared dump helper.  The numeric value is also used
// by that helper as the count of leading positional arguments to skip before
// keyword parsing (1 for dumps(obj), 2 for dump(obj, stream)), so the values
// are significant and must stay in step with the call signatures.
enum {
    DUMP_MODE_TO_STRING = 1, // dumps(obj): build and return a str
    DUMP_MODE_TO_STREAM = 2, // dump(obj, stream): write to the given stream
};
43 
mod_ujson_dump_helper(size_t n_args,const mp_obj_t * pos_args,mp_map_t * kw_args,unsigned int mode)44 STATIC mp_obj_t mod_ujson_dump_helper(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args, unsigned int mode) {
45     enum { ARG_separators };
46     static const mp_arg_t allowed_args[] = {
47         { MP_QSTR_separators, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE} },
48     };
49 
50     mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
51     mp_arg_parse_all(n_args - mode, pos_args + mode, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
52 
53     mp_print_ext_t print_ext;
54 
55     if (args[ARG_separators].u_obj == mp_const_none) {
56         print_ext.item_separator = ", ";
57         print_ext.key_separator = ": ";
58     } else {
59         mp_obj_t *items;
60         mp_obj_get_array_fixed_n(args[ARG_separators].u_obj, 2, &items);
61         print_ext.item_separator = mp_obj_str_get_str(items[0]);
62         print_ext.key_separator = mp_obj_str_get_str(items[1]);
63     }
64 
65     if (mode == DUMP_MODE_TO_STRING) {
66         // dumps(obj)
67         vstr_t vstr;
68         vstr_init_print(&vstr, 8, &print_ext.base);
69         mp_obj_print_helper(&print_ext.base, pos_args[0], PRINT_JSON);
70         return mp_obj_new_str_from_vstr(&mp_type_str, &vstr);
71     } else {
72         // dump(obj, stream)
73         print_ext.base.data = MP_OBJ_TO_PTR(pos_args[1]);
74         print_ext.base.print_strn = mp_stream_write_adaptor;
75         mp_get_stream_raise(pos_args[1], MP_STREAM_OP_WRITE);
76         mp_obj_print_helper(&print_ext.base, pos_args[0], PRINT_JSON);
77         return mp_const_none;
78     }
79 }
80 
mod_ujson_dump(size_t n_args,const mp_obj_t * pos_args,mp_map_t * kw_args)81 STATIC mp_obj_t mod_ujson_dump(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
82     return mod_ujson_dump_helper(n_args, pos_args, kw_args, DUMP_MODE_TO_STREAM);
83 }
84 STATIC MP_DEFINE_CONST_FUN_OBJ_KW(mod_ujson_dump_obj, 2, mod_ujson_dump);
85 
mod_ujson_dumps(size_t n_args,const mp_obj_t * pos_args,mp_map_t * kw_args)86 STATIC mp_obj_t mod_ujson_dumps(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
87     return mod_ujson_dump_helper(n_args, pos_args, kw_args, DUMP_MODE_TO_STRING);
88 }
89 STATIC MP_DEFINE_CONST_FUN_OBJ_KW(mod_ujson_dumps_obj, 1, mod_ujson_dumps);
90 
91 #else
92 
mod_ujson_dump(mp_obj_t obj,mp_obj_t stream)93 STATIC mp_obj_t mod_ujson_dump(mp_obj_t obj, mp_obj_t stream) {
94     mp_get_stream_raise(stream, MP_STREAM_OP_WRITE);
95     mp_print_t print = {MP_OBJ_TO_PTR(stream), mp_stream_write_adaptor};
96     mp_obj_print_helper(&print, obj, PRINT_JSON);
97     return mp_const_none;
98 }
99 STATIC MP_DEFINE_CONST_FUN_OBJ_2(mod_ujson_dump_obj, mod_ujson_dump);
100 
mod_ujson_dumps(mp_obj_t obj)101 STATIC mp_obj_t mod_ujson_dumps(mp_obj_t obj) {
102     vstr_t vstr;
103     mp_print_t print;
104     vstr_init_print(&vstr, 8, &print);
105     mp_obj_print_helper(&print, obj, PRINT_JSON);
106     return mp_obj_new_str_from_vstr(&mp_type_str, &vstr);
107 }
108 STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_dumps_obj, mod_ujson_dumps);
109 
110 #endif
111 
// The function below implements a simple non-recursive JSON parser.
//
// The JSON specification is at http://www.ietf.org/rfc/rfc4627.txt
// The parser here will parse any valid JSON and return the correct
// corresponding Python object.  It allows through a superset of JSON, since
// it treats commas and colons as "whitespace", and doesn't care if
// brackets/braces are correctly paired.  It will raise a ValueError if the
// input is outside its specs.
//
// Most of the work is parsing the primitives (null, false, true, numbers,
// strings).  It does 1 pass over the input stream.  It tries to be fast and
// small in code size, while not using more RAM than necessary.
124 
125 typedef struct _ujson_stream_t {
126     mp_obj_t stream_obj;
127     mp_uint_t (*read)(mp_obj_t obj, void *buf, mp_uint_t size, int *errcode);
128     int errcode;
129     byte cur;
130 } ujson_stream_t;
131 
// Accessors for the one-byte lookahead of a ujson_stream_t.  The argument is
// the stream struct itself (an lvalue), not a pointer to it.
#define S_EOF (0) // null is not allowed in json stream so is ok as EOF marker
#define S_CUR(s) ((s).cur) // current lookahead byte
#define S_END(s) (S_CUR(s) == S_EOF) // true once the end marker has been reached
#define S_NEXT(s) (ujson_stream_next(&(s))) // advance and yield the new current byte
136 
ujson_stream_next(ujson_stream_t * s)137 STATIC byte ujson_stream_next(ujson_stream_t *s) {
138     mp_uint_t ret = s->read(s->stream_obj, &s->cur, 1, &s->errcode);
139     if (s->errcode != 0) {
140         mp_raise_OSError(s->errcode);
141     }
142     if (ret == 0) {
143         s->cur = S_EOF;
144     }
145     return s->cur;
146 }
147 
mod_ujson_load(mp_obj_t stream_obj)148 STATIC mp_obj_t mod_ujson_load(mp_obj_t stream_obj) {
149     const mp_stream_p_t *stream_p = mp_get_stream_raise(stream_obj, MP_STREAM_OP_READ);
150     ujson_stream_t s = {stream_obj, stream_p->read, 0, 0};
151     vstr_t vstr;
152     vstr_init(&vstr, 8);
153     mp_obj_list_t stack; // we use a list as a simple stack for nested JSON
154     stack.len = 0;
155     stack.items = NULL;
156     mp_obj_t stack_top = MP_OBJ_NULL;
157     const mp_obj_type_t *stack_top_type = NULL;
158     mp_obj_t stack_key = MP_OBJ_NULL;
159     S_NEXT(s);
160     for (;;) {
161     cont:
162         if (S_END(s)) {
163             break;
164         }
165         mp_obj_t next = MP_OBJ_NULL;
166         bool enter = false;
167         byte cur = S_CUR(s);
168         S_NEXT(s);
169         switch (cur) {
170             case ',':
171             case ':':
172             case ' ':
173             case '\t':
174             case '\n':
175             case '\r':
176                 goto cont;
177             case 'n':
178                 if (S_CUR(s) == 'u' && S_NEXT(s) == 'l' && S_NEXT(s) == 'l') {
179                     S_NEXT(s);
180                     next = mp_const_none;
181                 } else {
182                     goto fail;
183                 }
184                 break;
185             case 'f':
186                 if (S_CUR(s) == 'a' && S_NEXT(s) == 'l' && S_NEXT(s) == 's' && S_NEXT(s) == 'e') {
187                     S_NEXT(s);
188                     next = mp_const_false;
189                 } else {
190                     goto fail;
191                 }
192                 break;
193             case 't':
194                 if (S_CUR(s) == 'r' && S_NEXT(s) == 'u' && S_NEXT(s) == 'e') {
195                     S_NEXT(s);
196                     next = mp_const_true;
197                 } else {
198                     goto fail;
199                 }
200                 break;
201             case '"':
202                 vstr_reset(&vstr);
203                 for (; !S_END(s) && S_CUR(s) != '"';) {
204                     byte c = S_CUR(s);
205                     if (c == '\\') {
206                         c = S_NEXT(s);
207                         switch (c) {
208                             case 'b':
209                                 c = 0x08;
210                                 break;
211                             case 'f':
212                                 c = 0x0c;
213                                 break;
214                             case 'n':
215                                 c = 0x0a;
216                                 break;
217                             case 'r':
218                                 c = 0x0d;
219                                 break;
220                             case 't':
221                                 c = 0x09;
222                                 break;
223                             case 'u': {
224                                 mp_uint_t num = 0;
225                                 for (int i = 0; i < 4; i++) {
226                                     c = (S_NEXT(s) | 0x20) - '0';
227                                     if (c > 9) {
228                                         c -= ('a' - ('9' + 1));
229                                     }
230                                     num = (num << 4) | c;
231                                 }
232                                 vstr_add_char(&vstr, num);
233                                 goto str_cont;
234                             }
235                         }
236                     }
237                     vstr_add_byte(&vstr, c);
238                 str_cont:
239                     S_NEXT(s);
240                 }
241                 if (S_END(s)) {
242                     goto fail;
243                 }
244                 S_NEXT(s);
245                 next = mp_obj_new_str(vstr.buf, vstr.len);
246                 break;
247             case '-':
248             case '0':
249             case '1':
250             case '2':
251             case '3':
252             case '4':
253             case '5':
254             case '6':
255             case '7':
256             case '8':
257             case '9': {
258                 bool flt = false;
259                 vstr_reset(&vstr);
260                 for (;;) {
261                     vstr_add_byte(&vstr, cur);
262                     cur = S_CUR(s);
263                     if (cur == '.' || cur == 'E' || cur == 'e') {
264                         flt = true;
265                     } else if (cur == '+' || cur == '-' || unichar_isdigit(cur)) {
266                         // pass
267                     } else {
268                         break;
269                     }
270                     S_NEXT(s);
271                 }
272                 if (flt) {
273                     next = mp_parse_num_decimal(vstr.buf, vstr.len, false, false, NULL);
274                 } else {
275                     next = mp_parse_num_integer(vstr.buf, vstr.len, 10, NULL);
276                 }
277                 break;
278             }
279             case '[':
280                 next = mp_obj_new_list(0, NULL);
281                 enter = true;
282                 break;
283             case '{':
284                 next = mp_obj_new_dict(0);
285                 enter = true;
286                 break;
287             case '}':
288             case ']': {
289                 if (stack_top == MP_OBJ_NULL) {
290                     // no object at all
291                     goto fail;
292                 }
293                 if (stack.len == 0) {
294                     // finished; compound object
295                     goto success;
296                 }
297                 stack.len -= 1;
298                 stack_top = stack.items[stack.len];
299                 stack_top_type = mp_obj_get_type(stack_top);
300                 goto cont;
301             }
302             default:
303                 goto fail;
304         }
305         if (stack_top == MP_OBJ_NULL) {
306             stack_top = next;
307             stack_top_type = mp_obj_get_type(stack_top);
308             if (!enter) {
309                 // finished; single primitive only
310                 goto success;
311             }
312         } else {
313             // append to list or dict
314             if (stack_top_type == &mp_type_list) {
315                 mp_obj_list_append(stack_top, next);
316             } else {
317                 if (stack_key == MP_OBJ_NULL) {
318                     stack_key = next;
319                     if (enter) {
320                         goto fail;
321                     }
322                 } else {
323                     mp_obj_dict_store(stack_top, stack_key, next);
324                     stack_key = MP_OBJ_NULL;
325                 }
326             }
327             if (enter) {
328                 if (stack.items == NULL) {
329                     mp_obj_list_init(&stack, 1);
330                     stack.items[0] = stack_top;
331                 } else {
332                     mp_obj_list_append(MP_OBJ_FROM_PTR(&stack), stack_top);
333                 }
334                 stack_top = next;
335                 stack_top_type = mp_obj_get_type(stack_top);
336             }
337         }
338     }
339 success:
340     // eat trailing whitespace
341     while (unichar_isspace(S_CUR(s))) {
342         S_NEXT(s);
343     }
344     if (!S_END(s)) {
345         // unexpected chars
346         goto fail;
347     }
348     if (stack_top == MP_OBJ_NULL || stack.len != 0) {
349         // not exactly 1 object
350         goto fail;
351     }
352     vstr_clear(&vstr);
353     return stack_top;
354 
355 fail:
356     mp_raise_ValueError(MP_ERROR_TEXT("syntax error in JSON"));
357 }
358 STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_load_obj, mod_ujson_load);
359 
mod_ujson_loads(mp_obj_t obj)360 STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
361     mp_buffer_info_t bufinfo;
362     mp_get_buffer_raise(obj, &bufinfo, MP_BUFFER_READ);
363     vstr_t vstr = {bufinfo.len, bufinfo.len, (char *)bufinfo.buf, true};
364     mp_obj_stringio_t sio = {{&mp_type_stringio}, &vstr, 0, MP_OBJ_NULL};
365     return mod_ujson_load(MP_OBJ_FROM_PTR(&sio));
366 }
367 STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_loads_obj, mod_ujson_loads);
368 
369 STATIC const mp_rom_map_elem_t mp_module_ujson_globals_table[] = {
370     { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_ujson) },
371     { MP_ROM_QSTR(MP_QSTR_dump), MP_ROM_PTR(&mod_ujson_dump_obj) },
372     { MP_ROM_QSTR(MP_QSTR_dumps), MP_ROM_PTR(&mod_ujson_dumps_obj) },
373     { MP_ROM_QSTR(MP_QSTR_load), MP_ROM_PTR(&mod_ujson_load_obj) },
374     { MP_ROM_QSTR(MP_QSTR_loads), MP_ROM_PTR(&mod_ujson_loads_obj) },
375 };
376 
377 STATIC MP_DEFINE_CONST_DICT(mp_module_ujson_globals, mp_module_ujson_globals_table);
378 
379 const mp_obj_module_t mp_module_ujson = {
380     .base = { &mp_type_module },
381     .globals = (mp_obj_dict_t *)&mp_module_ujson_globals,
382 };
383 
384 #endif // MICROPY_PY_UJSON
385