1 /* radare - LGPL - Copyright 2020 - thestr4ng3r, Yaroslav Stavnichiy */
2
3 #include <errno.h>
4
5 #include <r_util/r_utf8.h>
6 #include <r_util/r_hex.h>
7 #include <r_util/r_json.h>
8
// Parse-error reporting hook used by the parser functions in this file.
// With the condition below left at 0, the macro only evaluates and discards
// its arguments; switching it to 1 selects the fprintf variant, which prints
// the message, the reporting source line and the remaining input to stderr.
#if 0
// optional error printing
#define R_JSON_REPORT_ERROR(msg, p) fprintf(stderr, "R_JSON PARSE ERROR (%d): " msg " at %s\n", __LINE__, p)
#else
#define R_JSON_REPORT_ERROR(msg, p) do { (void)(msg); (void)(p); } while (0)
#endif
15
json_new(void)16 static RJson *json_new(void) {
17 return R_NEW0 (RJson);
18 }
19
// Allocate a node of the given type and append it to parent's child list.
//
// type:   node type stored in the new node
// key:    key pointer stored as-is (not copied); may be NULL
// parent: node whose children list receives the new node; must not be NULL
//
// Returns the new node, or NULL if allocation failed (parent is then left
// untouched).
static RJson *create_json(RJsonType type, const char *key, RJson *parent) {
	RJson *node = json_new ();
	if (node) {
		node->type = type;
		node->key = key;
		RJson *tail = parent->children.last;
		if (tail) {
			// non-empty list: chain behind the current tail
			tail->next = node;
		} else {
			// empty list: the new node is also the head
			parent->children.first = node;
		}
		parent->children.last = node;
		parent->children.count++;
	}
	return node;
}
36
r_json_free(RJson * js)37 R_API void r_json_free(RJson *js) {
38 if (!js) {
39 return;
40 }
41 if (js->type == R_JSON_OBJECT || js->type == R_JSON_ARRAY) {
42 RJson *p = js->children.first;
43 RJson *p1;
44 while (p) {
45 p1 = p->next;
46 r_json_free (p);
47 p = p1;
48 }
49 }
50 free (js);
51 }
52
unescape_string(char * s,char ** end)53 static char *unescape_string(char *s, char **end) {
54 char *p = s;
55 char *d = s;
56 char c;
57 while ((c = *p++)) {
58 if (c == '"') {
59 *d = '\0';
60 *end = p;
61 return s;
62 }
63 if (c == '\\') {
64 switch (*p) {
65 case '\\':
66 case '/':
67 case '"':
68 *d++ = *p++;
69 break;
70 case 'b':
71 *d++ = '\b';
72 p++;
73 break;
74 case 'f':
75 *d++ = '\f';
76 p++;
77 break;
78 case 'n':
79 *d++ = '\n';
80 p++;
81 break;
82 case 'r':
83 *d++ = '\r';
84 p++;
85 break;
86 case 't':
87 *d++ = '\t';
88 p++;
89 break;
90 case 'u': { // unicode
91 char *ps = p - 1;
92 ut8 high = 0, low = 0;
93 if (r_hex_to_byte (&high, p[1]) || r_hex_to_byte (&high, p[2])
94 || r_hex_to_byte (&low, p[3]) || r_hex_to_byte (&low, p[4])) {
95 R_JSON_REPORT_ERROR ("invalid unicode escape", p - 1);
96 return NULL;
97 }
98 RRune codepoint = (RRune)high << 8 | (RRune)low;
99 if ((codepoint & 0xfc00) == 0xd800) { // high surrogate; need one more unicode to succeed
100 p += 6;
101 high = low = 0;
102 if (p[-1] != '\\' || *p != 'u'
103 || r_hex_to_byte (&high, p[1]) || r_hex_to_byte (&high, p[2])
104 || r_hex_to_byte (&low, p[3]) || r_hex_to_byte (&low, p[4])) {
105 R_JSON_REPORT_ERROR ("invalid unicode surrogate", ps);
106 return NULL;
107 }
108 RRune codepoint2 = (RRune)high << 8 | (RRune)low;
109 if ((codepoint2 & 0xfc00) != 0xdc00) {
110 R_JSON_REPORT_ERROR ("invalid unicode surrogate", ps);
111 return NULL;
112 }
113 codepoint = 0x10000 + ((codepoint - 0xd800) << 10) + (codepoint2 - 0xdc00);
114 }
115 int sz = r_utf8_encode ((ut8 *)d, codepoint);
116 if (!s) {
117 R_JSON_REPORT_ERROR ("invalid codepoint", ps);
118 return NULL;
119 }
120 d += sz;
121 p += 5;
122 break;
123 }
124 default:
125 // leave untouched
126 *d++ = c;
127 break;
128 }
129 } else {
130 *d++ = c;
131 }
132 }
133 R_JSON_REPORT_ERROR ("no closing quote for string", s);
134 return NULL;
135 }
136
// Skip a block comment.
//
// ps points at the opening "/*"; the caller must ensure that ps[0], ps[1]
// and ps[2] are readable. Returns the position just past the closing "*/",
// or NULL if the comment is never terminated.
static char *skip_block_comment(char *ps) {
	char *cursor = ps + 2;
	if (*cursor == '\0') {
		R_JSON_REPORT_ERROR ("endless comment", ps);
		return NULL;
	}
	// The search starts one character past the opener's '*' so that "/*/"
	// is not mistaken for a complete comment. Each '/' found is accepted
	// only if the character before it is '*'.
	do {
		cursor = strchr (cursor + 1, '/');
		if (!cursor) {
			R_JSON_REPORT_ERROR ("endless comment", ps);
			return NULL;
		}
	} while (cursor[-1] != '*');
	return cursor + 1;
}
156
// Advance past whitespace, "//" line comments and "/* */" block comments.
//
// Returns the first position that is neither whitespace nor part of a
// comment (possibly the terminating NUL). Returns NULL on error: a line
// comment with no terminating newline, an unterminated block comment, or a
// '/' that does not start a comment.
static char *skip_whitespace(char *p) {
	for (;;) {
		if (*p == '\0') {
			return p;
		}
		if (*p != '/') {
			if (!IS_WHITECHAR (*p)) {
				return p;
			}
			p++;
			continue;
		}
		// '/' must introduce one of the two comment forms
		switch (p[1]) {
		case '/': { // line comment
			char *comment_start = p;
			char *eol = strchr (p + 2, '\n');
			if (!eol) {
				R_JSON_REPORT_ERROR ("endless comment", comment_start);
				return NULL; // error
			}
			p = eol + 1;
			break;
		}
		case '*': // block comment
			p = skip_block_comment (p);
			if (!p) {
				return NULL;
			}
			break;
		default:
			R_JSON_REPORT_ERROR ("unexpected chars", p);
			return NULL; // error
		}
	}
}
186
// Parse an object key of the form  "name" :  starting at p.
//
// key: receives a pointer to the decoded, NUL-terminated key (it points into
//      the input buffer, which is modified in place by unescape_string)
// p:   current position inside an object
//
// Returns the position just after the ':' on success. If a '}' is found
// instead of a key, returns the position of that '}' and leaves *key
// untouched. Returns NULL on any error.
static char *parse_key(const char **key, char *p) {
	p = skip_whitespace (p);
	if (!p) {
		return NULL;
	}
	const char first = *p++;
	switch (first) {
	case '"':
		*key = unescape_string (p, &p);
		if (!*key) {
			return NULL; // propagate error
		}
		p = skip_whitespace (p);
		if (!p) {
			return NULL;
		}
		if (*p != ':') {
			R_JSON_REPORT_ERROR ("unexpected chars", p);
			return NULL;
		}
		return p + 1;
	case '}':
		// on '}' return with *p=='}'
		return p - 1;
	default:
		// anything else, including end of input
		R_JSON_REPORT_ERROR ("unexpected chars", p - 1);
		return NULL; // error
	}
}
219
// Parse one JSON value starting at p and attach it to parent.
//
// parent: node that receives the parsed value as a new last child
// key:    key to store in the new node (NULL for array items and the root)
// p:      current position in the NUL-terminated, writable input
//
// Returns the position just past the parsed value, or NULL on a syntax
// error or allocation failure. Nodes created before an error stay attached
// to parent, so the caller can release them by freeing the tree.
//
// A ']' is returned as-is without creating a node; this is what lets the
// array loop accept an empty array.
static char *parse_value(RJson *parent, const char *key, char *p) {
	RJson *js;
	p = skip_whitespace (p);
	if (!p) {
		return NULL;
	}
	switch (*p) {
	case '\0':
		R_JSON_REPORT_ERROR ("unexpected end of text", p);
		return NULL; // error
	case '{':
		js = create_json (R_JSON_OBJECT, key, parent);
		if (!js) {
			R_JSON_REPORT_ERROR ("out of memory", p);
			return NULL;
		}
		p++;
		while (1) {
			const char *new_key = NULL;
			p = parse_key (&new_key, p);
			if (!p) {
				return NULL; // error
			}
			if (*p != '}') {
				p = parse_value (js, new_key, p);
				if (!p) {
					return NULL; // error
				}
			}
			p = skip_whitespace (p);
			if (!p) {
				return NULL;
			}
			if (*p == ',') {
				char *commapos = p;
				p++;
				p = skip_whitespace (p);
				if (!p) {
					return NULL;
				}
				if (*p == '}') {
					R_JSON_REPORT_ERROR ("trailing comma", commapos);
					return NULL;
				}
			} else if (*p == '}') {
				return p + 1; // end of object
			} else {
				R_JSON_REPORT_ERROR ("unexpected chars", p);
				return NULL;
			}
		}
	case '[':
		js = create_json (R_JSON_ARRAY, key, parent);
		if (!js) {
			R_JSON_REPORT_ERROR ("out of memory", p);
			return NULL;
		}
		p++;
		while (1) {
			p = parse_value (js, NULL, p);
			if (!p) {
				return NULL; // error
			}
			p = skip_whitespace (p);
			if (!p) {
				return NULL;
			}
			if (*p == ',') {
				char *commapos = p;
				p++;
				p = skip_whitespace (p);
				if (!p) {
					return NULL;
				}
				if (*p == ']') {
					R_JSON_REPORT_ERROR ("trailing comma", commapos);
					return NULL;
				}
			} else if (*p == ']') {
				return p + 1; // end of array
			} else {
				R_JSON_REPORT_ERROR ("unexpected chars", p);
				return NULL;
			}
		}
	case ']':
		return p;
	case '"':
		p++;
		js = create_json (R_JSON_STRING, key, parent);
		if (!js) {
			R_JSON_REPORT_ERROR ("out of memory", p);
			return NULL;
		}
		js->str_value = unescape_string (p, &p);
		if (!js->str_value) {
			return NULL; // propagate error
		}
		return p;
	case '-':
	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	case '8':
	case '9': {
		js = create_json (R_JSON_INTEGER, key, parent);
		if (!js) {
			R_JSON_REPORT_ERROR ("out of memory", p);
			return NULL;
		}
		// First try an integer parse; a leading '-' selects the signed
		// variant so the full unsigned 64-bit range stays available.
		errno = 0;
		char *pe;
		if (*p == '-') {
			js->num.s_value = (st64)strtoll (p, &pe, 10);
		} else {
			js->num.u_value = (ut64)strtoull (p, &pe, 10);
		}
		if (pe == p || errno == ERANGE) {
			R_JSON_REPORT_ERROR ("invalid number", p);
			return NULL; // error
		}
		if (*pe == '.' || *pe == 'e' || *pe == 'E') { // double value
			// the integer parse stopped at a fraction/exponent: reparse
			// the whole token as a double
			js->type = R_JSON_DOUBLE;
			errno = 0;
			js->num.dbl_value = strtod (p, &pe);
			if (pe == p || errno == ERANGE) {
				R_JSON_REPORT_ERROR ("invalid fractional number", p);
				return NULL; // error
			}
		} else {
			// integers also expose their value through dbl_value
			if (*p == '-') {
				js->num.dbl_value = js->num.s_value;
			} else {
				js->num.dbl_value = js->num.u_value;
			}
		}
		return pe;
	}
	case 't':
		if (!strncmp (p, "true", 4)) {
			js = create_json (R_JSON_BOOLEAN, key, parent);
			if (!js) {
				R_JSON_REPORT_ERROR ("out of memory", p);
				return NULL;
			}
			js->num.u_value = 1;
			return p + 4;
		}
		R_JSON_REPORT_ERROR ("unexpected chars", p);
		return NULL; // error
	case 'f':
		if (!strncmp (p, "false", 5)) {
			js = create_json (R_JSON_BOOLEAN, key, parent);
			if (!js) {
				R_JSON_REPORT_ERROR ("out of memory", p);
				return NULL;
			}
			js->num.u_value = 0;
			return p + 5;
		}
		R_JSON_REPORT_ERROR ("unexpected chars", p);
		return NULL; // error
	case 'n':
		if (!strncmp (p, "null", 4)) {
			// do not silently drop the node when allocation fails
			if (!create_json (R_JSON_NULL, key, parent)) {
				R_JSON_REPORT_ERROR ("out of memory", p);
				return NULL;
			}
			return p + 4;
		}
		R_JSON_REPORT_ERROR ("unexpected chars", p);
		return NULL; // error
	default:
		R_JSON_REPORT_ERROR ("unexpected chars", p);
		return NULL; // error
	}
	return NULL;
}
376
r_json_parse(char * text)377 R_API RJson *r_json_parse(char *text) {
378 RJson js = {0};
379 if (!parse_value (&js, 0, text)) {
380 if (js.children.first) {
381 r_json_free (js.children.first);
382 }
383 return 0;
384 }
385 return js.children.first;
386 }
387
r_json_get(const RJson * json,const char * key)388 R_API const RJson *r_json_get(const RJson *json, const char *key) {
389 RJson *js;
390 for (js = json->children.first; js; js = js->next) {
391 if (js->key && !strcmp (js->key, key)) {
392 return js;
393 }
394 }
395 return NULL;
396 }
397
r_json_item(const RJson * json,size_t idx)398 R_API const RJson *r_json_item(const RJson *json, size_t idx) {
399 RJson *js;
400 for (js = json->children.first; js; js = js->next) {
401 if (!idx--) {
402 return js;
403 }
404 }
405 return NULL;
406 }
407
408