1 /* Copyright (C) 2010-2018 The RetroArch team
2 *
3 * ---------------------------------------------------------------------------------------
4 * The following license statement only applies to this file (rxml.c).
5 * ---------------------------------------------------------------------------------------
6 *
7 * Permission is hereby granted, free of charge,
8 * to any person obtaining a copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation the rights to
10 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
11 * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
16 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include <stdio.h>
24 #include <stdint.h>
25 #include <stddef.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include <ctype.h>
30
31 #include <boolean.h>
32 #include <streams/file_stream.h>
33 #include <compat/posix_string.h>
34 #include <string/stdstring.h>
35
36 #include <formats/rxml.h>
37
/* In-memory handle for a parsed XML document. */
struct rxml_document
{
   /* Top-level element of the parsed tree. Assigned from rxml_parse_node()
    * in rxml_load_document_string() and released by rxml_free_document(). */
   struct rxml_node *root_node;
};
42
/* Returns the root element of a document.
 *
 * doc: document handle; may be NULL.
 *
 * Returns doc->root_node, or NULL when doc itself is NULL. */
struct rxml_node *rxml_root_node(rxml_document_t *doc)
{
   return doc ? doc->root_node : NULL;
}
49
rxml_free_node(struct rxml_node * node)50 static void rxml_free_node(struct rxml_node *node)
51 {
52 struct rxml_node *head = NULL;
53 struct rxml_attrib_node *attrib_node_head = NULL;
54
55 if (!node)
56 return;
57
58 for (head = node->children; head; )
59 {
60 struct rxml_node *next_node = (struct rxml_node*)head->next;
61 rxml_free_node(head);
62 head = next_node;
63 }
64
65 for (attrib_node_head = node->attrib; attrib_node_head; )
66 {
67 struct rxml_attrib_node *next_attrib = NULL;
68
69 next_attrib = (struct rxml_attrib_node*)attrib_node_head->next;
70
71 if (next_attrib)
72 {
73 if (attrib_node_head->attrib)
74 free(attrib_node_head->attrib);
75 if (attrib_node_head->value)
76 free(attrib_node_head->value);
77 if (attrib_node_head)
78 free(attrib_node_head);
79 }
80
81 attrib_node_head = next_attrib;
82 }
83
84 if (node->name)
85 free(node->name);
86 if (node->data)
87 free(node->data);
88 if (node)
89 free(node);
90 }
91
/* Skips an optional "<?xml ... ?>" declaration at the start of the input.
 *
 * ptr: in/out cursor into a NUL-terminated string. When the input begins
 *      with "<?xml", the cursor is advanced to the character following
 *      the first "?>\n" sequence. Otherwise it is left untouched.
 *
 * Returns false only when a declaration is opened but no "?>\n"
 * terminator is found; true in every other case. The declared encoding
 * is not inspected. */
static bool validate_header(const char **ptr)
{
   static const char decl_open[] = "<?xml";
   const char *eol               = NULL;

   /* strncmp stops at the terminating NUL, so inputs shorter than the
    * marker are never read past their end (memcmp would be). */
   if (strncmp(*ptr, decl_open, sizeof(decl_open) - 1) != 0)
      return true;

   eol = strstr(*ptr, "?>\n");
   if (!eol)
      return false;

   /* Always use UTF-8. Don't really care to check. */
   *ptr = eol + 3;
   return true;
}
106
/* Tells whether every character in the half-open range [begin, end)
 * is whitespace according to isspace().
 *
 * Returns true for an empty range. */
static bool range_is_space(const char *begin, const char *end)
{
   for (; begin < end; begin++)
   {
      /* The cast keeps bytes >= 0x80 (e.g. UTF-8) in the domain that
       * isspace() is defined for when plain char is signed. */
      if (!isspace((unsigned char)*begin))
         return false;
   }

   return true;
}
115
/* Advances *ptr_ past any leading whitespace (as defined by isspace()).
 *
 * ptr_: in/out cursor into a NUL-terminated string. The scan stops at
 *       the terminating NUL at the latest, since NUL is not whitespace. */
static void rxml_skip_spaces(const char **ptr_)
{
   const char *ptr = *ptr_;

   /* Cast so that bytes >= 0x80 are valid isspace() arguments even when
    * plain char is signed. */
   while (isspace((unsigned char)*ptr))
      ptr++;

   *ptr_ = ptr;
}
124
/* Duplicates the characters in the half-open range [begin, end) into a
 * freshly allocated, NUL-terminated string.
 *
 * Returns the new string (to be released with free()), or NULL when the
 * allocation fails. */
static char *strdup_range(const char *begin, const char *end)
{
   ptrdiff_t count = end - begin;
   char *copy      = (char*)malloc(count + 1);

   if (copy)
   {
      memcpy(copy, begin, count);
      copy[count] = '\0';
   }

   return copy;
}
137
/* Duplicates [begin, end) for use as an attribute name or value.
 *
 * XML escape sequences are not decoded: the range is copied verbatim
 * through strdup_range(). Returns NULL when that copy fails. */
static char *strdup_range_escape(const char *begin, const char *end)
{
   /* Escaping is ignored. Assume we don't deal with that. */
   char *verbatim = strdup_range(begin, end);

   return verbatim;
}
143
rxml_parse_attrs(const char * str)144 static struct rxml_attrib_node *rxml_parse_attrs(const char *str)
145 {
146 const char *elem;
147 struct rxml_attrib_node *list = NULL;
148 struct rxml_attrib_node *tail = NULL;
149 char *attrib = NULL;
150 char *value = NULL;
151 char *last_char = NULL;
152 char *save = NULL;
153 char *copy = strdup(str);
154 if (!copy)
155 return NULL;
156
157 last_char = copy + strlen(copy) - 1;
158 if (*last_char == '/')
159 *last_char = '\0';
160
161 elem = strtok_r(copy, " \n\t\f\v\r", &save);
162 while (elem)
163 {
164 const char *end;
165 struct rxml_attrib_node *new_node;
166 const char *eq = strstr(elem, "=\"");
167 if (!eq)
168 goto end;
169
170 end = strrchr(eq + 2, '\"');
171 if (!end || end != (elem + strlen(elem) - 1))
172 goto end;
173
174 attrib = strdup_range_escape(elem, eq);
175 value = strdup_range_escape(eq + 2, end);
176 if (!attrib || !value)
177 goto end;
178
179 new_node =
180 (struct rxml_attrib_node*)calloc(1, sizeof(*new_node));
181 if (!new_node)
182 goto end;
183
184 new_node->attrib = attrib;
185 new_node->value = value;
186 attrib = NULL;
187 value = NULL;
188
189 if (tail)
190 {
191 tail->next = new_node;
192 tail = new_node;
193 }
194 else
195 list = tail = new_node;
196
197 elem = strtok_r(NULL, " \n\t\f\v\r", &save);
198 }
199
200 end:
201 if (copy)
202 free(copy);
203 if (attrib)
204 free(attrib);
205 if (value)
206 free(value);
207 return list;
208 }
209
/* Finds the first whitespace character (per isspace()) in a
 * NUL-terminated string.
 *
 * Returns a pointer to that character, or NULL when the string holds
 * no whitespace. */
static char *find_first_space(const char *str)
{
   /* Cast so that bytes >= 0x80 are valid isspace() arguments even when
    * plain char is signed. */
   while (*str && !isspace((unsigned char)*str))
      str++;

   /* The loop stops either on the terminating NUL or on whitespace. */
   return *str ? (char*)str : NULL;
}
217
rxml_parse_tag(struct rxml_node * node,const char * str)218 static bool rxml_parse_tag(struct rxml_node *node, const char *str)
219 {
220 const char *name_end;
221 const char *str_ptr = str;
222 rxml_skip_spaces(&str_ptr);
223
224 name_end = find_first_space(str_ptr);
225 if (name_end)
226 {
227 node->name = strdup_range(str_ptr, name_end);
228 if (!node->name || !*node->name)
229 return false;
230
231 node->attrib = rxml_parse_attrs(name_end);
232 return true;
233 }
234 else
235 {
236 node->name = strdup(str_ptr);
237 return node->name && *node->name;
238 }
239 }
240
rxml_parse_node(const char ** ptr_)241 static struct rxml_node *rxml_parse_node(const char **ptr_)
242 {
243 const char *ptr = NULL;
244 const char *closing = NULL;
245 char *str = NULL;
246 bool is_closing = false;
247
248 struct rxml_node *node = (struct rxml_node*)calloc(1, sizeof(*node));
249 if (!node)
250 return NULL;
251
252 rxml_skip_spaces(ptr_);
253
254 ptr = *ptr_;
255 if (*ptr != '<')
256 goto error;
257
258 closing = strchr(ptr, '>');
259 if (!closing)
260 goto error;
261
262 str = strdup_range(ptr + 1, closing);
263 if (!str)
264 goto error;
265
266 if (!rxml_parse_tag(node, str))
267 goto error;
268
269 /* Are spaces between / and > allowed? */
270 is_closing = strstr(ptr, "/>") + 1 == closing;
271
272 /* Look for more data. Either child nodes or data. */
273 if (!is_closing)
274 {
275 size_t closing_tag_size = strlen(node->name) + 4;
276 char *closing_tag = (char*)malloc(closing_tag_size);
277
278 const char *cdata_start = NULL;
279 const char *child_start = NULL;
280 const char *closing_start = NULL;
281
282 if (!closing_tag)
283 goto error;
284
285 snprintf(closing_tag, closing_tag_size, "</%s>", node->name);
286
287 cdata_start = strstr(closing + 1, "<![CDATA[");
288 child_start = strchr(closing + 1, '<');
289 closing_start = strstr(closing + 1, closing_tag);
290
291 if (!closing_start)
292 {
293 free(closing_tag);
294 goto error;
295 }
296
297 if (cdata_start && range_is_space(closing + 1, cdata_start))
298 {
299 /* CDATA section */
300 const char *cdata_end = strstr(cdata_start, "]]>");
301 if (!cdata_end)
302 {
303 free(closing_tag);
304 goto error;
305 }
306
307 node->data = strdup_range(cdata_start +
308 STRLEN_CONST("<![CDATA["), cdata_end);
309 }
310 else if (closing_start && closing_start == child_start) /* Simple Data */
311 node->data = strdup_range(closing + 1, closing_start);
312 else
313 {
314 /* Parse all child nodes. */
315 struct rxml_node *list = NULL;
316 struct rxml_node *tail = NULL;
317 const char *first_start = NULL;
318 const char *first_closing = NULL;
319
320 ptr = child_start;
321 first_start = strchr(ptr, '<');
322 first_closing = strstr(ptr, "</");
323
324 while (
325 first_start &&
326 first_closing &&
327 (first_start < first_closing)
328 )
329 {
330 struct rxml_node *new_node = rxml_parse_node(&ptr);
331
332 if (!new_node)
333 {
334 free(closing_tag);
335 goto error;
336 }
337
338 if (tail)
339 {
340 tail->next = new_node;
341 tail = new_node;
342 }
343 else
344 list = tail = new_node;
345
346 first_start = strchr(ptr, '<');
347 first_closing = strstr(ptr, "</");
348 }
349
350 node->children = list;
351
352 closing_start = strstr(ptr, closing_tag);
353 if (!closing_start)
354 {
355 free(closing_tag);
356 goto error;
357 }
358 }
359
360 *ptr_ = closing_start + strlen(closing_tag);
361 free(closing_tag);
362 }
363 else
364 *ptr_ = closing + 1;
365
366 if (str)
367 free(str);
368 return node;
369
370 error:
371 if (str)
372 free(str);
373 rxml_free_node(node);
374 return NULL;
375 }
376
/* Returns a copy of str with every complete XML comment removed.
 *
 * A comment is the text from "<!--" up to and including the first
 * "-->" that follows that opener. An opener with no terminator is left
 * in the output untouched, together with everything after it.
 *
 * str: NUL-terminated input; not modified.
 *
 * Returns a newly allocated string (release with free()), or NULL when
 * the allocation fails. The result is never longer than the input. */
static char *purge_xml_comments(const char *str)
{
   static const char open_mark[]  = "<!--";
   static const char close_mark[] = "-->";
   const char *copy_src           = str;
   char *copy_dest                = NULL;
   size_t len                     = strlen(str);
   char *new_str                  = (char*)malloc(len + 1);

   if (!new_str)
      return NULL;

   copy_dest = new_str;

   for (;;)
   {
      size_t copy_len           = 0;
      const char *comment_end   = NULL;
      const char *comment_start = strstr(copy_src, open_mark);

      if (!comment_start)
         break;

      /* Search for the terminator only AFTER the opener. Matching a
       * stray "-->" that precedes the opener would move copy_src
       * backwards, copying text twice and overrunning new_str. */
      comment_end = strstr(comment_start + (sizeof(open_mark) - 1),
            close_mark);
      if (!comment_end)
         break;

      /* Keep the text that precedes the comment, then skip past it. */
      copy_len = (size_t)(comment_start - copy_src);
      memcpy(copy_dest, copy_src, copy_len);

      copy_dest += copy_len;
      copy_src   = comment_end + (sizeof(close_mark) - 1);
   }

   /* Copy the remaining tail. memcpy with the known length is used
    * instead of strcpy(), which some toolchains warn about. */
   len = strlen(copy_src);
   memcpy(copy_dest, copy_src, len);
   copy_dest[len] = '\0';

   return new_str;
}
415
rxml_load_document(const char * path)416 rxml_document_t *rxml_load_document(const char *path)
417 {
418 rxml_document_t *doc;
419 char *memory_buffer = NULL;
420 long len = 0;
421 RFILE *file = filestream_open(path,
422 RETRO_VFS_FILE_ACCESS_READ,
423 RETRO_VFS_FILE_ACCESS_HINT_NONE);
424 if (!file)
425 return NULL;
426
427 len = filestream_get_size(file);
428 memory_buffer = (char*)malloc(len + 1);
429 if (!memory_buffer)
430 goto error;
431
432 memory_buffer[len] = '\0';
433 if (filestream_read(file, memory_buffer, len) != (size_t)len)
434 goto error;
435
436 filestream_close(file);
437 file = NULL;
438
439 doc = rxml_load_document_string(memory_buffer);
440
441 free(memory_buffer);
442 return doc;
443
444 error:
445 free(memory_buffer);
446 if(file)
447 filestream_close(file);
448 return NULL;
449 }
450
rxml_load_document_string(const char * str)451 rxml_document_t *rxml_load_document_string(const char *str)
452 {
453 rxml_document_t *doc;
454 char *memory_buffer = NULL;
455 const char *mem_ptr = NULL;
456
457 doc = (rxml_document_t*)calloc(1, sizeof(*doc));
458 if (!doc)
459 goto error;
460
461 mem_ptr = str;
462
463 if (!validate_header(&mem_ptr))
464 goto error;
465
466 memory_buffer = purge_xml_comments(mem_ptr);
467 if (!memory_buffer)
468 goto error;
469
470 mem_ptr = memory_buffer;
471
472 doc->root_node = rxml_parse_node(&mem_ptr);
473 if (!doc->root_node)
474 goto error;
475
476 free(memory_buffer);
477 return doc;
478
479 error:
480 free(memory_buffer);
481 rxml_free_document(doc);
482 return NULL;
483 }
484
/* Releases a document and the node tree it holds.
 *
 * doc: document to free; NULL is accepted and ignored. */
void rxml_free_document(rxml_document_t *doc)
{
   if (doc)
   {
      struct rxml_node *root = doc->root_node;

      if (root)
         rxml_free_node(root);

      free(doc);
   }
}
495
rxml_node_attrib(struct rxml_node * node,const char * attrib)496 const char *rxml_node_attrib(struct rxml_node *node, const char *attrib)
497 {
498 struct rxml_attrib_node *attribs = NULL;
499 for (attribs = node->attrib; attribs; attribs = attribs->next)
500 {
501 if (string_is_equal(attrib, attribs->attrib))
502 return attribs->value;
503 }
504
505 return NULL;
506 }
507