1 /* Copyright  (C) 2010-2018 The RetroArch team
2  *
3  * ---------------------------------------------------------------------------------------
4  * The following license statement only applies to this file (rxml.c).
5  * ---------------------------------------------------------------------------------------
6  *
7  * Permission is hereby granted, free of charge,
8  * to any person obtaining a copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation the rights to
10  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
11  * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
16  * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include <stdio.h>
24 #include <stdint.h>
25 #include <stddef.h>
26 #include <stdlib.h>
27 #include <string.h>
28 
29 #include <ctype.h>
30 
31 #include <boolean.h>
32 #include <streams/file_stream.h>
33 #include <compat/posix_string.h>
34 #include <string/stdstring.h>
35 
36 #include <formats/rxml.h>
37 
/* Handle for a parsed XML document.
 * Allocated by rxml_load_document_string() and released,
 * together with its tree, by rxml_free_document(). */
struct rxml_document
{
   /* Node returned by rxml_parse_node() for the document text;
    * handed out to callers through rxml_root_node(). */
   struct rxml_node *root_node;
};
42 
/**
 * rxml_root_node:
 * @doc : document handle (may be NULL).
 *
 * Returns: the root node stored in @doc, or NULL when @doc is NULL.
 */
struct rxml_node *rxml_root_node(rxml_document_t *doc)
{
   return doc ? doc->root_node : NULL;
}
49 
rxml_free_node(struct rxml_node * node)50 static void rxml_free_node(struct rxml_node *node)
51 {
52    struct rxml_node *head = NULL;
53    struct rxml_attrib_node *attrib_node_head = NULL;
54 
55    if (!node)
56       return;
57 
58    for (head = node->children; head; )
59    {
60       struct rxml_node *next_node = (struct rxml_node*)head->next;
61       rxml_free_node(head);
62       head = next_node;
63    }
64 
65    for (attrib_node_head = node->attrib; attrib_node_head; )
66    {
67       struct rxml_attrib_node *next_attrib = NULL;
68 
69       next_attrib = (struct rxml_attrib_node*)attrib_node_head->next;
70 
71       if (next_attrib)
72       {
73          if (attrib_node_head->attrib)
74             free(attrib_node_head->attrib);
75          if (attrib_node_head->value)
76             free(attrib_node_head->value);
77          if (attrib_node_head)
78             free(attrib_node_head);
79       }
80 
81       attrib_node_head = next_attrib;
82    }
83 
84    if (node->name)
85       free(node->name);
86    if (node->data)
87       free(node->data);
88    if (node)
89       free(node);
90 }
91 
/**
 * validate_header:
 * @ptr : in/out cursor into the NUL-terminated document text.
 *
 * If the text starts with an XML declaration ("<?xml ... ?>"),
 * advances *@ptr past it (and past a '\n' directly following it).
 * The declaration's contents are not inspected; UTF-8 is assumed.
 *
 * Returns: false only when a declaration is opened but never
 * terminated with "?>". Text without a declaration is accepted
 * and leaves *@ptr untouched.
 */
static bool validate_header(const char **ptr)
{
   const char *decl_end = NULL;

   /* strncmp() stops at the terminator, so inputs shorter than
    * the 5-byte prefix are never read out of bounds. */
   if (strncmp(*ptr, "<?xml", 5) != 0)
      return true;

   /* Accept any terminator, not just "?>\n", so that CRLF files and
    * declarations followed directly by markup are handled as well. */
   decl_end = strstr(*ptr, "?>");
   if (!decl_end)
      return false;

   decl_end += 2;
   if (*decl_end == '\n')
      decl_end++;

   *ptr = decl_end;
   return true;
}
106 
/**
 * range_is_space:
 * @begin : first character of the range.
 * @end   : one past the last character of the range.
 *
 * Returns: true when every character in [@begin, @end) is
 * whitespace according to isspace(); an empty range yields true.
 */
static bool range_is_space(const char *begin, const char *end)
{
   for (; begin < end; begin++)
   {
      /* <ctype.h> functions require a value representable as
       * unsigned char; a negative plain char is undefined behavior. */
      if (!isspace((unsigned char)*begin))
         return false;
   }

   return true;
}
115 
/**
 * rxml_skip_spaces:
 * @ptr_ : in/out cursor into a NUL-terminated string.
 *
 * Advances *@ptr_ to the first non-whitespace character
 * (possibly the terminating NUL).
 */
static void rxml_skip_spaces(const char **ptr_)
{
   const char *ptr = *ptr_;

   /* Cast to unsigned char: passing a negative plain char
    * to isspace() is undefined behavior. */
   while (isspace((unsigned char)*ptr))
      ptr++;

   *ptr_ = ptr;
}
124 
/**
 * strdup_range:
 * @begin : first character to copy.
 * @end   : one past the last character to copy.
 *
 * Returns: a newly malloc()ed, NUL-terminated copy of
 * [@begin, @end), or NULL when the allocation fails.
 * The caller owns the result and must free() it.
 */
static char *strdup_range(const char *begin, const char *end)
{
   ptrdiff_t count = end - begin;
   char *copy      = (char*)malloc(count + 1);

   if (copy)
   {
      memcpy(copy, begin, count);
      copy[count] = '\0';
   }

   return copy;
}
137 
/**
 * strdup_range_escape:
 * @begin : first character to copy.
 * @end   : one past the last character to copy.
 *
 * Duplicates [@begin, @end) for use as an attribute name or value.
 * No unescaping is performed; the text is copied verbatim
 * through strdup_range().
 *
 * Returns: the malloc()ed copy, or NULL on allocation failure.
 */
static char *strdup_range_escape(const char *begin, const char *end)
{
   char *verbatim = strdup_range(begin, end);
   return verbatim;
}
143 
rxml_parse_attrs(const char * str)144 static struct rxml_attrib_node *rxml_parse_attrs(const char *str)
145 {
146    const char *elem;
147    struct rxml_attrib_node *list = NULL;
148    struct rxml_attrib_node *tail = NULL;
149    char *attrib                  = NULL;
150    char *value                   = NULL;
151    char *last_char               = NULL;
152    char *save                    = NULL;
153    char *copy                    = strdup(str);
154    if (!copy)
155       return NULL;
156 
157    last_char = copy + strlen(copy) - 1;
158    if (*last_char == '/')
159       *last_char = '\0';
160 
161    elem = strtok_r(copy, " \n\t\f\v\r", &save);
162    while (elem)
163    {
164       const char *end;
165       struct rxml_attrib_node *new_node;
166       const char *eq = strstr(elem, "=\"");
167       if (!eq)
168          goto end;
169 
170       end = strrchr(eq + 2, '\"');
171       if (!end || end != (elem + strlen(elem) - 1))
172          goto end;
173 
174       attrib = strdup_range_escape(elem, eq);
175       value  = strdup_range_escape(eq + 2, end);
176       if (!attrib || !value)
177          goto end;
178 
179       new_node =
180          (struct rxml_attrib_node*)calloc(1, sizeof(*new_node));
181       if (!new_node)
182          goto end;
183 
184       new_node->attrib = attrib;
185       new_node->value  = value;
186       attrib           = NULL;
187       value            = NULL;
188 
189       if (tail)
190       {
191          tail->next = new_node;
192          tail = new_node;
193       }
194       else
195          list = tail = new_node;
196 
197       elem = strtok_r(NULL, " \n\t\f\v\r", &save);
198    }
199 
200 end:
201    if (copy)
202       free(copy);
203    if (attrib)
204       free(attrib);
205    if (value)
206       free(value);
207    return list;
208 }
209 
/**
 * find_first_space:
 * @str : NUL-terminated string to scan.
 *
 * Returns: pointer to the first whitespace character in @str
 * (as classified by isspace()), or NULL when there is none.
 */
static char *find_first_space(const char *str)
{
   /* Cast to unsigned char: passing a negative plain char
    * to isspace() is undefined behavior. */
   while (*str && !isspace((unsigned char)*str))
      str++;

   /* The loop ends either on whitespace or on the terminator. */
   return *str ? (char*)str : NULL;
}
217 
rxml_parse_tag(struct rxml_node * node,const char * str)218 static bool rxml_parse_tag(struct rxml_node *node, const char *str)
219 {
220    const char *name_end;
221    const char *str_ptr = str;
222    rxml_skip_spaces(&str_ptr);
223 
224    name_end = find_first_space(str_ptr);
225    if (name_end)
226    {
227       node->name = strdup_range(str_ptr, name_end);
228       if (!node->name || !*node->name)
229          return false;
230 
231       node->attrib = rxml_parse_attrs(name_end);
232       return true;
233    }
234    else
235    {
236       node->name = strdup(str_ptr);
237       return node->name && *node->name;
238    }
239 }
240 
rxml_parse_node(const char ** ptr_)241 static struct rxml_node *rxml_parse_node(const char **ptr_)
242 {
243    const char *ptr     = NULL;
244    const char *closing = NULL;
245    char *str           = NULL;
246    bool is_closing     = false;
247 
248    struct rxml_node *node = (struct rxml_node*)calloc(1, sizeof(*node));
249    if (!node)
250       return NULL;
251 
252    rxml_skip_spaces(ptr_);
253 
254    ptr = *ptr_;
255    if (*ptr != '<')
256       goto error;
257 
258    closing = strchr(ptr, '>');
259    if (!closing)
260       goto error;
261 
262    str = strdup_range(ptr + 1, closing);
263    if (!str)
264       goto error;
265 
266    if (!rxml_parse_tag(node, str))
267       goto error;
268 
269    /* Are spaces between / and > allowed? */
270    is_closing = strstr(ptr, "/>") + 1 == closing;
271 
272    /* Look for more data. Either child nodes or data. */
273    if (!is_closing)
274    {
275       size_t closing_tag_size = strlen(node->name) + 4;
276       char *closing_tag = (char*)malloc(closing_tag_size);
277 
278       const char *cdata_start = NULL;
279       const char *child_start = NULL;
280       const char *closing_start = NULL;
281 
282       if (!closing_tag)
283          goto error;
284 
285       snprintf(closing_tag, closing_tag_size, "</%s>", node->name);
286 
287       cdata_start   = strstr(closing + 1, "<![CDATA[");
288       child_start   = strchr(closing + 1, '<');
289       closing_start = strstr(closing + 1, closing_tag);
290 
291       if (!closing_start)
292       {
293          free(closing_tag);
294          goto error;
295       }
296 
297       if (cdata_start && range_is_space(closing + 1, cdata_start))
298       {
299          /* CDATA section */
300          const char *cdata_end = strstr(cdata_start, "]]>");
301          if (!cdata_end)
302          {
303             free(closing_tag);
304             goto error;
305          }
306 
307          node->data = strdup_range(cdata_start +
308                STRLEN_CONST("<![CDATA["), cdata_end);
309       }
310       else if (closing_start && closing_start == child_start) /* Simple Data */
311          node->data = strdup_range(closing + 1, closing_start);
312       else
313       {
314          /* Parse all child nodes. */
315          struct rxml_node *list = NULL;
316          struct rxml_node *tail = NULL;
317          const char *first_start = NULL;
318          const char *first_closing = NULL;
319 
320          ptr           = child_start;
321          first_start   = strchr(ptr, '<');
322          first_closing = strstr(ptr, "</");
323 
324          while (
325                 first_start &&
326                 first_closing &&
327                 (first_start < first_closing)
328                 )
329          {
330             struct rxml_node *new_node = rxml_parse_node(&ptr);
331 
332             if (!new_node)
333             {
334                free(closing_tag);
335                goto error;
336             }
337 
338             if (tail)
339             {
340                tail->next = new_node;
341                tail = new_node;
342             }
343             else
344                list = tail = new_node;
345 
346             first_start   = strchr(ptr, '<');
347             first_closing = strstr(ptr, "</");
348          }
349 
350          node->children = list;
351 
352          closing_start = strstr(ptr, closing_tag);
353          if (!closing_start)
354          {
355             free(closing_tag);
356             goto error;
357          }
358       }
359 
360       *ptr_ = closing_start + strlen(closing_tag);
361       free(closing_tag);
362    }
363    else
364       *ptr_ = closing + 1;
365 
366    if (str)
367       free(str);
368    return node;
369 
370 error:
371    if (str)
372       free(str);
373    rxml_free_node(node);
374    return NULL;
375 }
376 
/**
 * purge_xml_comments:
 * @str : NUL-terminated document text.
 *
 * Returns: a newly malloc()ed copy of @str with every complete
 * "<!-- ... -->" comment removed, or NULL on allocation failure.
 * An unterminated comment is copied through unchanged.
 * The caller owns the result and must free() it.
 */
static char *purge_xml_comments(const char *str)
{
   static const char open_tag[]  = "<!--";
   static const char close_tag[] = "-->";
   const size_t open_len         = sizeof(open_tag)  - 1;
   const size_t close_len        = sizeof(close_tag) - 1;
   size_t len                    = strlen(str);
   const char *copy_src          = str;
   /* The output is never longer than the input. */
   char *new_str                 = (char*)malloc(len + 1);
   char *copy_dest               = new_str;

   if (!new_str)
      return NULL;

   for (;;)
   {
      size_t copy_len;
      const char *comment_end   = NULL;
      const char *comment_start = strstr(copy_src, open_tag);

      if (!comment_start)
         break;

      /* Search for the terminator AFTER the opener. Searching from
       * copy_src could match a stray "-->" located before the comment,
       * which would move copy_src backwards, duplicate text and
       * overflow the output buffer. */
      comment_end = strstr(comment_start + open_len, close_tag);
      if (!comment_end)
         break;

      copy_len = (size_t)(comment_start - copy_src);
      memcpy(copy_dest, copy_src, copy_len);

      copy_dest += copy_len;
      copy_src   = comment_end + close_len;
   }

   /* Copy the remaining tail, then terminate the string. */
   len = strlen(copy_src);
   memcpy(copy_dest, copy_src, len);
   copy_dest[len] = '\0';

   return new_str;
}
415 
rxml_load_document(const char * path)416 rxml_document_t *rxml_load_document(const char *path)
417 {
418    rxml_document_t *doc;
419    char *memory_buffer     = NULL;
420    long len                = 0;
421    RFILE *file             = filestream_open(path,
422          RETRO_VFS_FILE_ACCESS_READ,
423          RETRO_VFS_FILE_ACCESS_HINT_NONE);
424    if (!file)
425       return NULL;
426 
427    len           = filestream_get_size(file);
428    memory_buffer = (char*)malloc(len + 1);
429    if (!memory_buffer)
430       goto error;
431 
432    memory_buffer[len] = '\0';
433    if (filestream_read(file, memory_buffer, len) != (size_t)len)
434       goto error;
435 
436    filestream_close(file);
437    file = NULL;
438 
439    doc = rxml_load_document_string(memory_buffer);
440 
441    free(memory_buffer);
442    return doc;
443 
444 error:
445    free(memory_buffer);
446    if(file)
447       filestream_close(file);
448    return NULL;
449 }
450 
rxml_load_document_string(const char * str)451 rxml_document_t *rxml_load_document_string(const char *str)
452 {
453    rxml_document_t *doc;
454    char *memory_buffer = NULL;
455    const char *mem_ptr = NULL;
456 
457    doc = (rxml_document_t*)calloc(1, sizeof(*doc));
458    if (!doc)
459       goto error;
460 
461    mem_ptr = str;
462 
463    if (!validate_header(&mem_ptr))
464       goto error;
465 
466    memory_buffer = purge_xml_comments(mem_ptr);
467    if (!memory_buffer)
468       goto error;
469 
470    mem_ptr = memory_buffer;
471 
472    doc->root_node = rxml_parse_node(&mem_ptr);
473    if (!doc->root_node)
474       goto error;
475 
476    free(memory_buffer);
477    return doc;
478 
479 error:
480    free(memory_buffer);
481    rxml_free_document(doc);
482    return NULL;
483 }
484 
/**
 * rxml_free_document:
 * @doc : document to release (may be NULL).
 *
 * Frees the document's node tree, if any, and then the
 * document handle itself.
 */
void rxml_free_document(rxml_document_t *doc)
{
   if (doc)
   {
      struct rxml_node *root = doc->root_node;

      if (root)
         rxml_free_node(root);

      free(doc);
   }
}
495 
rxml_node_attrib(struct rxml_node * node,const char * attrib)496 const char *rxml_node_attrib(struct rxml_node *node, const char *attrib)
497 {
498    struct rxml_attrib_node *attribs = NULL;
499    for (attribs = node->attrib; attribs; attribs = attribs->next)
500    {
501       if (string_is_equal(attrib, attribs->attrib))
502          return attribs->value;
503    }
504 
505    return NULL;
506 }
507