1 /*
2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2012 Hiroyuki Yamamoto and the Claws Mail team
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #include "claws-features.h"
23 #endif
24
25 #include <glib.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <ctype.h>
29
30 #include "xml.h"
31 #include "utils.h"
32 #include "codeconv.h"
33 #include "file-utils.h"
34
35 #define SPARSE_MEMORY
/* When SPARSE_MEMORY is defined, all attribute names and tag names are
 * stored in a shared string table (a hash table). */
38 #if defined(SPARSE_MEMORY)
39 #include "stringtable.h"
40
41 static StringTable *xml_string_table;
42 static XMLTag *xml_copy_tag (XMLTag *tag);
43 static XMLAttr *xml_copy_attr (XMLAttr *attr);
44 static void xml_free_node (XMLNode *node);
45 static void xml_free_tag (XMLTag *tag);
46 static void xml_pop_tag (XMLFile *file);
47 static void xml_push_tag (XMLFile *file,
48 XMLTag *tag);
49 static gint xml_read_line (XMLFile *file);
50 static void xml_truncate_buf (XMLFile *file);
51 static gint xml_unescape_str (gchar *str);
52
xml_string_table_create(void)53 static void xml_string_table_create(void)
54 {
55 if (xml_string_table == NULL)
56 xml_string_table = string_table_new();
57 }
58 #define XML_STRING_ADD(str) \
59 string_table_insert_string(xml_string_table, (str))
60 #define XML_STRING_FREE(str) \
61 string_table_free_string(xml_string_table, (str))
62
63 #define XML_STRING_TABLE_CREATE() \
64 xml_string_table_create()
65
66 #else /* !SPARSE_MEMORY */
67
68 #define XML_STRING_ADD(str) \
69 g_strdup(str)
70 #define XML_STRING_FREE(str) \
71 g_free(str)
72
73 #define XML_STRING_TABLE_CREATE()
74
75 #endif /* SPARSE_MEMORY */
76
77 static gint xml_get_parenthesis (XMLFile *file,
78 gchar *buf,
79 gint len);
80
xml_open_file(const gchar * path)81 XMLFile *xml_open_file(const gchar *path)
82 {
83 XMLFile *newfile;
84
85 cm_return_val_if_fail(path != NULL, NULL);
86
87 newfile = g_new(XMLFile, 1);
88
89 newfile->fp = claws_fopen(path, "rb");
90 if (!newfile->fp) {
91 FILE_OP_ERROR(path, "fopen");
92 g_free(newfile);
93 return NULL;
94 }
95
96 XML_STRING_TABLE_CREATE();
97
98 newfile->buf = g_string_new(NULL);
99 newfile->bufp = newfile->buf->str;
100
101 newfile->dtd = NULL;
102 newfile->encoding = NULL;
103 newfile->tag_stack = NULL;
104 newfile->level = 0;
105 newfile->is_empty_element = FALSE;
106
107 newfile->path = g_strdup(path);
108
109 return newfile;
110 }
111
/*
 * Close the underlying stream (if any) and release everything owned by
 * `file`: the read buffer, the stored declaration, encoding and path
 * strings, any tags still on the tag stack, and the XMLFile itself.
 */
void xml_close_file(XMLFile *file)
{
	cm_return_if_fail(file != NULL);

	if (file->fp != NULL)
		claws_fclose(file->fp);

	/* Tags left on the stack (e.g. after a parse error) are freed too. */
	while (file->tag_stack != NULL)
		xml_pop_tag(file);

	g_string_free(file->buf, TRUE);
	g_free(file->path);
	g_free(file->encoding);
	g_free(file->dtd);

	g_free(file);
}
129
/*
 * Recursively read tags from `file` and attach one GNode per opened tag
 * below `parent` (or create a root node when `parent` is NULL).
 *
 * `level` is the nesting level of the caller. The loop ends when parsing
 * fails, when the parser's level drops to `level` or below, when there is
 * no current tag, or when the level reaches 0 after a child subtree.
 *
 * Returns the last node created by this call, or NULL if none was.
 */
static GNode *xml_build_tree(XMLFile *file, GNode *parent, guint level)
{
	GNode *last = NULL;

	while (xml_parse_next_tag(file) == 0) {
		XMLTag *cur_tag;
		XMLNode *payload;

		if (file->level <= level) {
			/* Staying on the caller's level is reported as an
			 * error; dropping below it just ends this subtree. */
			if (file->level == level)
				g_warning("xml_build_tree(): Parse error in %s", file->path);
			break;
		}

		cur_tag = xml_get_current_tag(file);
		if (cur_tag == NULL)
			break;

		/* The node gets its own copy of the tag; the stack keeps
		 * ownership of the original. */
		payload = xml_node_new(xml_copy_tag(cur_tag), NULL);
		payload->element = xml_get_element(file);

		if (parent != NULL)
			last = g_node_append_data(parent, payload);
		else
			last = g_node_new(payload);

		xml_build_tree(file, last, file->level);
		if (file->level == 0)
			break;
	}

	return last;
}
158
xml_parse_file(const gchar * path)159 GNode *xml_parse_file(const gchar *path)
160 {
161 XMLFile *file;
162 GNode *node;
163
164 file = xml_open_file(path);
165 if (file == NULL)
166 return NULL;
167
168 xml_get_dtd(file);
169
170 node = xml_build_tree(file, NULL, file->level);
171
172 xml_close_file(file);
173
174 #if defined(SPARSE_MEMORY)
175 if (debug_get_mode())
176 string_table_get_stats(xml_string_table);
177 #endif
178
179 return node;
180 }
181
xml_get_dtd(XMLFile * file)182 gint xml_get_dtd(XMLFile *file)
183 {
184 gchar buf[XMLBUFSIZE];
185 gchar *bufp = buf;
186
187 if (xml_get_parenthesis(file, buf, sizeof(buf)) < 0) return -1;
188
189 if ((*bufp++ == '?') &&
190 (bufp = strcasestr(bufp, "xml")) &&
191 (bufp = strcasestr(bufp + 3, "version")) &&
192 (bufp = strchr(bufp + 7, '?'))) {
193 file->dtd = g_strdup(buf);
194 if ((bufp = strcasestr(buf, "encoding=\""))) {
195 bufp += 9;
196 extract_quote(bufp, '"');
197 file->encoding = g_strdup(bufp);
198 file->need_codeconv =
199 g_strcmp0(bufp, CS_INTERNAL);
200 } else {
201 file->encoding = g_strdup(CS_INTERNAL);
202 file->need_codeconv = FALSE;
203 }
204 } else {
205 g_warning("Can't get XML DTD in %s", file->path);
206 return -1;
207 }
208
209 return 0;
210 }
211
xml_parse_next_tag(XMLFile * file)212 gint xml_parse_next_tag(XMLFile *file)
213 {
214 gchar buf[XMLBUFSIZE];
215 gchar *bufp = buf;
216 gchar *tag_str;
217 XMLTag *tag;
218 gint len;
219
220 next:
221 if (file->is_empty_element == TRUE) {
222 file->is_empty_element = FALSE;
223 xml_pop_tag(file);
224 return 0;
225 }
226
227 if (xml_get_parenthesis(file, buf, sizeof(buf)) < 0) {
228 g_warning("xml_parse_next_tag(): Can't parse next tag in %s", file->path);
229 return -1;
230 }
231
232 len = strlen(buf);
233
234 /* end-tag */
235 if (buf[0] == '/') {
236 if (strcmp(xml_get_current_tag(file)->tag, buf + 1) != 0) {
237 g_warning("xml_parse_next_tag(): Tag name mismatch in %s : %s (%s)", file->path, buf, xml_get_current_tag(file)->tag);
238 return -1;
239 }
240 xml_pop_tag(file);
241 return 0;
242 }
243
244 if (len >= 7 && !strncmp(buf, "!-- ", 4) && !strncmp(buf+len-3, " --", 3)) {
245 /* skip comment */
246 goto next;
247 }
248
249 tag = xml_tag_new(NULL);
250 xml_push_tag(file, tag);
251
252 if (len > 0 && buf[len - 1] == '/') {
253 file->is_empty_element = TRUE;
254 buf[len - 1] = '\0';
255 g_strchomp(buf);
256 }
257
258 if (strlen(buf) == 0) {
259 g_warning("xml_parse_next_tag(): Tag name is empty in %s", file->path);
260 return -1;
261 }
262
263 while (*bufp != '\0' && !g_ascii_isspace(*bufp)) bufp++;
264 if (*bufp == '\0') {
265 if (file->need_codeconv) {
266 tag_str = conv_codeset_strdup(buf, file->encoding, CS_INTERNAL);
267 if (tag_str) {
268 tag->tag = XML_STRING_ADD(tag_str);
269 g_free(tag_str);
270 } else
271 tag->tag = XML_STRING_ADD(buf);
272 } else
273 tag->tag = XML_STRING_ADD(buf);
274 return 0;
275 } else {
276 *bufp++ = '\0';
277 if (file->need_codeconv) {
278 tag_str = conv_codeset_strdup(buf, file->encoding, CS_INTERNAL);
279 if (tag_str) {
280 tag->tag = XML_STRING_ADD(tag_str);
281 g_free(tag_str);
282 } else
283 tag->tag = XML_STRING_ADD(buf);
284 } else
285 tag->tag = XML_STRING_ADD(buf);
286 }
287
288 /* parse attributes ( name=value ) */
289 while (*bufp) {
290 XMLAttr *attr;
291 gchar *attr_name;
292 gchar *attr_value;
293 gchar *utf8_attr_name;
294 gchar *utf8_attr_value;
295 gchar *p;
296 gchar quote;
297
298 while (g_ascii_isspace(*bufp)) bufp++;
299 attr_name = bufp;
300 if ((p = strchr(attr_name, '=')) == NULL) {
301 g_warning("xml_parse_next_tag(): Syntax error in %s, tag (a) %s", file->path, attr_name);
302 return -1;
303 }
304 bufp = p;
305 *bufp++ = '\0';
306 while (g_ascii_isspace(*bufp)) bufp++;
307
308 if (*bufp != '"' && *bufp != '\'') {
309 g_warning("xml_parse_next_tag(): Syntax error in %s, tag (b) %s", file->path, bufp);
310 return -1;
311 }
312 quote = *bufp;
313 bufp++;
314 attr_value = bufp;
315 if ((p = strchr(attr_value, quote)) == NULL) {
316 g_warning("xml_parse_next_tag(): Syntax error in %s, tag (c) %s", file->path, attr_value);
317 return -1;
318 }
319 bufp = p;
320 *bufp++ = '\0';
321
322 g_strchomp(attr_name);
323 xml_unescape_str(attr_value);
324 if (file->need_codeconv) {
325 utf8_attr_name = conv_codeset_strdup
326 (attr_name, file->encoding, CS_INTERNAL);
327 utf8_attr_value = conv_codeset_strdup
328 (attr_value, file->encoding, CS_INTERNAL);
329 if (!utf8_attr_name)
330 utf8_attr_name = g_strdup(attr_name);
331 if (!utf8_attr_value)
332 utf8_attr_value = g_strdup(attr_value);
333
334 attr = xml_attr_new(utf8_attr_name, utf8_attr_value);
335 g_free(utf8_attr_value);
336 g_free(utf8_attr_name);
337 } else {
338 attr = xml_attr_new(attr_name, attr_value);
339 }
340 xml_tag_add_attr(tag, attr);
341
342 }
343 tag->attr = g_list_reverse(tag->attr);
344
345 return 0;
346 }
347
/* Make `tag` the current tag: put it at the head of the tag stack and
 * increase the nesting level by one. A NULL `tag` is rejected. */
static void xml_push_tag(XMLFile *file, XMLTag *tag)
{
	cm_return_if_fail(tag != NULL);

	file->level++;
	file->tag_stack = g_list_prepend(file->tag_stack, tag);
}
355
/* Remove the current tag from the tag stack, free it and decrease the
 * nesting level by one. Does nothing when the stack is empty. */
static void xml_pop_tag(XMLFile *file)
{
	XMLTag *top;

	if (file->tag_stack == NULL)
		return;

	top = (XMLTag *)file->tag_stack->data;

	file->tag_stack = g_list_remove(file->tag_stack, top);
	file->level--;
	xml_free_tag(top);
}
368
xml_get_current_tag(XMLFile * file)369 XMLTag *xml_get_current_tag(XMLFile *file)
370 {
371 if (file->tag_stack)
372 return (XMLTag *)file->tag_stack->data;
373 else
374 return NULL;
375 }
376
xml_get_current_tag_attr(XMLFile * file)377 GList *xml_get_current_tag_attr(XMLFile *file)
378 {
379 XMLTag *tag;
380
381 tag = xml_get_current_tag(file);
382 if (!tag) return NULL;
383
384 return tag->attr;
385 }
386
xml_get_element(XMLFile * file)387 gchar *xml_get_element(XMLFile *file)
388 {
389 gchar *str;
390 gchar *new_str;
391 gchar *end;
392
393 while ((end = strchr(file->bufp, '<')) == NULL)
394 if (xml_read_line(file) < 0) return NULL;
395
396 if (end == file->bufp)
397 return NULL;
398
399 str = g_strndup(file->bufp, end - file->bufp);
400 /* this is not XML1.0 strict */
401 g_strstrip(str);
402 xml_unescape_str(str);
403
404 file->bufp = end;
405 xml_truncate_buf(file);
406
407 if (str[0] == '\0') {
408 g_free(str);
409 return NULL;
410 }
411
412 if (!file->need_codeconv)
413 return str;
414
415 new_str = conv_codeset_strdup(str, file->encoding, CS_INTERNAL);
416 if (!new_str)
417 new_str = g_strdup(str);
418 g_free(str);
419
420 return new_str;
421 }
422
xml_read_line(XMLFile * file)423 static gint xml_read_line(XMLFile *file)
424 {
425 gchar buf[XMLBUFSIZE];
426 gint index;
427
428 if (claws_fgets(buf, sizeof(buf), file->fp) == NULL)
429 return -1;
430
431 index = file->bufp - file->buf->str;
432
433 g_string_append(file->buf, buf);
434
435 file->bufp = file->buf->str + index;
436
437 return 0;
438 }
439
/* Drop everything before the current read position from the parse
 * buffer and reset the read position to the buffer start. */
static void xml_truncate_buf(XMLFile *file)
{
	gint consumed = file->bufp - file->buf->str;

	if (consumed <= 0)
		return;

	g_string_erase(file->buf, 0, consumed);
	file->bufp = file->buf->str;
}
450
xml_compare_tag(XMLFile * file,const gchar * name)451 gboolean xml_compare_tag(XMLFile *file, const gchar *name)
452 {
453 XMLTag *tag;
454
455 tag = xml_get_current_tag(file);
456
457 if (tag && strcmp(tag->tag, name) == 0)
458 return TRUE;
459 else
460 return FALSE;
461 }
462
xml_node_new(XMLTag * tag,const gchar * text)463 XMLNode *xml_node_new(XMLTag *tag, const gchar *text)
464 {
465 XMLNode *node;
466
467 node = g_new(XMLNode, 1);
468 node->tag = tag;
469 node->element = g_strdup(text);
470
471 return node;
472 }
473
xml_tag_new(const gchar * tag)474 XMLTag *xml_tag_new(const gchar *tag)
475 {
476 XMLTag *new_tag;
477
478 new_tag = g_new(XMLTag, 1);
479 if (tag)
480 new_tag->tag = XML_STRING_ADD(tag);
481 else
482 new_tag->tag = NULL;
483 new_tag->attr = NULL;
484
485 return new_tag;
486 }
487
xml_attr_new(const gchar * name,const gchar * value)488 XMLAttr *xml_attr_new(const gchar *name, const gchar *value)
489 {
490 XMLAttr *new_attr;
491
492 new_attr = g_new(XMLAttr, 1);
493 new_attr->name = XML_STRING_ADD(name);
494 new_attr->value = g_strdup(value);
495
496 return new_attr;
497 }
498
xml_attr_new_int(const gchar * name,const gint value)499 XMLAttr *xml_attr_new_int(const gchar *name, const gint value)
500 {
501 XMLAttr *new_attr;
502 gchar *valuestr;
503
504 valuestr = g_strdup_printf("%d", value);
505
506 new_attr = g_new(XMLAttr, 1);
507 new_attr->name = XML_STRING_ADD(name);
508 new_attr->value = valuestr;
509
510 return new_attr;
511 }
512
/* Attach `attr` to `tag`. The attribute is put at the FRONT of the
 * list, so the list ends up in reverse order of insertion. */
void xml_tag_add_attr(XMLTag *tag, XMLAttr *attr)
{
	GList *head = g_list_prepend(tag->attr, attr);

	tag->attr = head;
}
517
/*
 * Return a deep copy of `tag`: same name, and a copy of every attribute
 * in the same order. `tag` itself is not modified.
 */
static XMLTag *xml_copy_tag(XMLTag *tag)
{
	XMLTag *new_tag;
	GList *list;

	new_tag = xml_tag_new(tag->tag);
	for (list = tag->attr; list != NULL; list = list->next)
		xml_tag_add_attr(new_tag, xml_copy_attr((XMLAttr *)list->data));

	/* xml_tag_add_attr() prepends, so the copy's list is reversed at
	 * this point. Reverse the COPY back into the source order; the
	 * source list must be left alone. */
	new_tag->attr = g_list_reverse(new_tag->attr);

	return new_tag;
}
533
/* Return a new attribute with the same name and value as `attr`. */
static XMLAttr *xml_copy_attr(XMLAttr *attr)
{
	XMLAttr *dup = xml_attr_new(attr->name, attr->value);

	return dup;
}
538
xml_unescape_str(gchar * str)539 static gint xml_unescape_str(gchar *str)
540 {
541 gchar *start;
542 gchar *end;
543 gchar *p = str;
544 gchar *esc_str;
545 gchar ch;
546 gint len;
547
548 while ((start = strchr(p, '&')) != NULL) {
549 if ((end = strchr(start + 1, ';')) == NULL) {
550 g_warning("Unescaped '&' appeared");
551 p = start + 1;
552 continue;
553 }
554 len = end - start + 1;
555 if (len < 3) {
556 p = end + 1;
557 continue;
558 }
559
560 Xstrndup_a(esc_str, start, len, return -1);
561 if (!strcmp(esc_str, "<"))
562 ch = '<';
563 else if (!strcmp(esc_str, ">"))
564 ch = '>';
565 else if (!strcmp(esc_str, "&"))
566 ch = '&';
567 else if (!strcmp(esc_str, "'"))
568 ch = '\'';
569 else if (!strcmp(esc_str, """))
570 ch = '\"';
571 else {
572 p = end + 1;
573 continue;
574 }
575
576 *start = ch;
577 memmove(start + 1, end + 1, strlen(end + 1) + 1);
578 p = start + 1;
579 }
580
581 return 0;
582 }
583
xml_file_put_escape_str(FILE * fp,const gchar * str)584 gint xml_file_put_escape_str(FILE *fp, const gchar *str)
585 {
586 const gchar *p;
587 int result = 0;
588 cm_return_val_if_fail(fp != NULL, -1);
589
590 if (!str) return 0;
591
592 for (p = str; *p != '\0'; p++) {
593 switch (*p) {
594 case '<':
595 result = claws_fputs("<", fp);
596 break;
597 case '>':
598 result = claws_fputs(">", fp);
599 break;
600 case '&':
601 result = claws_fputs("&", fp);
602 break;
603 case '\'':
604 result = claws_fputs("'", fp);
605 break;
606 case '\"':
607 result = claws_fputs(""", fp);
608 break;
609 default:
610 result = claws_fputc(*p, fp);
611 }
612 }
613
614 return (result == EOF ? -1 : 0);
615 }
616
xml_file_put_xml_decl(FILE * fp)617 gint xml_file_put_xml_decl(FILE *fp)
618 {
619 cm_return_val_if_fail(fp != NULL, -1);
620 XML_STRING_TABLE_CREATE();
621
622 return fprintf(fp, "<?xml version=\"1.0\" encoding=\"%s\"?>\n", CS_INTERNAL);
623 }
624
/* Free an XMLNode together with its tag and element text.
 * A NULL `node` is ignored. */
static void xml_free_node(XMLNode *node)
{
	if (node == NULL)
		return;

	g_free(node->element);
	xml_free_tag(node->tag);
	g_free(node);
}
633
xml_free_func(GNode * node,gpointer data)634 static gboolean xml_free_func(GNode *node, gpointer data)
635 {
636 XMLNode *xmlnode = node->data;
637
638 xml_free_node(xmlnode);
639 return FALSE;
640 }
641
/* Free a whole tree: first the XMLNode payload of every node, then the
 * GNode structure itself. A NULL `node` is rejected. */
void xml_free_tree(GNode *node)
{
	cm_return_if_fail(node != NULL);

	/* payloads first ... */
	g_node_traverse(node, G_PRE_ORDER, G_TRAVERSE_ALL, -1,
			xml_free_func, NULL);

	/* ... then the nodes that held them */
	g_node_destroy(node);
}
651
/* Free an XMLTag: its name, every attribute (name, value and the
 * XMLAttr itself), the attribute list, and the tag. NULL is ignored. */
static void xml_free_tag(XMLTag *tag)
{
	XMLAttr *attr;

	if (tag == NULL)
		return;

	XML_STRING_FREE(tag->tag);

	/* Detach the head attribute from the list, then free it. */
	while (tag->attr != NULL) {
		attr = (XMLAttr *)tag->attr->data;
		tag->attr = g_list_remove(tag->attr, attr);

		XML_STRING_FREE(attr->name);
		g_free(attr->value); /* __not__ XML_STRING_FREE */
		g_free(attr);
	}

	g_free(tag);
}
666
xml_get_parenthesis(XMLFile * file,gchar * buf,gint len)667 static gint xml_get_parenthesis(XMLFile *file, gchar *buf, gint len)
668 {
669 gchar *start;
670 gchar *end;
671
672 buf[0] = '\0';
673
674 while ((start = strchr(file->bufp, '<')) == NULL)
675 if (xml_read_line(file) < 0) return -1;
676
677 start++;
678 file->bufp = start;
679
680 while ((end = strchr(file->bufp, '>')) == NULL)
681 if (xml_read_line(file) < 0) return -1;
682
683 strncpy2(buf, file->bufp, MIN(end - file->bufp + 1, len));
684 g_strstrip(buf);
685 file->bufp = end + 1;
686 xml_truncate_buf(file);
687
688 return 0;
689 }
690
691 #define TRY(func) \
692 if (!(func)) \
693 { \
694 g_warning("failed to write part of XML tree"); \
695 return -1; \
696 } \
697
xml_write_tree_recursive(GNode * node,FILE * fp)698 static int xml_write_tree_recursive(GNode *node, FILE *fp)
699 {
700 gint i, depth;
701 XMLTag *tag;
702 GList *cur;
703
704 cm_return_val_if_fail(node != NULL, -1);
705 cm_return_val_if_fail(fp != NULL, -1);
706
707 depth = g_node_depth(node) - 1;
708 for (i = 0; i < depth; i++)
709 TRY(claws_fputs(" ", fp) != EOF);
710
711 tag = ((XMLNode *) node->data)->tag;
712
713 TRY(fprintf(fp, "<%s", tag->tag) > 0);
714
715 for (cur = tag->attr; cur != NULL; cur = g_list_next(cur)) {
716 XMLAttr *attr = (XMLAttr *) cur->data;
717
718 TRY(fprintf(fp, " %s=\"", attr->name) > 0);
719 TRY(xml_file_put_escape_str(fp, attr->value) == 0);
720 TRY(claws_fputs("\"", fp) != EOF);
721
722 }
723
724 if (node->children) {
725 GNode *child;
726 TRY(claws_fputs(">\n", fp) != EOF);
727
728 child = node->children;
729 while (child) {
730 GNode *cur;
731
732 cur = child;
733 child = cur->next;
734 TRY(xml_write_tree_recursive(cur, fp) == 0);
735 }
736
737 for (i = 0; i < depth; i++)
738 TRY(claws_fputs(" ", fp) != EOF);
739 TRY(fprintf(fp, "</%s>\n", tag->tag) > 0);
740 } else
741 TRY(claws_fputs(" />\n", fp) != EOF);
742
743 return 0;
744 }
745
746 #undef TRY
747
/* Write the tree rooted at `node` to `fp` as XML.
 * Returns 0 on success, -1 on failure. */
int xml_write_tree(GNode *node, FILE *fp)
{
	int status = xml_write_tree_recursive(node, fp);

	return status;
}
752
/* Copy callback used by xml_copy_tree(): return a new XMLNode holding a
 * copy of the tag and of the element text of `nodedata`. `data` is
 * unused. */
static gpointer copy_node_func(gpointer nodedata, gpointer data)
{
	XMLNode *src = (XMLNode *) nodedata;
	XMLNode *dup = g_new0(XMLNode, 1);

	dup->element = g_strdup(src->element);
	dup->tag = xml_copy_tag(src->tag);

	return dup;
}
764
/* Return a copy of the tree rooted at `node`, with every node's payload
 * produced by copy_node_func(). */
GNode *xml_copy_tree(GNode *node)
{
	GNode *clone = g_node_map(node, copy_node_func, NULL);

	return clone;
}
769