1 /*****************************************************************************
2  * xmlwriter - A base library for libxlsxwriter libraries.
3  *
4  * Used in conjunction with the libxlsxwriter library.
5  *
6  * Copyright 2014-2021, John McNamara, jmcnamara@cpan.org. See LICENSE.txt.
7  *
8  */
9 
10 #include <stdio.h>
11 #include <string.h>
12 #include <stdlib.h>
13 #include <ctype.h>
14 #include "xlsxwriter/xmlwriter.h"
15 
/* Replacement text written in place of characters that must be escaped. */
#define LXW_AMP  "&amp;"
#define LXW_LT   "&lt;"
#define LXW_GT   "&gt;"
#define LXW_QUOT "&quot;"
#define LXW_NL   "&#xA;"

/*
 * Size in bytes of the buffer allocated for an escaped attribute value:
 * six bytes for every byte of LXW_MAX_ATTRIBUTE_LENGTH.
 * NOTE(review): six matches the longest replacement above ("&quot;");
 * confirm that this is the intended worst case.
 */
#define LXW_MAX_ENCODED_ATTRIBUTE_LENGTH (LXW_MAX_ATTRIBUTE_LENGTH * 6)
24 
25 /* Forward declarations. */
26 STATIC char *_escape_attributes(struct xml_attribute *attribute);
27 
28 char *lxw_escape_data(const char *data);
29 
30 STATIC void _fprint_escaped_attributes(FILE * xmlfile,
31                                        struct xml_attribute_list *attributes);
32 
33 STATIC void _fprint_escaped_data(FILE * xmlfile, const char *data);
34 
/*
 * Emit the XML prolog (version 1.0, UTF-8, standalone="yes") followed by
 * a newline to the given stream.
 */
void
lxw_xml_declaration(FILE *xmlfile)
{
    static const char prolog[] =
        "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n";

    fprintf(xmlfile, "%s", prolog);
}
44 
/*
 * Emit an opening tag: "<tag", then each attribute with its value
 * XML-escaped where required, then ">".
 */
void
lxw_xml_start_tag(FILE *xmlfile,
                  const char *tag, struct xml_attribute_list *attributes)
{
    /* Opening bracket and element name. */
    fprintf(xmlfile, "<%s", tag);

    /* key="value" pairs, escaped as needed. */
    _fprint_escaped_attributes(xmlfile, attributes);

    /* Close the start tag. */
    fputs(">", xmlfile);
}
58 
59 /*
60  * Write an XML start tag with optional, unencoded, attributes.
61  * This is a minor speed optimization for elements that don't need encoding.
62  */
63 void
lxw_xml_start_tag_unencoded(FILE * xmlfile,const char * tag,struct xml_attribute_list * attributes)64 lxw_xml_start_tag_unencoded(FILE * xmlfile,
65                             const char *tag,
66                             struct xml_attribute_list *attributes)
67 {
68     struct xml_attribute *attribute;
69 
70     fprintf(xmlfile, "<%s", tag);
71 
72     if (attributes) {
73         STAILQ_FOREACH(attribute, attributes, list_entries) {
74             fprintf(xmlfile, " %s=\"%s\"", attribute->key, attribute->value);
75         }
76     }
77 
78     fprintf(xmlfile, ">");
79 }
80 
/*
 * Emit the closing tag "</tag>" for the named element.
 */
void
lxw_xml_end_tag(FILE *xmlfile, const char *tag)
{
    static const char closing_fmt[] = "</%s>";

    fprintf(xmlfile, closing_fmt, tag);
}
89 
/*
 * Emit a self-closing tag: "<tag", then each attribute with its value
 * XML-escaped where required, then "/>".
 */
void
lxw_xml_empty_tag(FILE *xmlfile,
                  const char *tag, struct xml_attribute_list *attributes)
{
    /* Opening bracket and element name. */
    fprintf(xmlfile, "<%s", tag);

    /* key="value" pairs, escaped as needed. */
    _fprint_escaped_attributes(xmlfile, attributes);

    /* Self-closing terminator. */
    fputs("/>", xmlfile);
}
103 
104 /*
105  * Write an XML start tag with optional, unencoded, attributes.
106  * This is a minor speed optimization for elements that don't need encoding.
107  */
108 void
lxw_xml_empty_tag_unencoded(FILE * xmlfile,const char * tag,struct xml_attribute_list * attributes)109 lxw_xml_empty_tag_unencoded(FILE * xmlfile,
110                             const char *tag,
111                             struct xml_attribute_list *attributes)
112 {
113     struct xml_attribute *attribute;
114 
115     fprintf(xmlfile, "<%s", tag);
116 
117     if (attributes) {
118         STAILQ_FOREACH(attribute, attributes, list_entries) {
119             fprintf(xmlfile, " %s=\"%s\"", attribute->key, attribute->value);
120         }
121     }
122 
123     fprintf(xmlfile, "/>");
124 }
125 
/*
 * Emit a complete element: the start tag with escaped attributes, the
 * character data with '&', '<' and '>' escaped, and the matching end tag.
 */
void
lxw_xml_data_element(FILE *xmlfile,
                     const char *tag,
                     const char *data, struct xml_attribute_list *attributes)
{
    /* Start tag: <tag key="value" ...> */
    fprintf(xmlfile, "<%s", tag);
    _fprint_escaped_attributes(xmlfile, attributes);
    fputs(">", xmlfile);

    /* Element content. */
    _fprint_escaped_data(xmlfile, data);

    /* End tag: </tag> */
    fprintf(xmlfile, "</%s>", tag);
}
144 
/*
 * Emit an <si> element for a rich string. The string is written between
 * the tags exactly as given; no escaping is applied to it here.
 */
void
lxw_xml_rich_si_element(FILE *xmlfile, const char *string)
{
    static const char si_fmt[] = "<si>%s</si>";

    fprintf(xmlfile, si_fmt, string);
}
153 
154 /*
155  * Escape XML characters in attributes.
156  */
157 STATIC char *
_escape_attributes(struct xml_attribute * attribute)158 _escape_attributes(struct xml_attribute *attribute)
159 {
160     char *encoded = (char *) calloc(LXW_MAX_ENCODED_ATTRIBUTE_LENGTH, 1);
161     char *p_encoded = encoded;
162     char *p_attr = attribute->value;
163 
164     while (*p_attr) {
165         switch (*p_attr) {
166             case '&':
167                 memcpy(p_encoded, LXW_AMP, sizeof(LXW_AMP) - 1);
168                 p_encoded += sizeof(LXW_AMP) - 1;
169                 break;
170             case '<':
171                 memcpy(p_encoded, LXW_LT, sizeof(LXW_LT) - 1);
172                 p_encoded += sizeof(LXW_LT) - 1;
173                 break;
174             case '>':
175                 memcpy(p_encoded, LXW_GT, sizeof(LXW_GT) - 1);
176                 p_encoded += sizeof(LXW_GT) - 1;
177                 break;
178             case '"':
179                 memcpy(p_encoded, LXW_QUOT, sizeof(LXW_QUOT) - 1);
180                 p_encoded += sizeof(LXW_QUOT) - 1;
181                 break;
182             case '\n':
183                 memcpy(p_encoded, LXW_NL, sizeof(LXW_NL) - 1);
184                 p_encoded += sizeof(LXW_NL) - 1;
185                 break;
186             default:
187                 *p_encoded = *p_attr;
188                 p_encoded++;
189                 break;
190         }
191         p_attr++;
192     }
193 
194     return encoded;
195 }
196 
197 /*
198  * Escape XML characters in data sections of tags.
199  * Note, this is different from _escape_attributes()
200  * in that double quotes are not escaped by Excel.
201  */
202 char *
lxw_escape_data(const char * data)203 lxw_escape_data(const char *data)
204 {
205     size_t encoded_len = (strlen(data) * 5 + 1);
206 
207     char *encoded = (char *) calloc(encoded_len, 1);
208     char *p_encoded = encoded;
209 
210     while (*data) {
211         switch (*data) {
212             case '&':
213                 memcpy(p_encoded, LXW_AMP, sizeof(LXW_AMP) - 1);
214                 p_encoded += sizeof(LXW_AMP) - 1;
215                 break;
216             case '<':
217                 memcpy(p_encoded, LXW_LT, sizeof(LXW_LT) - 1);
218                 p_encoded += sizeof(LXW_LT) - 1;
219                 break;
220             case '>':
221                 memcpy(p_encoded, LXW_GT, sizeof(LXW_GT) - 1);
222                 p_encoded += sizeof(LXW_GT) - 1;
223                 break;
224             default:
225                 *p_encoded = *data;
226                 p_encoded++;
227                 break;
228         }
229         data++;
230     }
231 
232     return encoded;
233 }
234 
235 /*
236  * Check for control characters in strings.
237  */
238 uint8_t
lxw_has_control_characters(const char * string)239 lxw_has_control_characters(const char *string)
240 {
241     while (string) {
242         /* 0xE0 == 0b11100000 masks values > 0x19 == 0b00011111. */
243         if (!(*string & 0xE0) && *string != 0x0A && *string != 0x09)
244             return LXW_TRUE;
245 
246         string++;
247     }
248 
249     return LXW_FALSE;
250 }
251 
/*
 * Escape control characters in a string with _xHHHH_.
 *
 * Every byte in the range 0x01-0x1F except tab (0x09) and line feed
 * (0x0A) is replaced by "_xHHHH_", where HHHH is the byte value as four
 * upper-case hex digits. All other bytes are copied unchanged.
 *
 * Returns a newly allocated string that the caller must free(), or NULL
 * if the allocation fails.
 */
char *
lxw_escape_control_characters(const char *string)
{
    size_t escape_len = sizeof("_xHHHH_") - 1;
    /* Worst case: every input byte expands to a full escape sequence. */
    size_t encoded_len = (strlen(string) * escape_len + 1);

    char *encoded = (char *) calloc(encoded_len, 1);
    char *p_encoded = encoded;

    /* Allocation failed: report it instead of writing through NULL. */
    if (!encoded)
        return NULL;

    while (*string) {
        unsigned char c = (unsigned char) *string;

        if (c < 0x20 && c != 0x09 && c != 0x0A) {
            /* Writes the 7-character escape plus a terminating NUL. */
            lxw_snprintf(p_encoded, escape_len + 1, "_x%04X_",
                         (unsigned int) c);
            p_encoded += escape_len;
        }
        else {
            *p_encoded = *string;
            p_encoded++;
        }
        string++;
    }

    return encoded;
}
308 
/*
 * Escape special characters in URL strings with %XX.
 *
 * The characters space, '"', '<', '>', '[', ']', '`', '^', '{' and '}'
 * are always replaced by '%' followed by their two-digit lower-case hex
 * value. '#' is escaped only when escape_hash is non-zero. '%' is
 * escaped unless it is already followed by two hex digits. All other
 * bytes are copied unchanged.
 *
 * Returns a newly allocated string that the caller must free(), or NULL
 * if the allocation fails.
 */
char *
lxw_escape_url_characters(const char *string, uint8_t escape_hash)
{

    size_t escape_len = sizeof("%XX") - 1;
    /* Worst case: every input byte expands to a full escape sequence. */
    size_t encoded_len = (strlen(string) * escape_len + 1);

    char *encoded = (char *) calloc(encoded_len, 1);
    char *p_encoded = encoded;

    /* Allocation failed: report it instead of writing through NULL. */
    if (!encoded)
        return NULL;

    while (*string) {
        int needs_escape;

        switch (*string) {
            case ' ':
            case '"':
            case '<':
            case '>':
            case '[':
            case ']':
            case '`':
            case '^':
            case '{':
            case '}':
                needs_escape = 1;
                break;
            case '#':
                /* This is only escaped for "external:" style links. */
                needs_escape = (escape_hash != 0);
                break;
            case '%':
                /*
                 * Only escape % if it isn't already an escape. The casts
                 * keep isxdigit() defined for bytes >= 0x80 when char is
                 * signed. The second test is reached only if the first
                 * byte is a hex digit, so it never reads past the NUL.
                 */
                needs_escape = !isxdigit((unsigned char) string[1])
                    || !isxdigit((unsigned char) string[2]);
                break;
            default:
                needs_escape = 0;
                break;
        }

        if (needs_escape) {
            /* Writes the 3-character escape plus a terminating NUL. */
            lxw_snprintf(p_encoded, escape_len + 1, "%%%2x",
                         (unsigned int) (unsigned char) *string);
            p_encoded += escape_len;
        }
        else {
            *p_encoded = *string;
            p_encoded++;
        }
        string++;
    }

    return encoded;
}
369 
370 /* Write out escaped attributes. */
371 STATIC void
_fprint_escaped_attributes(FILE * xmlfile,struct xml_attribute_list * attributes)372 _fprint_escaped_attributes(FILE * xmlfile,
373                            struct xml_attribute_list *attributes)
374 {
375     struct xml_attribute *attribute;
376 
377     if (attributes) {
378         STAILQ_FOREACH(attribute, attributes, list_entries) {
379             fprintf(xmlfile, " %s=", attribute->key);
380 
381             if (!strpbrk(attribute->value, "&<>\"\n")) {
382                 fprintf(xmlfile, "\"%s\"", attribute->value);
383             }
384             else {
385                 char *encoded = _escape_attributes(attribute);
386 
387                 if (encoded) {
388                     fprintf(xmlfile, "\"%s\"", encoded);
389 
390                     free(encoded);
391                 }
392             }
393         }
394     }
395 }
396 
397 /* Write out escaped XML data. */
398 STATIC void
_fprint_escaped_data(FILE * xmlfile,const char * data)399 _fprint_escaped_data(FILE * xmlfile, const char *data)
400 {
401     /* Escape the data section of the XML element. */
402     if (!strpbrk(data, "&<>")) {
403         fprintf(xmlfile, "%s", data);
404     }
405     else {
406         char *encoded = lxw_escape_data(data);
407         if (encoded) {
408             fprintf(xmlfile, "%s", encoded);
409             free(encoded);
410         }
411     }
412 }
413 
414 /* Create a new string XML attribute. */
415 struct xml_attribute *
lxw_new_attribute_str(const char * key,const char * value)416 lxw_new_attribute_str(const char *key, const char *value)
417 {
418     struct xml_attribute *attribute = malloc(sizeof(struct xml_attribute));
419 
420     LXW_ATTRIBUTE_COPY(attribute->key, key);
421     LXW_ATTRIBUTE_COPY(attribute->value, value);
422 
423     return attribute;
424 }
425 
426 /* Create a new integer XML attribute. */
427 struct xml_attribute *
lxw_new_attribute_int(const char * key,uint32_t value)428 lxw_new_attribute_int(const char *key, uint32_t value)
429 {
430     struct xml_attribute *attribute = malloc(sizeof(struct xml_attribute));
431 
432     LXW_ATTRIBUTE_COPY(attribute->key, key);
433     lxw_snprintf(attribute->value, LXW_MAX_ATTRIBUTE_LENGTH, "%d", value);
434 
435     return attribute;
436 }
437 
438 /* Create a new double XML attribute. */
439 struct xml_attribute *
lxw_new_attribute_dbl(const char * key,double value)440 lxw_new_attribute_dbl(const char *key, double value)
441 {
442     struct xml_attribute *attribute = malloc(sizeof(struct xml_attribute));
443 
444     LXW_ATTRIBUTE_COPY(attribute->key, key);
445     lxw_sprintf_dbl(attribute->value, value);
446 
447     return attribute;
448 }
449