1 /*****************************************************************************
2 * xmlwriter - A base library for libxlsxwriter libraries.
3 *
4 * Used in conjunction with the libxlsxwriter library.
5 *
6 * Copyright 2014-2021, John McNamara, jmcnamara@cpan.org. See LICENSE.txt.
7 *
8 */
9
10 #include <stdio.h>
11 #include <string.h>
12 #include <stdlib.h>
13 #include <ctype.h>
14 #include "xlsxwriter/xmlwriter.h"
15
/*
 * Replacement strings written in place of characters that cannot appear
 * literally in XML attribute values or element data.
 */
#define LXW_AMP  "&amp;"
#define LXW_LT   "&lt;"
#define LXW_GT   "&gt;"
#define LXW_QUOT "&quot;"
#define LXW_NL   "&#xA;"

/* Defines. */

/*
 * Size of the buffer allocated for an escaped attribute value. The factor
 * of 6 matches the longest replacement string above (LXW_QUOT).
 */
#define LXW_MAX_ENCODED_ATTRIBUTE_LENGTH (LXW_MAX_ATTRIBUTE_LENGTH*6)
24
25 /* Forward declarations. */
26 STATIC char *_escape_attributes(struct xml_attribute *attribute);
27
28 char *lxw_escape_data(const char *data);
29
30 STATIC void _fprint_escaped_attributes(FILE * xmlfile,
31 struct xml_attribute_list *attributes);
32
33 STATIC void _fprint_escaped_data(FILE * xmlfile, const char *data);
34
/*
 * Emit the XML declaration line, including its trailing newline, to the
 * output file.
 */
void
lxw_xml_declaration(FILE * xmlfile)
{
    static const char declaration[] =
        "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n";

    fputs(declaration, xmlfile);
}
44
/*
 * Emit an opening tag of the form <tag key="value" ...>. Attribute values
 * are escaped where required. The attribute list may be NULL, in which
 * case only <tag> is written.
 */
void
lxw_xml_start_tag(FILE * xmlfile,
                  const char *tag, struct xml_attribute_list *attributes)
{
    /* Name, then attributes, then the closing bracket. */
    fprintf(xmlfile, "<%s", tag);
    _fprint_escaped_attributes(xmlfile, attributes);
    fputc('>', xmlfile);
}
58
59 /*
60 * Write an XML start tag with optional, unencoded, attributes.
61 * This is a minor speed optimization for elements that don't need encoding.
62 */
63 void
lxw_xml_start_tag_unencoded(FILE * xmlfile,const char * tag,struct xml_attribute_list * attributes)64 lxw_xml_start_tag_unencoded(FILE * xmlfile,
65 const char *tag,
66 struct xml_attribute_list *attributes)
67 {
68 struct xml_attribute *attribute;
69
70 fprintf(xmlfile, "<%s", tag);
71
72 if (attributes) {
73 STAILQ_FOREACH(attribute, attributes, list_entries) {
74 fprintf(xmlfile, " %s=\"%s\"", attribute->key, attribute->value);
75 }
76 }
77
78 fprintf(xmlfile, ">");
79 }
80
/*
 * Emit the closing tag </tag> to the output file.
 */
void
lxw_xml_end_tag(FILE * xmlfile, const char *tag)
{
    /* The tag name is written verbatim between "</" and ">". */
    fprintf(xmlfile, "</%s>", tag);
}
89
/*
 * Emit a self-closing tag of the form <tag key="value" .../>. Attribute
 * values are escaped where required. The attribute list may be NULL.
 */
void
lxw_xml_empty_tag(FILE * xmlfile,
                  const char *tag, struct xml_attribute_list *attributes)
{
    /* Name, then attributes, then the self-closing terminator. */
    fprintf(xmlfile, "<%s", tag);
    _fprint_escaped_attributes(xmlfile, attributes);
    fputs("/>", xmlfile);
}
103
104 /*
105 * Write an XML start tag with optional, unencoded, attributes.
106 * This is a minor speed optimization for elements that don't need encoding.
107 */
108 void
lxw_xml_empty_tag_unencoded(FILE * xmlfile,const char * tag,struct xml_attribute_list * attributes)109 lxw_xml_empty_tag_unencoded(FILE * xmlfile,
110 const char *tag,
111 struct xml_attribute_list *attributes)
112 {
113 struct xml_attribute *attribute;
114
115 fprintf(xmlfile, "<%s", tag);
116
117 if (attributes) {
118 STAILQ_FOREACH(attribute, attributes, list_entries) {
119 fprintf(xmlfile, " %s=\"%s\"", attribute->key, attribute->value);
120 }
121 }
122
123 fprintf(xmlfile, "/>");
124 }
125
/*
 * Emit a complete element, <tag key="value" ...>data</tag>. Attribute
 * values and the data are both escaped where required. The attribute list
 * may be NULL.
 */
void
lxw_xml_data_element(FILE * xmlfile,
                     const char *tag,
                     const char *data, struct xml_attribute_list *attributes)
{
    /* Opening tag. */
    fprintf(xmlfile, "<%s", tag);
    _fprint_escaped_attributes(xmlfile, attributes);
    fputc('>', xmlfile);

    /* Element content. */
    _fprint_escaped_data(xmlfile, data);

    /* Closing tag. */
    fprintf(xmlfile, "</%s>", tag);
}
144
/*
 * Emit an <si> element for a rich string. The string is written between
 * the tags exactly as given; no escaping is applied here.
 */
void
lxw_xml_rich_si_element(FILE * xmlfile, const char *string)
{
    /* The content is inserted verbatim. */
    fprintf(xmlfile, "<si>%s</si>", string);
}
153
154 /*
155 * Escape XML characters in attributes.
156 */
157 STATIC char *
_escape_attributes(struct xml_attribute * attribute)158 _escape_attributes(struct xml_attribute *attribute)
159 {
160 char *encoded = (char *) calloc(LXW_MAX_ENCODED_ATTRIBUTE_LENGTH, 1);
161 char *p_encoded = encoded;
162 char *p_attr = attribute->value;
163
164 while (*p_attr) {
165 switch (*p_attr) {
166 case '&':
167 memcpy(p_encoded, LXW_AMP, sizeof(LXW_AMP) - 1);
168 p_encoded += sizeof(LXW_AMP) - 1;
169 break;
170 case '<':
171 memcpy(p_encoded, LXW_LT, sizeof(LXW_LT) - 1);
172 p_encoded += sizeof(LXW_LT) - 1;
173 break;
174 case '>':
175 memcpy(p_encoded, LXW_GT, sizeof(LXW_GT) - 1);
176 p_encoded += sizeof(LXW_GT) - 1;
177 break;
178 case '"':
179 memcpy(p_encoded, LXW_QUOT, sizeof(LXW_QUOT) - 1);
180 p_encoded += sizeof(LXW_QUOT) - 1;
181 break;
182 case '\n':
183 memcpy(p_encoded, LXW_NL, sizeof(LXW_NL) - 1);
184 p_encoded += sizeof(LXW_NL) - 1;
185 break;
186 default:
187 *p_encoded = *p_attr;
188 p_encoded++;
189 break;
190 }
191 p_attr++;
192 }
193
194 return encoded;
195 }
196
197 /*
198 * Escape XML characters in data sections of tags.
199 * Note, this is different from _escape_attributes()
200 * in that double quotes are not escaped by Excel.
201 */
202 char *
lxw_escape_data(const char * data)203 lxw_escape_data(const char *data)
204 {
205 size_t encoded_len = (strlen(data) * 5 + 1);
206
207 char *encoded = (char *) calloc(encoded_len, 1);
208 char *p_encoded = encoded;
209
210 while (*data) {
211 switch (*data) {
212 case '&':
213 memcpy(p_encoded, LXW_AMP, sizeof(LXW_AMP) - 1);
214 p_encoded += sizeof(LXW_AMP) - 1;
215 break;
216 case '<':
217 memcpy(p_encoded, LXW_LT, sizeof(LXW_LT) - 1);
218 p_encoded += sizeof(LXW_LT) - 1;
219 break;
220 case '>':
221 memcpy(p_encoded, LXW_GT, sizeof(LXW_GT) - 1);
222 p_encoded += sizeof(LXW_GT) - 1;
223 break;
224 default:
225 *p_encoded = *data;
226 p_encoded++;
227 break;
228 }
229 data++;
230 }
231
232 return encoded;
233 }
234
235 /*
236 * Check for control characters in strings.
237 */
238 uint8_t
lxw_has_control_characters(const char * string)239 lxw_has_control_characters(const char *string)
240 {
241 while (string) {
242 /* 0xE0 == 0b11100000 masks values > 0x19 == 0b00011111. */
243 if (!(*string & 0xE0) && *string != 0x0A && *string != 0x09)
244 return LXW_TRUE;
245
246 string++;
247 }
248
249 return LXW_FALSE;
250 }
251
/*
 * Escape control characters in strings with _xHHHH_.
 *
 * Characters 0x01-0x1F, except tab (0x09) and newline (0x0A), are replaced
 * by "_xHHHH_" where HHHH is the character value as four uppercase hex
 * digits. All other characters are copied unchanged.
 *
 * Returns a calloc'd, NUL-terminated string, or NULL if the allocation
 * fails.
 */
char *
lxw_escape_control_characters(const char *string)
{
    size_t escape_len = sizeof("_xHHHH_") - 1;
    /* Worst case: every input character expands to a full escape. */
    size_t encoded_len = (strlen(string) * escape_len + 1);

    char *encoded = calloc(encoded_len, 1);
    char *p_encoded = encoded;

    /* Don't write through a failed allocation. */
    if (!encoded)
        return NULL;

    while (*string) {
        unsigned char c = (unsigned char) *string;

        /* c is never 0 here, so this covers exactly 0x01-0x1F minus
         * tab and newline. */
        if (c <= 0x1F && c != 0x09 && c != 0x0A) {
            /* The + 1 leaves room for the NUL that snprintf appends; it is
             * overwritten by the next character or stays as terminator. */
            lxw_snprintf(p_encoded, escape_len + 1, "_x%04X_",
                         (unsigned int) c);
            p_encoded += escape_len;
        }
        else {
            *p_encoded = *string;
            p_encoded++;
        }

        string++;
    }

    return encoded;
}
308
/*
 * Escape special characters in URL strings with %XX.
 *
 * The characters space, '"', '<', '>', '[', ']', '`', '^', '{' and '}' are
 * always replaced by '%' followed by their two-digit lowercase hex value.
 * '#' is replaced only when escape_hash is non-zero. '%' is replaced only
 * when it is not already followed by two hex digits. All other characters
 * are copied unchanged.
 *
 * Returns a calloc'd, NUL-terminated string, or NULL if the allocation
 * fails.
 */
char *
lxw_escape_url_characters(const char *string, uint8_t escape_hash)
{
    size_t escape_len = sizeof("%XX") - 1;
    /* Worst case: every input character expands to a full escape. */
    size_t encoded_len = (strlen(string) * escape_len + 1);

    char *encoded = calloc(encoded_len, 1);
    char *p_encoded = encoded;

    /* Don't write through a failed allocation. */
    if (!encoded)
        return NULL;

    while (*string) {
        int needs_escape;

        switch (*string) {
            case ' ':
            case '"':
            case '<':
            case '>':
            case '[':
            case ']':
            case '`':
            case '^':
            case '{':
            case '}':
                needs_escape = 1;
                break;
            case '#':
                /* This is only escaped for "external:" style links. */
                needs_escape = (escape_hash != 0);
                break;
            case '%':
                /* Only escape % if it isn't already an escape. The casts
                 * keep isxdigit() defined for bytes above 0x7F, and the
                 * short-circuit stops string[2] being read past a
                 * terminator at string[1]. */
                needs_escape = !isxdigit((unsigned char) string[1])
                    || !isxdigit((unsigned char) string[2]);
                break;
            default:
                needs_escape = 0;
                break;
        }

        if (needs_escape) {
            lxw_snprintf(p_encoded, escape_len + 1, "%%%2x",
                         (unsigned int) (unsigned char) *string);
            p_encoded += escape_len;
        }
        else {
            *p_encoded = *string;
            p_encoded++;
        }

        string++;
    }

    return encoded;
}
369
370 /* Write out escaped attributes. */
371 STATIC void
_fprint_escaped_attributes(FILE * xmlfile,struct xml_attribute_list * attributes)372 _fprint_escaped_attributes(FILE * xmlfile,
373 struct xml_attribute_list *attributes)
374 {
375 struct xml_attribute *attribute;
376
377 if (attributes) {
378 STAILQ_FOREACH(attribute, attributes, list_entries) {
379 fprintf(xmlfile, " %s=", attribute->key);
380
381 if (!strpbrk(attribute->value, "&<>\"\n")) {
382 fprintf(xmlfile, "\"%s\"", attribute->value);
383 }
384 else {
385 char *encoded = _escape_attributes(attribute);
386
387 if (encoded) {
388 fprintf(xmlfile, "\"%s\"", encoded);
389
390 free(encoded);
391 }
392 }
393 }
394 }
395 }
396
397 /* Write out escaped XML data. */
398 STATIC void
_fprint_escaped_data(FILE * xmlfile,const char * data)399 _fprint_escaped_data(FILE * xmlfile, const char *data)
400 {
401 /* Escape the data section of the XML element. */
402 if (!strpbrk(data, "&<>")) {
403 fprintf(xmlfile, "%s", data);
404 }
405 else {
406 char *encoded = lxw_escape_data(data);
407 if (encoded) {
408 fprintf(xmlfile, "%s", encoded);
409 free(encoded);
410 }
411 }
412 }
413
414 /* Create a new string XML attribute. */
415 struct xml_attribute *
lxw_new_attribute_str(const char * key,const char * value)416 lxw_new_attribute_str(const char *key, const char *value)
417 {
418 struct xml_attribute *attribute = malloc(sizeof(struct xml_attribute));
419
420 LXW_ATTRIBUTE_COPY(attribute->key, key);
421 LXW_ATTRIBUTE_COPY(attribute->value, value);
422
423 return attribute;
424 }
425
426 /* Create a new integer XML attribute. */
427 struct xml_attribute *
lxw_new_attribute_int(const char * key,uint32_t value)428 lxw_new_attribute_int(const char *key, uint32_t value)
429 {
430 struct xml_attribute *attribute = malloc(sizeof(struct xml_attribute));
431
432 LXW_ATTRIBUTE_COPY(attribute->key, key);
433 lxw_snprintf(attribute->value, LXW_MAX_ATTRIBUTE_LENGTH, "%d", value);
434
435 return attribute;
436 }
437
438 /* Create a new double XML attribute. */
439 struct xml_attribute *
lxw_new_attribute_dbl(const char * key,double value)440 lxw_new_attribute_dbl(const char *key, double value)
441 {
442 struct xml_attribute *attribute = malloc(sizeof(struct xml_attribute));
443
444 LXW_ATTRIBUTE_COPY(attribute->key, key);
445 lxw_sprintf_dbl(attribute->value, value);
446
447 return attribute;
448 }
449