1 /*****************************************************************************
2 * shared_strings - A library for creating Excel XLSX sst files.
3 *
4 * Used in conjunction with the libxlsxwriter library.
5 *
6 * Copyright 2014-2021, John McNamara, jmcnamara@cpan.org. See LICENSE.txt.
7 *
8 */
9
10 #include "xlsxwriter/xmlwriter.h"
11 #include "xlsxwriter/shared_strings.h"
12 #include "xlsxwriter/utility.h"
13 #include <ctype.h>
14
/*
 * Forward declarations.
 */

/*
 * Comparator for SST elements. It is declared here because it is passed to
 * the tree-generation macro below before its definition appears in the file.
 */
STATIC int _element_cmp(struct sst_element *element1,
                        struct sst_element *element2);

/*
 * Instantiate the tree code for the sst_rb_tree type, keyed on sst_element
 * via its sst_tree_pointers field and ordered by _element_cmp().
 * NOTE(review): presumably this expands to the red-black tree routines that
 * RB_INSERT() relies on -- confirm against the macro definition.
 * The expansion is skipped when __clang_analyzer__ is defined.
 */
#ifndef __clang_analyzer__
LXW_RB_GENERATE_ELEMENT(sst_rb_tree, sst_element, sst_tree_pointers,
                        _element_cmp);
#endif
26
27 /*****************************************************************************
28 *
29 * Private functions.
30 *
31 ****************************************************************************/
32
33 /*
34 * Create a new SST SharedString object.
35 */
36 lxw_sst *
lxw_sst_new(void)37 lxw_sst_new(void)
38 {
39 /* Create the new shared string table. */
40 lxw_sst *sst = calloc(1, sizeof(lxw_sst));
41 RETURN_ON_MEM_ERROR(sst, NULL);
42
43 /* Add the sst RB tree. */
44 sst->rb_tree = calloc(1, sizeof(struct sst_rb_tree));
45 GOTO_LABEL_ON_MEM_ERROR(sst->rb_tree, mem_error);
46
47 /* Add a list for tracking the insertion order. */
48 sst->order_list = calloc(1, sizeof(struct sst_order_list));
49 GOTO_LABEL_ON_MEM_ERROR(sst->order_list, mem_error);
50
51 /* Initialize the order list. */
52 STAILQ_INIT(sst->order_list);
53
54 /* Initialize the RB tree. */
55 RB_INIT(sst->rb_tree);
56
57 return sst;
58
59 mem_error:
60 lxw_sst_free(sst);
61 return NULL;
62 }
63
64 /*
65 * Free a SST SharedString table object.
66 */
67 void
lxw_sst_free(lxw_sst * sst)68 lxw_sst_free(lxw_sst *sst)
69 {
70 struct sst_element *sst_element;
71 struct sst_element *sst_element_temp;
72
73 if (!sst)
74 return;
75
76 /* Free the sst_elements and their data using the ordered linked list. */
77 if (sst->order_list) {
78 STAILQ_FOREACH_SAFE(sst_element, sst->order_list, sst_order_pointers,
79 sst_element_temp) {
80
81 if (sst_element && sst_element->string)
82 free(sst_element->string);
83 if (sst_element)
84 free(sst_element);
85 }
86 }
87
88 free(sst->order_list);
89 free(sst->rb_tree);
90 free(sst);
91 }
92
93 /*
94 * Comparator for the element structure
95 */
96 STATIC int
_element_cmp(struct sst_element * element1,struct sst_element * element2)97 _element_cmp(struct sst_element *element1, struct sst_element *element2)
98 {
99 return strcmp(element1->string, element2->string);
100 }
101
102 /*****************************************************************************
103 *
104 * XML functions.
105 *
106 ****************************************************************************/
107 /*
108 * Write the XML declaration.
109 */
110 STATIC void
_sst_xml_declaration(lxw_sst * self)111 _sst_xml_declaration(lxw_sst *self)
112 {
113 lxw_xml_declaration(self->file);
114 }
115
116 /*
117 * Write the <t> element.
118 */
119 STATIC void
_write_t(lxw_sst * self,char * string)120 _write_t(lxw_sst *self, char *string)
121 {
122 struct xml_attribute_list attributes;
123 struct xml_attribute *attribute;
124
125 LXW_INIT_ATTRIBUTES();
126
127 /* Add attribute to preserve leading or trailing whitespace. */
128 if (isspace((unsigned char) string[0])
129 || isspace((unsigned char) string[strlen(string) - 1]))
130 LXW_PUSH_ATTRIBUTES_STR("xml:space", "preserve");
131
132 lxw_xml_data_element(self->file, "t", string, &attributes);
133
134 LXW_FREE_ATTRIBUTES();
135 }
136
137 /*
138 * Write the <si> element.
139 */
140 STATIC void
_write_si(lxw_sst * self,char * string)141 _write_si(lxw_sst *self, char *string)
142 {
143 uint8_t escaped_string = LXW_FALSE;
144
145 lxw_xml_start_tag(self->file, "si", NULL);
146
147 /* Look for and escape control chars in the string. */
148 if (lxw_has_control_characters(string)) {
149 string = lxw_escape_control_characters(string);
150 escaped_string = LXW_TRUE;
151 }
152
153 /* Write the t element. */
154 _write_t(self, string);
155
156 lxw_xml_end_tag(self->file, "si");
157
158 if (escaped_string)
159 free(string);
160 }
161
162 /*
163 * Write the <si> element for rich strings.
164 */
165 STATIC void
_write_rich_si(lxw_sst * self,char * string)166 _write_rich_si(lxw_sst *self, char *string)
167 {
168 lxw_xml_rich_si_element(self->file, string);
169 }
170
171 /*
172 * Write the <sst> element.
173 */
174 STATIC void
_write_sst(lxw_sst * self)175 _write_sst(lxw_sst *self)
176 {
177 struct xml_attribute_list attributes;
178 struct xml_attribute *attribute;
179 char xmlns[] =
180 "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
181
182 LXW_INIT_ATTRIBUTES();
183 LXW_PUSH_ATTRIBUTES_STR("xmlns", xmlns);
184 LXW_PUSH_ATTRIBUTES_INT("count", self->string_count);
185 LXW_PUSH_ATTRIBUTES_INT("uniqueCount", self->unique_count);
186
187 lxw_xml_start_tag(self->file, "sst", &attributes);
188
189 LXW_FREE_ATTRIBUTES();
190 }
191
192 /*****************************************************************************
193 *
194 * XML file assembly functions.
195 *
196 ****************************************************************************/
197
198 /*
199 * Assemble and write the XML file.
200 */
201 STATIC void
_write_sst_strings(lxw_sst * self)202 _write_sst_strings(lxw_sst *self)
203 {
204 struct sst_element *sst_element;
205
206 STAILQ_FOREACH(sst_element, self->order_list, sst_order_pointers) {
207 /* Write the si element. */
208 if (sst_element->is_rich_string)
209 _write_rich_si(self, sst_element->string);
210 else
211 _write_si(self, sst_element->string);
212
213 }
214 }
215
216 /*
217 * Assemble and write the XML file.
218 */
219 void
lxw_sst_assemble_xml_file(lxw_sst * self)220 lxw_sst_assemble_xml_file(lxw_sst *self)
221 {
222 /* Write the XML declaration. */
223 _sst_xml_declaration(self);
224
225 /* Write the sst element. */
226 _write_sst(self);
227
228 /* Write the sst strings. */
229 _write_sst_strings(self);
230
231 /* Close the sst tag. */
232 lxw_xml_end_tag(self->file, "sst");
233 }
234
235 /*****************************************************************************
236 *
237 * Public functions.
238 *
239 ****************************************************************************/
240 /*
241 * Add to or find a string in the SST SharedString table and return it's index.
242 */
243 struct sst_element *
lxw_get_sst_index(lxw_sst * sst,const char * string,uint8_t is_rich_string)244 lxw_get_sst_index(lxw_sst *sst, const char *string, uint8_t is_rich_string)
245 {
246 struct sst_element *element;
247 struct sst_element *existing_element;
248
249 /* Create an sst element to potentially add to the table. */
250 element = calloc(1, sizeof(struct sst_element));
251 if (!element)
252 return NULL;
253
254 /* Create potential new element with the string and its index. */
255 element->index = sst->unique_count;
256 element->string = lxw_strdup(string);
257 element->is_rich_string = is_rich_string;
258
259 /* Try to insert it and see whether we already have that string. */
260 existing_element = RB_INSERT(sst_rb_tree, sst->rb_tree, element);
261
262 /* If existing_element is not NULL, then it already existed. */
263 /* Free new created element. */
264 if (existing_element) {
265 free(element->string);
266 free(element);
267 sst->string_count++;
268 return existing_element;
269 }
270
271 /* If it didn't exist, also add it to the insertion order linked list. */
272 STAILQ_INSERT_TAIL(sst->order_list, element, sst_order_pointers);
273
274 /* Update SST string counts. */
275 sst->string_count++;
276 sst->unique_count++;
277 return element;
278 }
279