1 /*****************************************************************************
2  * shared_strings - A library for creating Excel XLSX sst files.
3  *
4  * Used in conjunction with the libxlsxwriter library.
5  *
6  * Copyright 2014-2021, John McNamara, jmcnamara@cpan.org. See LICENSE.txt.
7  *
8  */
9 
10 #include "xlsxwriter/xmlwriter.h"
11 #include "xlsxwriter/shared_strings.h"
12 #include "xlsxwriter/utility.h"
13 #include <ctype.h>
14 
15 /*
16  * Forward declarations.
17  */
18 
/* Comparator used to order elements in the SST RB tree; defined below. */
STATIC int _element_cmp(struct sst_element *element1,
                        struct sst_element *element2);

/*
 * Instantiate the RB tree code for struct sst_element, keyed by
 * _element_cmp(). The expansion is skipped when the clang static analyzer
 * is running (__clang_analyzer__ defined).
 */
#ifndef __clang_analyzer__
LXW_RB_GENERATE_ELEMENT(sst_rb_tree, sst_element, sst_tree_pointers,
                        _element_cmp);
#endif
26 
27 /*****************************************************************************
28  *
29  * Private functions.
30  *
31  ****************************************************************************/
32 
33 /*
34  * Create a new SST SharedString object.
35  */
36 lxw_sst *
lxw_sst_new(void)37 lxw_sst_new(void)
38 {
39     /* Create the new shared string table. */
40     lxw_sst *sst = calloc(1, sizeof(lxw_sst));
41     RETURN_ON_MEM_ERROR(sst, NULL);
42 
43     /* Add the sst RB tree. */
44     sst->rb_tree = calloc(1, sizeof(struct sst_rb_tree));
45     GOTO_LABEL_ON_MEM_ERROR(sst->rb_tree, mem_error);
46 
47     /* Add a list for tracking the insertion order. */
48     sst->order_list = calloc(1, sizeof(struct sst_order_list));
49     GOTO_LABEL_ON_MEM_ERROR(sst->order_list, mem_error);
50 
51     /* Initialize the order list. */
52     STAILQ_INIT(sst->order_list);
53 
54     /* Initialize the RB tree. */
55     RB_INIT(sst->rb_tree);
56 
57     return sst;
58 
59 mem_error:
60     lxw_sst_free(sst);
61     return NULL;
62 }
63 
64 /*
65  * Free a SST SharedString table object.
66  */
67 void
lxw_sst_free(lxw_sst * sst)68 lxw_sst_free(lxw_sst *sst)
69 {
70     struct sst_element *sst_element;
71     struct sst_element *sst_element_temp;
72 
73     if (!sst)
74         return;
75 
76     /* Free the sst_elements and their data using the ordered linked list. */
77     if (sst->order_list) {
78         STAILQ_FOREACH_SAFE(sst_element, sst->order_list, sst_order_pointers,
79                             sst_element_temp) {
80 
81             if (sst_element && sst_element->string)
82                 free(sst_element->string);
83             if (sst_element)
84                 free(sst_element);
85         }
86     }
87 
88     free(sst->order_list);
89     free(sst->rb_tree);
90     free(sst);
91 }
92 
93 /*
94  * Comparator for the element structure
95  */
96 STATIC int
_element_cmp(struct sst_element * element1,struct sst_element * element2)97 _element_cmp(struct sst_element *element1, struct sst_element *element2)
98 {
99     return strcmp(element1->string, element2->string);
100 }
101 
102 /*****************************************************************************
103  *
104  * XML functions.
105  *
106  ****************************************************************************/
107 /*
108  * Write the XML declaration.
109  */
110 STATIC void
_sst_xml_declaration(lxw_sst * self)111 _sst_xml_declaration(lxw_sst *self)
112 {
113     lxw_xml_declaration(self->file);
114 }
115 
116 /*
117  * Write the <t> element.
118  */
119 STATIC void
_write_t(lxw_sst * self,char * string)120 _write_t(lxw_sst *self, char *string)
121 {
122     struct xml_attribute_list attributes;
123     struct xml_attribute *attribute;
124 
125     LXW_INIT_ATTRIBUTES();
126 
127     /* Add attribute to preserve leading or trailing whitespace. */
128     if (isspace((unsigned char) string[0])
129         || isspace((unsigned char) string[strlen(string) - 1]))
130         LXW_PUSH_ATTRIBUTES_STR("xml:space", "preserve");
131 
132     lxw_xml_data_element(self->file, "t", string, &attributes);
133 
134     LXW_FREE_ATTRIBUTES();
135 }
136 
137 /*
138  * Write the <si> element.
139  */
140 STATIC void
_write_si(lxw_sst * self,char * string)141 _write_si(lxw_sst *self, char *string)
142 {
143     uint8_t escaped_string = LXW_FALSE;
144 
145     lxw_xml_start_tag(self->file, "si", NULL);
146 
147     /* Look for and escape control chars in the string. */
148     if (lxw_has_control_characters(string)) {
149         string = lxw_escape_control_characters(string);
150         escaped_string = LXW_TRUE;
151     }
152 
153     /* Write the t element. */
154     _write_t(self, string);
155 
156     lxw_xml_end_tag(self->file, "si");
157 
158     if (escaped_string)
159         free(string);
160 }
161 
162 /*
163  * Write the <si> element for rich strings.
164  */
165 STATIC void
_write_rich_si(lxw_sst * self,char * string)166 _write_rich_si(lxw_sst *self, char *string)
167 {
168     lxw_xml_rich_si_element(self->file, string);
169 }
170 
171 /*
172  * Write the <sst> element.
173  */
174 STATIC void
_write_sst(lxw_sst * self)175 _write_sst(lxw_sst *self)
176 {
177     struct xml_attribute_list attributes;
178     struct xml_attribute *attribute;
179     char xmlns[] =
180         "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
181 
182     LXW_INIT_ATTRIBUTES();
183     LXW_PUSH_ATTRIBUTES_STR("xmlns", xmlns);
184     LXW_PUSH_ATTRIBUTES_INT("count", self->string_count);
185     LXW_PUSH_ATTRIBUTES_INT("uniqueCount", self->unique_count);
186 
187     lxw_xml_start_tag(self->file, "sst", &attributes);
188 
189     LXW_FREE_ATTRIBUTES();
190 }
191 
192 /*****************************************************************************
193  *
194  * XML file assembly functions.
195  *
196  ****************************************************************************/
197 
198 /*
199  * Assemble and write the XML file.
200  */
201 STATIC void
_write_sst_strings(lxw_sst * self)202 _write_sst_strings(lxw_sst *self)
203 {
204     struct sst_element *sst_element;
205 
206     STAILQ_FOREACH(sst_element, self->order_list, sst_order_pointers) {
207         /* Write the si element. */
208         if (sst_element->is_rich_string)
209             _write_rich_si(self, sst_element->string);
210         else
211             _write_si(self, sst_element->string);
212 
213     }
214 }
215 
216 /*
217  * Assemble and write the XML file.
218  */
219 void
lxw_sst_assemble_xml_file(lxw_sst * self)220 lxw_sst_assemble_xml_file(lxw_sst *self)
221 {
222     /* Write the XML declaration. */
223     _sst_xml_declaration(self);
224 
225     /* Write the sst element. */
226     _write_sst(self);
227 
228     /* Write the sst strings. */
229     _write_sst_strings(self);
230 
231     /* Close the sst tag. */
232     lxw_xml_end_tag(self->file, "sst");
233 }
234 
235 /*****************************************************************************
236  *
237  * Public functions.
238  *
239  ****************************************************************************/
240 /*
241  * Add to or find a string in the SST SharedString table and return it's index.
242  */
243 struct sst_element *
lxw_get_sst_index(lxw_sst * sst,const char * string,uint8_t is_rich_string)244 lxw_get_sst_index(lxw_sst *sst, const char *string, uint8_t is_rich_string)
245 {
246     struct sst_element *element;
247     struct sst_element *existing_element;
248 
249     /* Create an sst element to potentially add to the table. */
250     element = calloc(1, sizeof(struct sst_element));
251     if (!element)
252         return NULL;
253 
254     /* Create potential new element with the string and its index. */
255     element->index = sst->unique_count;
256     element->string = lxw_strdup(string);
257     element->is_rich_string = is_rich_string;
258 
259     /* Try to insert it and see whether we already have that string. */
260     existing_element = RB_INSERT(sst_rb_tree, sst->rb_tree, element);
261 
262     /* If existing_element is not NULL, then it already existed. */
263     /* Free new created element. */
264     if (existing_element) {
265         free(element->string);
266         free(element);
267         sst->string_count++;
268         return existing_element;
269     }
270 
271     /* If it didn't exist, also add it to the insertion order linked list. */
272     STAILQ_INSERT_TAIL(sst->order_list, element, sst_order_pointers);
273 
274     /* Update SST string counts. */
275     sst->string_count++;
276     sst->unique_count++;
277     return element;
278 }
279