1 /* ====================================================================
2 * The Kannel Software License, Version 1.0
3 *
4 * Copyright (c) 2001-2014 Kannel Group
5 * Copyright (c) 1998-2001 WapIT Ltd.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. The end-user documentation included with the redistribution,
21 * if any, must include the following acknowledgment:
22 * "This product includes software developed by the
23 * Kannel Group (http://www.kannel.org/)."
24 * Alternately, this acknowledgment may appear in the software itself,
25 * if and wherever such third-party acknowledgments normally appear.
26 *
27 * 4. The names "Kannel" and "Kannel Group" must not be used to
28 * endorse or promote products derived from this software without
29 * prior written permission. For written permission, please
30 * contact org@kannel.org.
31 *
32 * 5. Products derived from this software may not be called "Kannel",
33 * nor may "Kannel" appear in their name, without prior written
34 * permission of the Kannel Group.
35 *
36 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39 * DISCLAIMED. IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS
40 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
41 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
42 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
45 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
46 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 *
49 * This software consists of voluntary contributions made by many
50 * individuals on behalf of the Kannel Group. For more information on
51 * the Kannel Group, please see <http://www.kannel.org/>.
52 *
53 * Portions of this software are based upon software originally written at
54 * WapIT Ltd., Helsinki, Finland for the Kannel project.
55 */
56
/*
 * xml_shared.c: Common functions of xml compilers (mainly charset handling
 * and operations with wbxml binary not using a string table)
 *
 * By Tuomas Luttinen & Aarno Syvänen (for Wiral Ltd)
 */
63
64 #include <ctype.h>
65
66 #include "xml_shared.h"
67 #include "xml_definitions.h"
68
69 #include <string.h>
70
/*
 * One row of the character set table. A name such as "ISO-8859-1" is
 * stored split in two: the family ("ISO") and the rest ("8859-1").
 */
struct charset_t {
    char *charset;        /* family part of the name, upper case; NULL ends the table */
    char *nro;            /* part of the name following the family */
    unsigned int MIBenum; /* number handed back by parse_charset() for this
                           * set (values look like IANA MIBenum numbers) */
};
76
77 charset_t character_sets[] = {
78 { "ISO", "8859-1", 4 },
79 { "ISO", "8859-2", 5 },
80 { "ISO", "8859-3", 6 },
81 { "ISO", "8859-4", 7 },
82 { "ISO", "8859-5", 8 },
83 { "ISO", "8859-6", 9 },
84 { "ISO", "8859-7", 10 },
85 { "ISO", "8859-8", 11 },
86 { "ISO", "8859-9", 12 },
87 { "WINDOWS", "1250", 2250 },
88 { "WINDOWS", "1251", 2251 },
89 { "WINDOWS", "1252", 2252 },
90 { "WINDOWS", "1253", 2253 },
91 { "WINDOWS", "1254", 2254 },
92 { "WINDOWS", "1255", 2255 },
93 { "WINDOWS", "1256", 2256 },
94 { "WINDOWS", "1257", 2257 },
95 { "WINDOWS", "1258", 2258 },
96 { "UTF", "8", 106 },
97 { NULL }
98 };
99
100 /****************************************************************************
101 *
102 * Implementation of external functions
103 */
104
105
106 /*
107 * set_charset - if xml doesn't have an <?xml..encoding=something>,
108 * converts body from argument charset to UTF-8
109 */
110
set_charset(Octstr * document,Octstr * charset)111 void set_charset(Octstr *document, Octstr *charset)
112 {
113 long gt = 0, enc = 0;
114 Octstr *encoding = NULL, *text = NULL, *temp = NULL;
115
116 if (octstr_len(charset) == 0)
117 return;
118
119 encoding = octstr_create(" encoding");
120 enc = octstr_search(document, encoding, 0);
121 gt = octstr_search_char(document, '>', 0);
122
123 if (enc < 0 || enc > gt) {
124 gt++;
125 text = octstr_copy(document, gt, octstr_len(document) - gt);
126 if (charset_to_utf8(text, &temp, charset) >= 0) {
127 octstr_delete(document, gt, octstr_len(document) - gt);
128 octstr_append_data(document, octstr_get_cstr(temp),
129 octstr_len(temp));
130 }
131
132 octstr_destroy(temp);
133 octstr_destroy(text);
134 }
135
136 octstr_destroy(encoding);
137 }
138
139
140 /*
141 * find_charset_encoding -- parses for a encoding argument within
142 * the xml preabmle, ie. <?xml verion="xxx" encoding="ISO-8859-1"?>
143 */
144
find_charset_encoding(Octstr * document)145 Octstr *find_charset_encoding(Octstr *document)
146 {
147 long gt = 0, enc = 0;
148 Octstr *encoding = NULL, *temp = NULL;
149
150 enc = octstr_search(document, octstr_imm(" encoding="), 0);
151 gt = octstr_search(document, octstr_imm("?>"), 0);
152
153 /* in case there is no encoding argument, assume always UTF-8 */
154 if (enc < 0 || enc + 10 > gt)
155 return NULL;
156
157 temp = octstr_copy(document, enc + 10, gt - (enc + 10));
158 octstr_strip_blanks(temp);
159 encoding = octstr_copy(temp, 1, octstr_len(temp) - 2);
160 octstr_destroy(temp);
161
162 return encoding;
163 }
164
165
/*
 * only_blanks - checks if a text node contains only white space, when it can
 * be left out as a element content.
 *
 * Returns 1 when every character of text satisfies isspace(), which
 * includes the empty string, and 0 otherwise. A NULL text is treated
 * like an empty one.
 */
int only_blanks(const char *text)
{
    const unsigned char *p;

    if (text == NULL)
        return 1;

    /*
     * Walk the bytes as unsigned char: handing isspace() a negative value
     * (a byte >= 0x80 where plain char is signed) is undefined behaviour.
     */
    for (p = (const unsigned char *) text; *p != '\0'; p++) {
        if (!isspace(*p))
            return 0;
    }

    return 1;
}
184
185 /*
186 * Parses the character set of the document.
187 */
188
parse_charset(Octstr * os)189 int parse_charset(Octstr *os)
190 {
191 Octstr *charset = NULL;
192 Octstr *number = NULL;
193 int i, j, cut = 0, ret = 0;
194
195 gw_assert(os != NULL);
196 charset = octstr_duplicate(os);
197
198 /* The charset might be in lower case, so... */
199 octstr_convert_range(charset, 0, octstr_len(charset), toupper);
200
201 /*
202 * The character set is handled in two parts to make things easier.
203 * The cutting.
204 */
205 if ((cut = octstr_search_char(charset, '_', 0)) > 0) {
206 number = octstr_copy(charset, cut + 1, (octstr_len(charset) - (cut + 1)));
207 octstr_truncate(charset, cut);
208 }
209 else if ((cut = octstr_search_char(charset, '-', 0)) > 0) {
210 number = octstr_copy(charset, cut + 1, (octstr_len(charset) - (cut + 1)));
211 octstr_truncate(charset, cut);
212 }
213
214 /* And table search. */
215 for (i = 0; character_sets[i].charset != NULL; i++)
216 if (octstr_str_compare(charset, character_sets[i].charset) == 0) {
217 for (j = i; octstr_str_compare(charset,
218 character_sets[j].charset) == 0; j++)
219 if (octstr_str_compare(number, character_sets[j].nro) == 0) {
220 ret = character_sets[j].MIBenum;
221 break;
222 }
223 break;
224 }
225
226 /* UTF-8 is the default value */
227 if (character_sets[i].charset == NULL)
228 ret = character_sets[i-1].MIBenum;
229
230 octstr_destroy(number);
231 octstr_destroy(charset);
232
233 return ret;
234 }
235
236 /*
237 * element_check_content - a helper function for parse_element for checking
238 * if an element has content or attributes. Returns status bit for attributes
239 * (0x80) and another for content (0x40) added into one octet.
240 */
241
element_check_content(xmlNodePtr node)242 unsigned char element_check_content(xmlNodePtr node)
243 {
244 unsigned char status_bits = 0x00;
245
246 if ((node->children != NULL) &&
247 !((node->children->next == NULL) &&
248 (node->children->type == XML_TEXT_NODE) &&
249 (only_blanks((char *)node->children->content))))
250 status_bits = WBXML_CONTENT_BIT;
251
252 if (node->properties != NULL)
253 status_bits = status_bits | WBXML_ATTR_BIT;
254
255 return status_bits;
256 }
257
258 /*
259 * Return the character sets supported by the WML compiler, as a List
260 * of Octstrs, where each string is the MIME identifier for one charset.
261 */
wml_charsets(void)262 List *wml_charsets(void)
263 {
264 int i;
265 List *result;
266 Octstr *charset;
267
268 result = gwlist_create();
269 for (i = 0; character_sets[i].charset != NULL; i++) {
270 charset = octstr_create(character_sets[i].charset);
271 octstr_append_char(charset, '-');
272 octstr_append(charset, octstr_imm(character_sets[i].nro));
273 gwlist_append(result, charset);
274 }
275
276 return result;
277 }
278
279 /*
280 * Functions working with simple binary data type (no string table). No
281 * variables are present either.
282 */
283
simple_binary_create(void)284 simple_binary_t *simple_binary_create(void)
285 {
286 simple_binary_t *binary;
287
288 binary = gw_malloc(sizeof(simple_binary_t));
289
290 binary->wbxml_version = 0x00;
291 binary->public_id = 0x00;
292 binary->charset = 0x00;
293 binary->binary = octstr_create("");
294
295 return binary;
296 }
297
/*
 * Free a simple_binary_t together with the octet string it holds.
 * Passing NULL is allowed and does nothing.
 */
void simple_binary_destroy(simple_binary_t *binary)
{
    if (binary != NULL) {
        octstr_destroy(binary->binary);
        gw_free(binary);
    }
}
306
307 /*
308 * Output the wbxml content field after field into octet string os. We add
309 * string table length 0 (meaning no string table) before the content.
310 */
simple_binary_output(Octstr * os,simple_binary_t * binary)311 void simple_binary_output(Octstr *os, simple_binary_t *binary)
312 {
313 gw_assert(octstr_len(os) == 0);
314 octstr_format_append(os, "%c", binary->wbxml_version);
315 octstr_format_append(os, "%c", binary->public_id);
316 octstr_append_uintvar(os, binary->charset);
317 octstr_format_append(os, "%c", 0x00);
318 octstr_format_append(os, "%S", binary->binary);
319 }
320
/*
 * Emit the WBXML_END token into the binary via output_char().
 */
void parse_end(simple_binary_t **binary)
{
    const int end_token = WBXML_END;

    output_char(end_token, binary);
}
325
/*
 * Append one octet to the binary string of *binary.
 */
void output_char(int byte, simple_binary_t **binary)
{
    octstr_append_char((*binary)->binary, byte);
}
330
/*
 * Write octet string os to the binary. This simply forwards to
 * output_octet_string().
 */
void parse_octet_string(Octstr *os, simple_binary_t **binary)
{
    output_octet_string(os, binary);
}
335
336 /*
337 * Add global tokens to the start and to the end of an inline string.
338 */
parse_inline_string(Octstr * temp,simple_binary_t ** binary)339 void parse_inline_string(Octstr *temp, simple_binary_t **binary)
340 {
341 Octstr *startos;
342
343 octstr_insert(temp, startos = octstr_format("%c", WBXML_STR_I), 0);
344 octstr_destroy(startos);
345 octstr_format_append(temp, "%c", WBXML_STR_END);
346 parse_octet_string(temp, binary);
347 }
348
/*
 * Add the contents of os to the end of the binary string of *sibxml
 * (inserted at the position equal to that string's current length).
 */
void output_octet_string(Octstr *os, simple_binary_t **sibxml)
{
    Octstr *dest = (*sibxml)->binary;

    octstr_insert(dest, os, octstr_len(dest));
}
353