1 /*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15 *
16 * Copyright (C) 2003 - The Authors
17 *
18 * Author : Richard GAYRAUD - 04 Nov 2003
19 * From Hewlett Packard Company.
20 */
21
/*
 * Mini xml parser:
 *
 * WARNING 1: Only supports printable
 * ASCII characters in xml files. '\0'
 * is not a valid character. Returned strings are
 * NUL-terminated.
 *
 * WARNING 2: Does not support multithreading. Works
 * with static buffers, no memory allocation.
 */
33
34 /******************* Include files *********************/
35
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "xp_parser.h"
41
/*
 * True when the string `haystack` begins with `needle`.
 *
 * `needle` must be a string literal (or a char array): its length is taken
 * with sizeof, which would give the size of a pointer for a `char *`.
 */
#define strstartswith(haystack, needle) \
    (!strncmp(haystack, needle, sizeof(needle) - 1))
44
45 /************* Constants and Global variables ***********/
46
/* Size of the static buffer xp_open_element() copies an element name into. */
#define XP_MAX_NAME_LEN 256
/* Maximum number of document characters held in xp_file (NUL excluded). */
#define XP_MAX_FILE_LEN 65536
/* Number of slots in the xp_position stack. */
#define XP_MAX_STACK_LEN 256

/* NUL-terminated copy of the XML document being parsed. */
char xp_file [XP_MAX_FILE_LEN + 1];
/* Stack of cursors into xp_file; xp_position[xp_stack] is the current one. */
char *xp_position [XP_MAX_STACK_LEN];
/* Index of the current (top) entry of xp_position. */
int xp_stack = 0;
54
55 /****************** Internal routines ********************/
56
/*
 * Look up the replacement text of an entity name.
 *
 * escape: start of the entity name (the text right after '&'); it does not
 *         have to be NUL-terminated at `len`.
 * len:    number of characters in the name (the ';' is not included).
 *
 * Returns the replacement string for "amp", "gt", "lt" or "quot", or NULL
 * when the name is not exactly one of those.
 */
static const char *xp_find_escape(const char *escape, size_t len)
{
    static const struct escape {
        const char *name;
        const char *value;
    } html_escapes[] = {
        { "amp", "&" },
        { "gt", ">" },
        { "lt", "<" },
        { "quot", "\"" },
        { NULL, NULL }
    };
    const struct escape *n;

    for (n = html_escapes; n->name; ++n) {
        /* The lengths must agree as well: comparing only `len` characters
         * would accept any prefix of a name ("a", "q", even the empty
         * name) as if it were the full entity. */
        if (strlen(n->name) == len && strncmp(escape, n->name, len) == 0)
            return n->value;
    }
    return NULL;
}
77
/*
 * Find the end of a start tag such as <send foo="bar">, without recursing
 * into other elements.
 *
 * ptr: position inside the tag, after its opening '<'.
 *
 * Comments ("<!-- ... -->") and double-quoted strings (in which a backslash
 * escapes the next character) are skipped over.
 *
 * Returns a pointer to the closing '>' or to the '/' of a closing "/>";
 * a pointer to the terminating NUL when the text ends before the tag is
 * closed; NULL when a '<' that does not open a comment is met first, or
 * when a comment is never closed.
 */
static char *xp_find_start_tag_end(char *ptr)
{
    while (*ptr) {
        if (*ptr == '<') {
            char *comment_end;

            /* Anything but a comment means a new element starts here. */
            if (strncmp(ptr, "<!--", sizeof("<!--") - 1) != 0)
                return NULL;
            comment_end = strstr(ptr, "-->");
            if (!comment_end)
                return NULL;
            ptr = comment_end + 3;
        } else if ((*ptr == '/') && (*(ptr+1) == '>')) {
            return ptr;
        } else if (*ptr == '"') {
            ptr++;
            while (*ptr) {
                if (*ptr == '\\') {
                    /* Skip the escaped character too, but never step over
                     * the terminating NUL when the backslash comes last. */
                    ptr += ptr[1] ? 2 : 1;
                } else if (*ptr == '"') {
                    ptr++;
                    break;
                } else {
                    ptr++;
                }
            }
        } else if (*ptr == '>') {
            return ptr;
        } else {
            ptr++;
        }
    }
    return ptr;
}
114
xp_find_local_end()115 static char *xp_find_local_end()
116 {
117 char *ptr = xp_position[xp_stack];
118 int level = 0;
119
120 while (*ptr) {
121 if (*ptr == '<') {
122 if (strstartswith(ptr, "<![CDATA[")) {
123 char *cdata_end = strstr(ptr, "]]>");
124 if (!cdata_end)
125 return NULL;
126 ptr = cdata_end + 3;
127 } else if (strstartswith(ptr, "<!--")) {
128 char *comment_end = strstr(ptr, "-->");
129 if (!comment_end)
130 return NULL;
131 ptr = comment_end + 3;
132 } else if (*(ptr+1) == '/') {
133 level--;
134 if (level < 0)
135 return ptr;
136 } else {
137 level++;
138 }
139 } else if ((*ptr == '/') && (*(ptr+1) == '>')) {
140 level--;
141 if (level < 0)
142 return ptr;
143 } else if (*ptr == '"') {
144 ptr++;
145 while (*ptr) {
146 if (*ptr == '\\') {
147 ptr++; /* Skip the slash. */
148 } else if (*ptr == '"') {
149 break;
150 }
151 ptr++;
152 }
153 }
154 ptr++;
155 }
156 return ptr;
157 }
158
159 /********************* Interface routines ********************/
160
/*
 * Copy `source` into `dest`, replacing every "&name;" whose name is known
 * to xp_find_escape() by its replacement text.
 *
 * An '&' whose name xp_find_escape() rejects, or that is not followed by
 * any ';', is copied unchanged.
 *
 * source: NUL-terminated input text.
 * dest:   output buffer; no size is passed, so the caller has to provide
 *         enough room for the result and its terminating NUL.
 *
 * Returns the number of characters written (NUL excluded), or -1 when
 * either pointer is NULL.
 */
int xp_unescape(const char *source, char *dest)
{
    const char *in;
    char *out;
    size_t plain;

    if (source == NULL || dest == NULL)
        return -1;

    in = source;
    out = dest;

    for (;;) {
        size_t name_len;
        size_t value_len;
        const char *value;

        /* Length of the run of ordinary text in front of the next '&'. */
        plain = strcspn(in, "&");
        if (in[plain] == '\0')
            break;

        memcpy(out, in, plain);
        out += plain;
        in += plain + 1;            /* now just after the '&' */

        name_len = strcspn(in, ";");
        if (in[name_len] == '\0') {
            /* No ';' anymore: the rest is plain text, '&' included. */
            *out++ = '&';
            plain = name_len;
            break;
        }

        value = xp_find_escape(in, name_len);
        if (value == NULL) {
            /* Unknown name: keep the '&' and rescan what follows it. */
            *out++ = '&';
            continue;
        }

        value_len = strlen(value);
        memcpy(out, value, value_len);
        out += value_len;
        in += name_len + 1;         /* skip the name and its ';' */
    }

    /* Copy the remaining tail of plain text. */
    if (plain != 0) {
        memcpy(out, in, plain);
        out += plain;
    }

    *out = '\0';
    return (int)(out - dest);
}
215
xp_set_xml_buffer_from_string(const char * str)216 int xp_set_xml_buffer_from_string(const char *str)
217 {
218 size_t len = strlen(str);
219
220 if (len > XP_MAX_FILE_LEN) {
221 return 0;
222 }
223
224 strcpy(xp_file, str);
225 xp_stack = 0;
226 xp_position[xp_stack] = xp_file;
227
228 if (!strstartswith(xp_position[xp_stack], "<?xml"))
229 return 0;
230 if (!strstr(xp_position[xp_stack], "?>"))
231 return 0;
232 xp_position[xp_stack] = xp_position[xp_stack] + 2;
233
234 return 1;
235 }
236
xp_set_xml_buffer_from_file(const char * filename)237 int xp_set_xml_buffer_from_file(const char *filename)
238 {
239 FILE *f = fopen(filename, "rb");
240 char *pos;
241 int index = 0;
242 int c;
243
244 if (!f) {
245 return 0;
246 }
247
248 while ((c = fgetc(f)) != EOF) {
249 if (c == '\r')
250 continue;
251 xp_file[index++] = c;
252 if (index >= XP_MAX_FILE_LEN) {
253 xp_file[index++] = 0;
254 xp_stack = 0;
255 xp_position[xp_stack] = xp_file;
256 fclose(f);
257 return 0;
258 }
259 }
260 xp_file[index++] = 0;
261 fclose(f);
262
263 xp_stack = 0;
264 xp_position[xp_stack] = xp_file;
265
266 if (!strstartswith(xp_position[xp_stack], "<?xml"))
267 return 0;
268 if (!(pos = strstr(xp_position[xp_stack], "?>")))
269 return 0;
270 xp_position[xp_stack] = pos + 2;
271
272 return 1;
273 }
274
xp_open_element(int index)275 char *xp_open_element(int index)
276 {
277 char *ptr = xp_position[xp_stack];
278 int level = 0;
279 static char name[XP_MAX_NAME_LEN];
280
281 while (*ptr) {
282 if (*ptr == '<') {
283 if ((*(ptr+1) == '!') &&
284 (*(ptr+2) == '[') &&
285 (strstr(ptr, "<![CDATA[") == ptr)) {
286 char *cdata_end = strstr(ptr, "]]>");
287 if (!cdata_end)
288 return NULL;
289 ptr = cdata_end + 2;
290 } else if ((*(ptr+1) == '!') &&
291 (*(ptr+2) == '-') &&
292 (strstr(ptr, "<!--") == ptr)) {
293 char *comment_end = strstr(ptr, "-->");
294 if (!comment_end)
295 return NULL;
296 ptr = comment_end + 2;
297 } else if (strstartswith(ptr, "<!DOCTYPE")) {
298 char *doctype_end = strstr(ptr, ">");
299 if (!doctype_end)
300 return NULL;
301 ptr = doctype_end;
302 } else if (*(ptr+1) == '/') {
303 level--;
304 if (level < 0)
305 return NULL;
306 } else {
307 if (level == 0) {
308 if (index) {
309 index--;
310 } else {
311 char *end = xp_find_start_tag_end(ptr + 1);
312 char *p;
313 if (!end) {
314 return NULL;
315 }
316 p = strchr(ptr, ' ');
317 if (p && (p < end)) {
318 end = p;
319 }
320 p = strchr(ptr, '\t');
321 if (p && (p < end)) {
322 end = p;
323 }
324 p = strchr(ptr, '\r');
325 if (p && (p < end)) {
326 end = p;
327 }
328 p = strchr(ptr, '\n');
329 if (p && (p < end)) {
330 end = p;
331 }
332 p = strchr(ptr, '/');
333 if (p && (p < end)) {
334 end = p;
335 }
336
337 memcpy(name, ptr + 1, end-ptr-1);
338 name[end-ptr-1] = 0;
339
340 xp_position[++xp_stack] = end;
341 return name;
342 }
343 }
344
345 /* We want to skip over this particular element .*/
346 ptr = xp_find_start_tag_end(ptr + 1);
347 if (!ptr)
348 return NULL;
349 ptr--;
350 level++;
351 }
352 } else if ((*ptr == '/') && (*(ptr+1) == '>')) {
353 level--;
354 if (level < 0)
355 return NULL;
356 }
357 ptr++;
358 }
359 return NULL;
360 }
361
xp_close_element(void)362 void xp_close_element(void)
363 {
364 if (xp_stack) {
365 xp_stack--;
366 }
367 }
368
xp_get_value(const char * name)369 char *xp_get_value(const char *name)
370 {
371 int index = 0;
372 static char buffer[XP_MAX_FILE_LEN + 1];
373 char *ptr, *end, *check;
374
375 end = xp_find_start_tag_end(xp_position[xp_stack] + 1);
376 if (!end)
377 return NULL;
378
379 ptr = xp_position[xp_stack];
380
381 while (*ptr) {
382 ptr = strstr(ptr, name);
383
384 if (!ptr)
385 return NULL;
386 if (ptr > end)
387 return NULL;
388 /* FIXME: potential BUG in parser: we must retrieve full word,
389 * so the use of strstr as it is is not enough.
390 * we should check that the retrieved word is not a piece of
391 * another one. */
392 check = ptr - 1;
393 if (check >= xp_position[xp_stack]) {
394 if ((*check != '\r') &&
395 (*check != '\n') &&
396 (*check != '\t') &&
397 (*check != ' ' )) {
398 ptr += strlen(name);
399 continue;
400 }
401 } else
402 return(NULL);
403
404 ptr += strlen(name);
405 while ((*ptr == '\r') ||
406 (*ptr == '\n') ||
407 (*ptr == '\t') ||
408 (*ptr == ' ' ) ) {
409 ptr++;
410 }
411 if (*ptr != '=')
412 continue;
413 ptr++;
414 while ((*ptr == '\r') ||
415 (*ptr == '\n') ||
416 (*ptr == '\t') ||
417 (*ptr == ' ') ) {
418 ptr++;
419 }
420 ptr++;
421 if (*ptr) {
422 while (*ptr) {
423 if (*ptr == '\\') {
424 ptr++;
425 switch(*ptr) {
426 case '\\':
427 buffer[index++] = '\\';
428 break;
429 case '"':
430 buffer[index++] = '"';
431 break;
432 case 'n':
433 buffer[index++] = '\n';
434 break;
435 case 't':
436 buffer[index++] = '\t';
437 break;
438 case 'r':
439 buffer[index++] = '\r';
440 break;
441 default:
442 buffer[index++] = '\\';
443 buffer[index++] = *ptr;
444 break;
445 }
446 ptr++;
447 } else if (*ptr == '"') {
448 break;
449 } else {
450 buffer[index++] = *ptr++;
451 }
452 if (index > XP_MAX_FILE_LEN)
453 return NULL;
454 }
455 buffer[index] = 0;
456 return buffer;
457 }
458 }
459 return NULL;
460 }
461
xp_get_cdata(void)462 char* xp_get_cdata(void)
463 {
464 static char buffer[XP_MAX_FILE_LEN + 1];
465 const char *end = xp_find_local_end();
466 const char *ptr;
467
468 ptr = strstr(xp_position[xp_stack], "<![CDATA[");
469 if (!ptr) {
470 return NULL;
471 }
472 ptr += 9;
473 if (ptr > end)
474 return NULL;
475 end = strstr(ptr, "]]>");
476 if (!end) {
477 return NULL;
478 }
479 if ((end - ptr) > XP_MAX_FILE_LEN)
480 return NULL;
481 memcpy(buffer, ptr, (end - ptr));
482 buffer[end-ptr] = 0;
483 return buffer;
484 }
485
/*
 * Find the first line of `buffer` (a line being what follows a '\n') that
 * starts with `name`, compared without regard to ASCII case, immediately
 * followed by ':'.  Returns a pointer just after the ':', or NULL.
 */
static const char *xp_find_header_value(const char *buffer, const char *name)
{
    const size_t name_len = strlen(name);
    const char *line = buffer;

    while ((line = strchr(line, '\n')) != NULL) {
        size_t i = 0;

        line++;
        /* A NUL in `line` never equals a character of `name`, so the
         * comparison cannot run past the end of the buffer. */
        while (i < name_len &&
               tolower((unsigned char)line[i]) ==
               tolower((unsigned char)name[i])) {
            i++;
        }
        if (i == name_len && line[i] == ':')
            return line + i + 1;
    }
    return NULL;
}

/*
 * Extract the value of the Content-Length header of a message.
 *
 * P_buffer: NUL-terminated message text.  The header has to start a line
 *           that follows a '\n'.
 *
 * The long form "Content-Length" is looked for first, in any letter case,
 * then the compact form "l".
 *
 * Returns the numeric value of the header, or -1 when P_buffer is NULL,
 * when the message has no such header, or when its value is not a number
 * that fits in an int.
 */
int xp_get_content_length(const char *P_buffer)
{
    const char *L_ctl_hdr;
    char *L_end;
    long L_value;

    if (!P_buffer)
        return -1;

    L_ctl_hdr = xp_find_header_value(P_buffer, "Content-Length");
    if (!L_ctl_hdr) {
        L_ctl_hdr = xp_find_header_value(P_buffer, "l");
    }
    if (!L_ctl_hdr) {
        /* The message does not contain content-length. */
        return -1;
    }

    /* <ctype.h> functions need a value representable as unsigned char. */
    while (isspace((unsigned char)*L_ctl_hdr))
        L_ctl_hdr++;

    errno = 0;
    L_value = strtol(L_ctl_hdr, &L_end, 10);
    if (L_end == L_ctl_hdr)
        return -1;                  /* no digits at all */
    if (errno == ERANGE || L_value < INT_MIN || L_value > INT_MAX)
        return -1;                  /* does not fit in an int */

    return (int)L_value;
}
525