1 /*
2  *  This program is free software; you can redistribute it and/or modify
3  *  it under the terms of the GNU General Public License as published by
4  *  the Free Software Foundation; either version 2 of the License, or
5  *  (at your option) any later version.
6  *
7  *  This program is distributed in the hope that it will be useful,
8  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  *  GNU General Public License for more details.
11  *
12  *  You should have received a copy of the GNU General Public License
13  *  along with this program; if not, write to the Free Software
14  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
15  *
16  *  Copyright (C) 2003 - The Authors
17  *
18  *  Author : Richard GAYRAUD - 04 Nov 2003
19  *           From Hewlett Packard Company.
20  */
21 
/*
 * Mini xml parser:
 *
 * WARNING 1: Only supports printable
 * ASCII characters in xml files. '\0'
 * is not a valid character. Returned strings are
 * NUL-terminated.
 *
 * WARNING 2: Does not support multithreading. Works
 * with static buffers, no memory allocation.
 */
33 
34 /*******************  Include files *********************/
35 
36 #include <stdio.h>
37 #include <string.h>
38 #include <ctype.h>
39 
40 #include "xp_parser.h"
41 
/*
 * True when the string `haystack` begins with `needle`.
 *
 * `needle` must be a string literal (or a char array): its length is taken
 * with sizeof, which would yield the pointer size for a plain `char *`.
 */
#define strstartswith(haystack, needle) \
    (strncmp((haystack), (needle), sizeof(needle) - 1) == 0)
44 
45 /************* Constants and Global variables ***********/
46 
/* Size of the static buffer in which xp_open_element() returns a name. */
#define XP_MAX_NAME_LEN 256
/* Maximum number of characters of XML text held in xp_file, not counting
 * the terminating NUL. */
#define XP_MAX_FILE_LEN 65536
/* Number of entries in the xp_position stack. */
#define XP_MAX_STACK_LEN 256

/* Text of the XML document loaded by the xp_set_xml_buffer_from_*() calls. */
char xp_file[XP_MAX_FILE_LEN + 1];
/* Stack of positions inside xp_file; entry xp_stack is the current one. */
char *xp_position[XP_MAX_STACK_LEN];
/* Index of the current entry in xp_position. */
int xp_stack = 0;
54 
55 /****************** Internal routines ********************/
56 
/*
 * Look up the replacement text for an entity name.
 *
 * escape points at the first character after the '&'; len is the number of
 * characters that precede the terminating ';'. The name does not have to be
 * NUL-terminated at len.
 *
 * Returns the replacement string for "amp", "gt", "lt" and "quot", or NULL
 * for anything else. The whole name has to match: an empty name or a mere
 * prefix of a known name (such as "a" or "qu") is rejected.
 */
static const char *xp_find_escape(const char *escape, size_t len)
{
    static const struct escape {
        const char *name;
        const char *value;
    } html_escapes[] = {
        { "amp", "&" },
        { "gt", ">" },
        { "lt", "<" },
        { "quot", "\"" },
        { NULL, NULL }
    };
    const struct escape *n;

    for (n = html_escapes; n->name; ++n) {
        /* Compare the lengths first: strncmp() on its own accepts every
         * prefix of a known name, and everything when len is 0. */
        if (strlen(n->name) == len && strncmp(escape, n->name, len) == 0)
            return n->value;
    }
    return NULL;
}
77 
/*
 * Find the end of a start tag such as <send foo="bar">, without recursing
 * into other elements.
 *
 * ptr points inside the tag, after its opening '<'. Double-quoted attribute
 * values are skipped (a backslash escapes the character that follows it),
 * and so are <!-- ... --> comments.
 *
 * Returns a pointer to the closing '>', or to the '/' of a closing "/>".
 * Returns NULL when a '<' that does not start a comment is met first, or
 * when a comment is not terminated. When the text ends before the tag does,
 * a pointer to the terminating '\0' is returned.
 */
static char *xp_find_start_tag_end(char *ptr)
{
    while (*ptr) {
        if (*ptr == '<') {
            char *comment_end;

            if (strncmp(ptr, "<!--", 4) != 0)
                return NULL;
            comment_end = strstr(ptr, "-->");
            if (!comment_end)
                return NULL;
            ptr = comment_end + 3;
        } else if ((*ptr == '/') && (*(ptr + 1) == '>')) {
            return ptr;
        } else if (*ptr == '>') {
            return ptr;
        } else if (*ptr == '"') {
            ptr++;
            while (*ptr) {
                if (*ptr == '\\') {
                    ptr++;
                    /* Skip the escaped character, but never step over the
                     * terminator when the backslash is the last character. */
                    if (*ptr)
                        ptr++;
                } else if (*ptr == '"') {
                    ptr++;
                    break;
                } else {
                    ptr++;
                }
            }
        } else {
            ptr++;
        }
    }
    return ptr;
}
114 
xp_find_local_end()115 static char *xp_find_local_end()
116 {
117     char *ptr = xp_position[xp_stack];
118     int level = 0;
119 
120     while (*ptr) {
121         if (*ptr == '<') {
122             if (strstartswith(ptr, "<![CDATA[")) {
123                 char *cdata_end = strstr(ptr, "]]>");
124                 if (!cdata_end)
125                     return NULL;
126                 ptr = cdata_end + 3;
127             } else if (strstartswith(ptr, "<!--")) {
128                 char *comment_end = strstr(ptr, "-->");
129                 if (!comment_end)
130                     return NULL;
131                 ptr = comment_end + 3;
132             } else if (*(ptr+1) == '/') {
133                 level--;
134                 if (level < 0)
135                     return ptr;
136             } else {
137                 level++;
138             }
139         } else if ((*ptr == '/') && (*(ptr+1) == '>')) {
140             level--;
141             if (level < 0)
142                 return ptr;
143         } else if (*ptr == '"') {
144             ptr++;
145             while (*ptr) {
146                 if (*ptr == '\\') {
147                     ptr++; /* Skip the slash. */
148                 } else if (*ptr == '"') {
149                     break;
150                 }
151                 ptr++;
152             }
153         }
154         ptr++;
155     }
156     return ptr;
157 }
158 
159 /********************* Interface routines ********************/
160 
/*
 * Copy source into dest, replacing every "&name;" that xp_find_escape()
 * recognises by its replacement text.
 *
 * An '&' that is not followed by a ';' anywhere in the rest of the string,
 * or whose name is not recognised, is copied unchanged. The result is
 * NUL-terminated.
 *
 * Returns the number of characters written to dest, not counting the
 * terminating NUL, or -1 when source or dest is NULL.
 */
int xp_unescape(const char *source, char *dest)
{
    const char *in;
    char *out;

    if (source == NULL || dest == NULL) {
        return -1;
    }

    in = source;
    out = dest;

    for (;;) {
        size_t plain_len;
        size_t name_len;
        size_t value_len;
        const char *value;

        /* Copy the run of ordinary characters up to the next '&'. */
        plain_len = strcspn(in, "&");
        memcpy(out, in, plain_len);
        out += plain_len;
        in += plain_len;

        if (*in == '\0') {
            break;
        }
        in++; /* Step over the '&'. */

        name_len = strcspn(in, ";");
        if (in[name_len] == '\0') {
            /* No ';' left: emit the '&' and the remainder verbatim. */
            *out++ = '&';
            memcpy(out, in, name_len);
            out += name_len;
            break;
        }

        value = xp_find_escape(in, name_len);
        if (value == NULL) {
            /* Unknown name: keep the '&' and rescan from the name. */
            *out++ = '&';
            continue;
        }

        value_len = strlen(value);
        memcpy(out, value, value_len);
        out += value_len;
        in += name_len + 1;
    }

    *out = '\0';
    return out - dest;
}
215 
xp_set_xml_buffer_from_string(const char * str)216 int xp_set_xml_buffer_from_string(const char *str)
217 {
218     size_t len = strlen(str);
219 
220     if (len > XP_MAX_FILE_LEN) {
221         return 0;
222     }
223 
224     strcpy(xp_file, str);
225     xp_stack = 0;
226     xp_position[xp_stack] = xp_file;
227 
228     if (!strstartswith(xp_position[xp_stack], "<?xml"))
229         return 0;
230     if (!strstr(xp_position[xp_stack], "?>"))
231         return 0;
232     xp_position[xp_stack] = xp_position[xp_stack] + 2;
233 
234     return 1;
235 }
236 
xp_set_xml_buffer_from_file(const char * filename)237 int xp_set_xml_buffer_from_file(const char *filename)
238 {
239     FILE *f = fopen(filename, "rb");
240     char *pos;
241     int index = 0;
242     int c;
243 
244     if (!f) {
245         return 0;
246     }
247 
248     while ((c = fgetc(f)) != EOF) {
249         if (c == '\r')
250             continue;
251         xp_file[index++] = c;
252         if (index >= XP_MAX_FILE_LEN) {
253             xp_file[index++] = 0;
254             xp_stack = 0;
255             xp_position[xp_stack] = xp_file;
256             fclose(f);
257             return 0;
258         }
259     }
260     xp_file[index++] = 0;
261     fclose(f);
262 
263     xp_stack = 0;
264     xp_position[xp_stack] = xp_file;
265 
266     if (!strstartswith(xp_position[xp_stack], "<?xml"))
267         return 0;
268     if (!(pos = strstr(xp_position[xp_stack], "?>")))
269         return 0;
270     xp_position[xp_stack] = pos + 2;
271 
272     return 1;
273 }
274 
xp_open_element(int index)275 char *xp_open_element(int index)
276 {
277     char *ptr = xp_position[xp_stack];
278     int level = 0;
279     static char name[XP_MAX_NAME_LEN];
280 
281     while (*ptr) {
282         if (*ptr == '<') {
283             if ((*(ptr+1) == '!') &&
284                     (*(ptr+2) == '[') &&
285                     (strstr(ptr, "<![CDATA[") == ptr)) {
286                 char *cdata_end = strstr(ptr, "]]>");
287                 if (!cdata_end)
288                     return NULL;
289                 ptr = cdata_end + 2;
290             } else if ((*(ptr+1) == '!') &&
291                        (*(ptr+2) == '-') &&
292                        (strstr(ptr, "<!--") == ptr)) {
293                 char *comment_end = strstr(ptr, "-->");
294                 if (!comment_end)
295                     return NULL;
296                 ptr = comment_end + 2;
297             } else if (strstartswith(ptr, "<!DOCTYPE")) {
298                 char *doctype_end = strstr(ptr, ">");
299                 if (!doctype_end)
300                     return NULL;
301                 ptr = doctype_end;
302             } else if (*(ptr+1) == '/') {
303                 level--;
304                 if (level < 0)
305                     return NULL;
306             } else {
307                 if (level == 0) {
308                     if (index) {
309                         index--;
310                     } else {
311                         char *end = xp_find_start_tag_end(ptr + 1);
312                         char *p;
313                         if (!end) {
314                             return NULL;
315                         }
316                         p = strchr(ptr, ' ');
317                         if (p && (p < end))  {
318                             end = p;
319                         }
320                         p = strchr(ptr, '\t');
321                         if (p && (p < end))  {
322                             end = p;
323                         }
324                         p = strchr(ptr, '\r');
325                         if (p && (p < end))  {
326                             end = p;
327                         }
328                         p = strchr(ptr, '\n');
329                         if (p && (p < end))  {
330                             end = p;
331                         }
332                         p = strchr(ptr, '/');
333                         if (p && (p < end))  {
334                             end = p;
335                         }
336 
337                         memcpy(name, ptr + 1, end-ptr-1);
338                         name[end-ptr-1] = 0;
339 
340                         xp_position[++xp_stack] = end;
341                         return name;
342                     }
343                 }
344 
345                 /* We want to skip over this particular element .*/
346                 ptr = xp_find_start_tag_end(ptr + 1);
347                 if (!ptr)
348                     return NULL;
349                 ptr--;
350                 level++;
351             }
352         } else if ((*ptr == '/') && (*(ptr+1) == '>')) {
353             level--;
354             if (level < 0)
355                 return NULL;
356         }
357         ptr++;
358     }
359     return NULL;
360 }
361 
xp_close_element(void)362 void xp_close_element(void)
363 {
364     if (xp_stack) {
365         xp_stack--;
366     }
367 }
368 
xp_get_value(const char * name)369 char *xp_get_value(const char *name)
370 {
371     int index = 0;
372     static char buffer[XP_MAX_FILE_LEN + 1];
373     char *ptr, *end, *check;
374 
375     end = xp_find_start_tag_end(xp_position[xp_stack] + 1);
376     if (!end)
377         return NULL;
378 
379     ptr = xp_position[xp_stack];
380 
381     while (*ptr) {
382         ptr = strstr(ptr, name);
383 
384         if (!ptr)
385             return NULL;
386         if (ptr > end)
387             return NULL;
388         /* FIXME: potential BUG in parser: we must retrieve full word,
389          * so the use of strstr as it is is not enough.
390          * we should check that the retrieved word is not a piece of
391          * another one. */
392         check = ptr - 1;
393         if (check >= xp_position[xp_stack]) {
394             if ((*check != '\r') &&
395                     (*check != '\n') &&
396                     (*check != '\t') &&
397                     (*check != ' ' )) {
398                 ptr += strlen(name);
399                 continue;
400             }
401         } else
402             return(NULL);
403 
404         ptr += strlen(name);
405         while ((*ptr == '\r') ||
406                 (*ptr == '\n') ||
407                 (*ptr == '\t') ||
408                 (*ptr == ' ' )    ) {
409             ptr++;
410         }
411         if (*ptr != '=')
412             continue;
413         ptr++;
414         while ((*ptr == '\r') ||
415                 (*ptr == '\n') ||
416                 (*ptr == '\t') ||
417                 (*ptr ==  ' ')    ) {
418             ptr++;
419         }
420         ptr++;
421         if (*ptr) {
422             while (*ptr) {
423                 if (*ptr == '\\') {
424                     ptr++;
425                     switch(*ptr) {
426                     case '\\':
427                         buffer[index++] = '\\';
428                         break;
429                     case '"':
430                         buffer[index++] = '"';
431                         break;
432                     case 'n':
433                         buffer[index++] = '\n';
434                         break;
435                     case 't':
436                         buffer[index++] = '\t';
437                         break;
438                     case 'r':
439                         buffer[index++] = '\r';
440                         break;
441                     default:
442                         buffer[index++] = '\\';
443                         buffer[index++] = *ptr;
444                         break;
445                     }
446                     ptr++;
447                 } else if (*ptr == '"') {
448                     break;
449                 } else {
450                     buffer[index++] = *ptr++;
451                 }
452                 if (index > XP_MAX_FILE_LEN)
453                     return NULL;
454             }
455             buffer[index] = 0;
456             return buffer;
457         }
458     }
459     return NULL;
460 }
461 
xp_get_cdata(void)462 char* xp_get_cdata(void)
463 {
464     static char buffer[XP_MAX_FILE_LEN + 1];
465     const char *end = xp_find_local_end();
466     const char *ptr;
467 
468     ptr = strstr(xp_position[xp_stack], "<![CDATA[");
469     if (!ptr) {
470         return NULL;
471     }
472     ptr += 9;
473     if (ptr > end)
474         return NULL;
475     end = strstr(ptr, "]]>");
476     if (!end) {
477         return NULL;
478     }
479     if ((end - ptr) > XP_MAX_FILE_LEN)
480         return NULL;
481     memcpy(buffer, ptr, (end - ptr));
482     buffer[end-ptr] = 0;
483     return buffer;
484 }
485 
/*
 * Find the first line of buf that starts with `name`, compared without
 * regard to case. `name` must be given in lower case.
 *
 * Only lines preceded by a '\n' are considered, so the very first line of
 * buf is never matched. Returns a pointer just past the matched name, or
 * NULL when no line matches.
 */
static const char *xp_find_header_value(const char *buf, const char *name)
{
    size_t name_len = strlen(name);
    const char *line;

    for (line = strchr(buf, '\n'); line; line = strchr(line + 1, '\n')) {
        size_t i = 0;

        /* A NUL in the text never equals a character of name, so the
         * comparison cannot run past the end of buf. */
        while ((i < name_len) &&
                (tolower((unsigned char)line[1 + i]) ==
                 (unsigned char)name[i])) {
            i++;
        }
        if (i == name_len)
            return line + 1 + name_len;
    }
    return NULL;
}

/*
 * Extract the value of the Content-Length header from a message.
 *
 * The header name is matched case-insensitively at the start of a line.
 * The long form "Content-Length:" is looked for first; the compact form
 * "l:" is used only when the long form is absent.
 *
 * Returns the parsed integer, or -1 when P_buffer is NULL, when no such
 * header is present, or when its value does not start with a number.
 */
int xp_get_content_length(const char *P_buffer)
{
    const char *L_ctl_hdr;
    int L_content_length = -1;

    if (!P_buffer) {
        return -1;
    }

    L_ctl_hdr = xp_find_header_value(P_buffer, "content-length:");
    if (!L_ctl_hdr) {
        L_ctl_hdr = xp_find_header_value(P_buffer, "l:");
    }
    if (!L_ctl_hdr) {
        /* The message does not contain content-length. */
        return -1;
    }

    /* <ctype.h> functions need an unsigned char value. */
    while (isspace((unsigned char)*L_ctl_hdr))
        L_ctl_hdr++;

    if (sscanf(L_ctl_hdr, "%d", &L_content_length) != 1) {
        L_content_length = -1;
    }
    return L_content_length;
}
525