1 // This file is part of BOINC.
2 // http://boinc.berkeley.edu
3 // Copyright (C) 2008 University of California
4 //
5 // BOINC is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU Lesser General Public License
7 // as published by the Free Software Foundation,
8 // either version 3 of the License, or (at your option) any later version.
9 //
10 // BOINC is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 // See the GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with BOINC. If not, see <http://www.gnu.org/licenses/>.
17
18 // This file contains two XML parsers:
19 //
20 // 1) a very crude one, which assumes all elements are either single-line or
21 // have start and end tags on separate lines.
22 // This is meant to be used ONLY for parsing XML files produced
23 // by the BOINC scheduling server or client.
24 //
25 // 2) a better one (class XML_PARSER) which parses arbitrary XML
26
27 #if defined(_WIN32) && !defined(__STDWX_H__)
28 #include "boinc_win.h"
29 #elif defined(_WIN32) && defined(__STDWX_H__)
30 #include "stdwx.h"
31 #else
32 #include "config.h"
33 #include <cstring>
34 #include <cstdlib>
35 #include <string>
36 #include <cmath>
37 #include <ctype.h>
38 #include <errno.h>
39 #if HAVE_IEEEFP_H
40 #include <ieeefp.h>
41 #endif
42 #endif
43
44 #ifdef _USING_FCGI_
45 #include "boinc_fcgi.h"
46 #endif
47
48 #include "error_numbers.h"
49 #include "str_replace.h"
50 #include "str_util.h"
51 #include "util.h"
52
53 #include "parse.h"
54
55 using std::string;
56
57 // Parse a boolean; tag is of form "foobar"
58 // Accept either <foobar/>, <foobar />, or <foobar>0|1</foobar>
59 // (possibly with leading/trailing white space)
60 //
parse_bool(const char * buf,const char * tag,bool & result)61 bool parse_bool(const char* buf, const char* tag, bool& result) {
62 char tag2[256], tag3[256];
63 int x;
64 // quick check to reject most cases
65 //
66 if (!strstr(buf, tag)) {
67 return false;
68 }
69 snprintf(tag2, sizeof(tag2), "<%s/>", tag);
70 snprintf(tag3, sizeof(tag3), "<%s />", tag);
71 if (match_tag(buf, tag2) || match_tag(buf, tag3)) {
72 result = true;
73 return true;
74 }
75 snprintf(tag2, sizeof(tag2), "<%s>", tag);
76 if (parse_int(buf, tag2, x)) {
77 result = (x != 0);
78 return true;
79 }
80 return false;
81 }
82
83 // parse a string of the form ...<tag attrs>string</tag>...;
84 // returns the "string" part.
85 // Does XML unescaping (replace < with <)
86 // "string" may not include '<'
87 // Strips white space from ends.
88 // Use "<tag", not "<tag>", if there might be attributes
89 //
parse_str(const char * buf,const char * tag,char * dest,int destlen)90 bool parse_str(const char* buf, const char* tag, char* dest, int destlen) {
91 string str;
92 const char* p;
93 int len;
94
95 p = strstr(buf, tag);
96 if (!p) return false;
97 p = strchr(p, '>');
98 if (!p) return false;
99 p++;
100 const char* q = strchr(p, '<');
101 if (!q) return false;
102 len = (int)(q-p);
103 if (len >= destlen) len = destlen-1;
104 memcpy(dest, p, len);
105 dest[len] = 0;
106 strip_whitespace(dest);
107 xml_unescape(dest);
108 return true;
109 }
110
parse_str(const char * buf,const char * tag,string & dest)111 bool parse_str(const char* buf, const char* tag, string& dest) {
112 char tempbuf[1024];
113 if (!parse_str(buf, tag, tempbuf, 1024)) return false;
114 dest = tempbuf;
115 return true;
116 }
117
118 // parse a string of the form 'xxx name="value" xxx';
119 // returns value in dest
120 //
parse_attr(const char * buf,const char * name,char * dest,int len)121 void parse_attr(const char* buf, const char* name, char* dest, int len) {
122 const char* p;
123 const char *q;
124
125 strcpy(dest, "");
126 p = strstr(buf, name);
127 if (!p) return;
128 p = strchr(p, '"');
129 if (!p) return;
130 q = strchr(p+1, '"');
131 if (!q) return;
132 if (len > q-p) len = (int)(q-p);
133 strlcpy(dest, p+1, len);
134 }
135
copy_stream(FILE * in,FILE * out)136 int copy_stream(FILE* in, FILE* out) {
137 char buf[1024];
138 int n, m;
139 while (1) {
140 n = (int)fread(buf, 1, 1024, in);
141 m = (int)fwrite(buf, 1, n, out);
142 if (m != n) return ERR_FWRITE;
143 if (n < 1024) break;
144 }
145 return 0;
146 }
147
148 // append to a malloc'd string
149 // If reallocation fails, the pointer p remains unchanged, and the data will
150 // not be freed. (strong exception safety)
151 //
strcatdup(char * & p,char * buf)152 int strcatdup(char*& p, char* buf) {
153 char* new_p = (char*)realloc(p, strlen(p) + strlen(buf)+1);
154 if (!new_p) {
155 return ERR_MALLOC;
156 }
157 p = new_p;
158 strcat(p, buf);
159 return 0;
160 }
161
162 // Copy from a file to a malloc'd string until the end tag is reached
163 // Does NOT copy the start and end tags.
164 //
dup_element_contents(FILE * in,const char * end_tag,char ** pp)165 int dup_element_contents(FILE* in, const char* end_tag, char** pp) {
166 char line[256];
167 int bufsize = 4000000;
168 int nused=0; // not counting ending NULL
169 char* buf = (char*)malloc(bufsize);
170
171 // Start with a big buffer.
172 // When done, copy to an exact-size buffer
173 //
174 while (fgets(line, 256, in)) {
175 if (strstr(line, end_tag)) {
176 *pp = (char*)malloc(nused+1);
177 strcpy(*pp, buf);
178 free(buf);
179 return 0;
180 }
181 int n = (int)strlen(line);
182 if (nused + n >= bufsize) {
183 bufsize *= 2;
184 char *b = buf;
185 buf = (char*)realloc(b, bufsize);
186 if (!buf) {
187 free(b);
188 return ERR_XML_PARSE;
189 }
190 }
191 strcpy(buf+nused, line);
192 nused += n;
193 }
194 free(buf);
195 return ERR_XML_PARSE;
196 }
197
dup_element(FILE * in,const char * tag_name,char ** pp)198 int dup_element(FILE* in, const char* tag_name, char** pp) {
199 char buf[256], end_tag[256];
200 int retval;
201
202 snprintf(buf, sizeof(buf), "<%s>\n", tag_name);
203 snprintf(end_tag, sizeof(end_tag), "</%s>", tag_name);
204
205 char* p = strdup(buf);
206 while (fgets(buf, 256, in)) {
207 if (strstr(buf, end_tag)) {
208 snprintf(buf, sizeof(buf), "</%s>\n", tag_name);
209 retval = strcatdup(p, buf);
210 if (retval) {
211 free(p);
212 return retval;
213 }
214 *pp = p;
215 return 0;
216 }
217 retval = strcatdup(p, buf);
218 if (retval) {
219 free(p);
220 return retval;
221 }
222 }
223 free(p);
224 return ERR_XML_PARSE;
225 }
226
227 // copy from a file to static buffer
228 //
copy_element_contents(FILE * in,const char * end_tag,char * p,int len)229 int copy_element_contents(FILE* in, const char* end_tag, char* p, int len) {
230 char buf[256];
231 int n;
232 int retval = 0;
233
234 strcpy(p, "");
235 while (fgets(buf, 256, in)) {
236 if (strstr(buf, end_tag)) {
237 return retval;
238 }
239 n = (int)strlen(buf);
240 if (n >= len-1) {
241 retval = ERR_XML_PARSE;
242 continue;
243 }
244 strcat(p, buf);
245 len -= n;
246 }
247 return ERR_XML_PARSE;
248 }
249
copy_element_contents(FILE * in,const char * end_tag,string & str)250 int copy_element_contents(FILE* in, const char* end_tag, string& str) {
251 int c;
252 size_t end_tag_len = strlen(end_tag);
253 size_t n = 0;
254
255 str = "";
256 while (1) {
257 c = fgetc(in);
258 if (c == EOF) break;
259 if (n >= end_tag_len) {
260 const char* p = str.c_str() + n - end_tag_len;
261 if (!strcmp(p, end_tag)) {
262 str.erase(n-end_tag_len, end_tag_len);
263 return 0;
264 }
265 }
266 str += c;
267 n++;
268 }
269 return ERR_XML_PARSE;
270 }
271
272 // replace XML element contents (element must be present)
273 //
replace_element_contents(char * buf,const char * start,const char * end,const char * replacement)274 void replace_element_contents(
275 char* buf, const char* start, const char* end, const char* replacement
276 ) {
277 char temp[4096], *p, *q;
278
279 p = strstr(buf, start);
280 p += strlen(start);
281 q = strstr(p, end);
282 strlcpy(temp, q, sizeof(temp));
283 strcpy(p, replacement);
284 strcat(p, temp);
285 }
286
287 // if the string contains a substring of the form X...Y,
288 // remove the first such.
remove_element(char * buf,const char * start,const char * end)289 bool remove_element(char* buf, const char* start, const char* end) {
290 char* p, *q;
291 p = strstr(buf, start);
292 if (!p) return false;
293 q = strstr(p+strlen(start), end);
294 if (!q) return false;
295 strcpy_overlap(p, q+strlen(end));
296 return true;
297 }
298
299 // replace a substring. Do at most one instance.
300 //
str_replace(char * str,const char * substr,const char * replacement)301 bool str_replace(char* str, const char* substr, const char* replacement) {
302 char temp[4096], *p;
303
304 p = strstr(str, substr);
305 if (!p) return false;
306 int n = (int)strlen(substr);
307 safe_strcpy(temp, p+n);
308 strcpy(p, replacement);
309 strcat(p, temp);
310 return true;
311 }
312
313 // if the given XML has an element of the form
314 // <venue name="venue_name">
315 // ...
316 // </venue>
317 // then return the contents of that element.
318 // Otherwise strip out all <venue> elements
319 //
extract_venue(const char * in,const char * venue_name,char * out,int len)320 void extract_venue(const char* in, const char* venue_name, char* out, int len) {
321 const char* p, *q;
322 char* wp;
323 char buf[256];
324 snprintf(buf, sizeof(buf), "<venue name=\"%s\">", venue_name);
325 p = strstr(in, buf);
326 if (p) {
327 // prefs contain the specified venue
328 //
329 p += strlen(buf);
330 strlcpy(out, p, len);
331 wp = strstr(out, "</venue");
332 if (wp) *wp = 0;
333 } else {
334 // prefs don't contain the specified venue
335 //
336 q = in;
337 strcpy(out, "");
338 while (1) {
339 p = strstr(q, "<venue");
340 if (!p) {
341 strlcat(out, q, len);
342 break;
343 }
344 strncat(out, q, p-q);
345 q = strstr(p, "</venue>");
346 if (!q) break;
347 q += strlen("</venue>");
348 }
349 }
350 }
351
352 // copy a line from the given string.
353 // kinda like fgets() when you're reading from a string
354 //
sgets(char * buf,int len,char * & in)355 char* sgets(char* buf, int len, char*& in) {
356 char* p;
357
358 p = strstr(in, "\n");
359 if (!p) return NULL;
360 *p = 0;
361 strlcpy(buf, in, len);
362 *p = '\n';
363 in = p+1;
364 return buf;
365 }
366
// Copy "in" to "out", replacing each byte > 127 with a numeric
// character reference of the form &#NNN;
// Copying stops early once fewer than about 8 bytes of the
// len-byte output buffer remain; out is always NUL-terminated.
//
void non_ascii_escape(const char* in, char* out, int len) {
    char* wp = out;

    for (const char* rp = in; *rp; rp++) {
        int code = ((int)*rp) & 0xff;   // treat the byte as unsigned
        if (code <= 127) {
            *wp++ = code;
        } else {
            char entity[256];
            snprintf(entity, sizeof(entity), "&#%d;", code);
            strcpy(wp, entity);
            wp += strlen(entity);
        }
        // leave room for the longest entity plus the terminator
        if (wp > out + len - 8) break;
    }
    *wp = 0;
}
386
// Copy "in" to "out" with XML escaping:
//   '<' becomes "&lt;"
//   '&' becomes "&amp;"
//   bytes > 127, and tab, LF and CR, become numeric references (&#NNN;)
//   other control characters (< 32) are dropped
// Copying stops early once fewer than about 8 bytes of the
// len-byte output buffer remain; out is always NUL-terminated.
//
// NOTE: these used to take std::string instead of char* args.
// But this performed poorly.
//
// NOTE: output buffer should be 6X size of input
//
void xml_escape(const char* in, char* out, int len) {
    char buf[256], *p;

    p = out;

    for (; *in; in++) {
        int x = (int) *in;
        x &= 0xff;   // just in case
        if (x == '<') {
            // the pointer advance must equal the entity's length
            strcpy(p, "&lt;");
            p += 4;
        } else if (x == '&') {
            strcpy(p, "&amp;");
            p += 5;
        } else if (x>127 || x == 9 || x == 10 || x == 13) {
            snprintf(buf, sizeof(buf), "&#%d;", x);
            strcpy(p, buf);
            p += strlen(buf);
        } else if (x >= 32) {
            *p++ = x;
        }
        // (remaining control characters are intentionally skipped)

        if (p > out + len - 8) break;
    }
    *p = 0;
}
427
428 // Note: XML unescaping never increases string length
429 //
xml_unescape(string & in)430 void xml_unescape(string& in) {
431 int n = (int)in.size()+1+16; // +16 avoids valgrind warnings
432 char* buf = (char*)malloc(n);
433 strcpy(buf, in.c_str());
434 xml_unescape(buf);
435 in = buf;
436 free(buf);
437 }
438
// XML-unescape a NUL-terminated string in place.
// Handles &lt; &gt; &quot; &apos; &amp; and decimal references (&#NNN;).
// An '&' that doesn't start one of these is copied unchanged.
// The output is never longer than the input, so writing into the
// same buffer is safe.
//
void xml_unescape(char* buf) {
    char* out = buf;
    char* in = buf;
    char* p;
    while (*in) {
        if (*in != '&') {       // avoid strncmp's if possible
            *out++ = *in++;
        } else if (!strncmp(in, "&lt;", 4)) {
            *out++ = '<';
            in += 4;
        } else if (!strncmp(in, "&gt;", 4)) {
            *out++ = '>';
            in += 4;
        } else if (!strncmp(in, "&quot;", 6)) {
            // compare all 6 chars: we skip 6, and a partial match
            // near the end of the string would skip past the terminator
            *out++ = '"';
            in += 6;
        } else if (!strncmp(in, "&apos;", 6)) {
            *out++ = '\'';
            in += 6;
        } else if (!strncmp(in, "&amp;", 5)) {
            *out++ = '&';
            in += 5;
        } else if (!strncmp(in, "&#", 2)) {
            in += 2;
            char c = atoi(in);
            *out++ = c;
            // skip to just past the next ';' if there is one,
            // otherwise skip the digits
            p = strchr(in, ';');
            if (p) {
                in = p+1;
            } else {
                // isdigit() requires a value in unsigned char range
                while (isdigit((unsigned char)*in)) in++;
            }
        } else {
            *out++ = *in++;
        }
    }
    *out = 0;
}
477
478 // we got an unrecognized line.
479 // If it has two <'s (e.g. <foo>xx</foo>) return 0.
480 // If it's of the form <foo/> return 0.
481 // If it's of the form <foo> then scan for </foo> and return 0.
482 // Otherwise return ERR_XML_PARSE
483 //
skip_unrecognized(char * buf,MIOFILE & fin)484 int skip_unrecognized(char* buf, MIOFILE& fin) {
485 char* p, *q, buf2[256];
486 std::string close_tag;
487
488 p = strchr(buf, '<');
489 if (!p) {
490 return ERR_XML_PARSE;
491 }
492 if (strchr(p+1, '<')) {
493 return 0;
494 }
495 q = strchr(p+1, '>');
496 if (!q) {
497 return ERR_XML_PARSE;
498 }
499 if (q[-1] == '/') return 0;
500 *q = 0;
501 close_tag = string("</") + string(p+1) + string(">");
502 while (fin.fgets(buf2, 256)) {
503 if (strstr(buf2, close_tag.c_str())) {
504 return 0;
505 }
506 }
507 return ERR_XML_PARSE;
508 }
509
XML_PARSER(MIOFILE * _f)510 XML_PARSER::XML_PARSER(MIOFILE* _f) {
511 strcpy(parsed_tag, "");
512 is_tag = false;
513 f = _f;
514 }
515
scan_comment()516 int XML_PARSER::scan_comment() {
517 char buf[256];
518 char* p = buf;
519 while (1) {
520 int c = f->_getc();
521 if (!c || c == EOF) return XML_PARSE_EOF;
522 *p++ = c;
523 *p = 0;
524 if (strstr(buf, "-->")) {
525 return XML_PARSE_COMMENT;
526 }
527 if (strlen(buf) > 32) {
528 strcpy_overlap(buf, buf+16);
529 p -= 16;
530 }
531 }
532 }
533
scan_cdata(char * buf,int len)534 int XML_PARSER::scan_cdata(char* buf, int len) {
535 char* p = buf;
536 len--;
537 while (1) {
538 int c = f->_getc();
539 if (!c || c == EOF) return XML_PARSE_EOF;
540 if (len) {
541 *p++ = c;
542 len--;
543 }
544 if (c == '>') {
545 *p = 0;
546 char* q = strstr(buf, "]]>");
547 if (q) {
548 *q = 0;
549 return XML_PARSE_CDATA;
550 }
551 }
552 }
553 }
554
// Handle the archaic form <tag/>, which means empty string.
// Returns true if parsed_tag is exactly start_tag followed by '/'.
//
static inline bool is_empty_string(char* parsed_tag, const char* start_tag) {
    size_t n = strlen(parsed_tag);

    // an empty tag has no last character to look at
    //
    if (n == 0) return false;
    if (parsed_tag[n-1] != '/') return false;

    // compare in place rather than copying into a fixed-size buffer
    //
    if (strlen(start_tag) != n-1) return false;
    return strncmp(parsed_tag, start_tag, n-1) == 0;
}
570
571 // we've parsed the start tag of a string; parse the string itself.
572 //
parse_str_aux(const char * start_tag,char * buf,int len)573 bool XML_PARSER::parse_str_aux(const char* start_tag, char* buf, int len) {
574 bool eof;
575 char end_tag[TAG_BUF_LEN], tag[TAG_BUF_LEN];
576
577 end_tag[0] = '/';
578 strcpy(end_tag+1, start_tag);
579
580 // get text after start tag
581 //
582 int retval = get_aux(buf, len, 0, 0);
583 if (retval == XML_PARSE_EOF) return false;
584 if (retval == XML_PARSE_OVERFLOW) return false;
585
586 // if it's the end tag, return empty string
587 //
588 if (retval == XML_PARSE_TAG) {
589 if (strcmp(buf, end_tag)) {
590 return false;
591 } else {
592 strcpy(buf, "");
593 return true;
594 }
595 }
596
597 eof = get(tag, sizeof(tag), is_tag);
598 if (eof) return false;
599 if (!is_tag) return false;
600 if (strcmp(tag, end_tag)) return false;
601 if (retval != XML_PARSE_CDATA) {
602 xml_unescape(buf);
603 }
604 return true;
605 }
606
607 // We just parsed "parsed_tag".
608 // If it matches "start_tag", and is followed by a string
609 // and by the matching close tag, return the string in "buf",
610 // and return true.
611 //
parse_str(const char * start_tag,char * buf,int len)612 bool XML_PARSER::parse_str(const char* start_tag, char* buf, int len) {
613 if (is_empty_string(parsed_tag, start_tag)) {
614 strcpy(buf, "");
615 return true;
616 }
617 if (strcmp(parsed_tag, start_tag)) return false;
618 return parse_str_aux(start_tag, buf, len);
619 }
620
621 #define MAX_XML_STRING 262144
622
623 // same, for std::string
624 //
parse_string(const char * start_tag,string & str)625 bool XML_PARSER::parse_string(const char* start_tag, string& str) {
626 if (is_empty_string(parsed_tag, start_tag)) {
627 str = "";
628 return true;
629 }
630 if (strcmp(parsed_tag, start_tag)) return false;
631 char *buf=(char *)malloc(MAX_XML_STRING);
632 bool flag = parse_str_aux(start_tag, buf, MAX_XML_STRING);
633 if (flag) {
634 str = buf;
635 }
636 free(buf);
637 return flag;
638 }
639
640 // Same, for integers
641 //
parse_int(const char * start_tag,int & i)642 bool XML_PARSER::parse_int(const char* start_tag, int& i) {
643 char buf[256], *end;
644 bool eof;
645 char end_tag[TAG_BUF_LEN], tag[TAG_BUF_LEN];
646
647 if (strcmp(parsed_tag, start_tag)) return false;
648
649 end_tag[0] = '/';
650 strcpy(end_tag+1, start_tag);
651
652 eof = get(buf, sizeof(buf), is_tag);
653 if (eof) return false;
654 if (is_tag) {
655 if (!strcmp(buf, end_tag)) {
656 i = 0; // treat <foo></foo> as <foo>0</foo>
657 return true;
658 } else {
659 return false;
660 }
661 }
662 errno = 0;
663 int val = strtol(buf, &end, 0);
664 if (errno) return false;
665 if (end != buf+strlen(buf)) return false;
666
667 eof = get(tag, sizeof(tag), is_tag);
668 if (eof) return false;
669 if (!is_tag) return false;
670 if (strcmp(tag, end_tag)) return false;
671 i = val;
672 return true;
673 }
674
675 // Same, for long
676 //
parse_long(const char * start_tag,long & i)677 bool XML_PARSER::parse_long(const char* start_tag, long& i) {
678 char buf[256], *end;
679 bool eof;
680 char end_tag[TAG_BUF_LEN], tag[TAG_BUF_LEN];
681
682 if (strcmp(parsed_tag, start_tag)) return false;
683
684 end_tag[0] = '/';
685 strcpy(end_tag+1, start_tag);
686
687 eof = get(buf, sizeof(buf), is_tag);
688 if (eof) return false;
689 if (is_tag) {
690 if (!strcmp(buf, end_tag)) {
691 i = 0; // treat <foo></foo> as <foo>0</foo>
692 return true;
693 } else {
694 return false;
695 }
696 }
697 errno = 0;
698 long val = strtol(buf, &end, 0);
699 if (errno) return false;
700 if (end != buf+strlen(buf)) return false;
701
702 eof = get(tag, sizeof(tag), is_tag);
703 if (eof) return false;
704 if (!is_tag) return false;
705 if (strcmp(tag, end_tag)) return false;
706 i = val;
707 return true;
708 }
709
710 // Same, for doubles
711 //
parse_double(const char * start_tag,double & x)712 bool XML_PARSER::parse_double(const char* start_tag, double& x) {
713 char buf[256], *end;
714 bool eof;
715 char end_tag[TAG_BUF_LEN], tag[TAG_BUF_LEN];
716
717 if (strcmp(parsed_tag, start_tag)) return false;
718
719 end_tag[0] = '/';
720 strcpy(end_tag+1, start_tag);
721
722 eof = get(buf, sizeof(buf), is_tag);
723 if (eof) return false;
724 if (is_tag) {
725 if (!strcmp(buf, end_tag)) {
726 x = 0; // treat <foo></foo> as <foo>0</foo>
727 return true;
728 } else {
729 return false;
730 }
731 }
732 errno = 0;
733 double val = strtod(buf, &end);
734 if (errno) return false;
735 if (end != buf+strlen(buf)) return false;
736
737 eof = get(tag, sizeof(tag), is_tag);
738 if (eof) return false;
739 if (!is_tag) return false;
740 if (strcmp(tag, end_tag)) return false;
741 x = val;
742 return true;
743 }
744
745 // Same, for unsigned long
746 //
parse_ulong(const char * start_tag,unsigned long & x)747 bool XML_PARSER::parse_ulong(const char* start_tag, unsigned long& x) {
748 char buf[256], *end;
749 bool eof;
750 char end_tag[TAG_BUF_LEN], tag[TAG_BUF_LEN];
751
752 if (strcmp(parsed_tag, start_tag)) return false;
753
754 end_tag[0] = '/';
755 strcpy(end_tag+1, start_tag);
756
757 eof = get(buf, sizeof(buf), is_tag);
758 if (eof) return false;
759 if (is_tag) {
760 if (!strcmp(buf, end_tag)) {
761 x = 0; // treat <foo></foo> as <foo>0</foo>
762 return true;
763 } else {
764 return false;
765 }
766 }
767 errno = 0;
768 unsigned long val = strtoul(buf, &end, 0);
769 if (errno) return false;
770 if (end != buf+strlen(buf)) return false;
771
772 eof = get(tag, sizeof(tag), is_tag);
773 if (eof) return false;
774 if (!is_tag) return false;
775 if (strcmp(tag, end_tag)) return false;
776 x = val;
777 return true;
778 }
779
780 // Same, for unsigned long long
781 //
parse_ulonglong(const char * start_tag,unsigned long long & x)782 bool XML_PARSER::parse_ulonglong(const char* start_tag, unsigned long long& x) {
783 char buf[256], *end=0;
784 bool eof;
785 char end_tag[TAG_BUF_LEN], tag[TAG_BUF_LEN];
786
787 if (strcmp(parsed_tag, start_tag)) return false;
788
789 end_tag[0] = '/';
790 strcpy(end_tag+1, start_tag);
791
792 eof = get(buf, sizeof(buf), is_tag);
793 if (eof) return false;
794 if (is_tag) {
795 if (!strcmp(buf, end_tag)) {
796 x = 0; // treat <foo></foo> as <foo>0</foo>
797 return true;
798 } else {
799 return false;
800 }
801 }
802 errno = 0;
803 unsigned long long val = boinc_strtoull(buf, &end, 0);
804 if (errno) return false;
805 if (end != buf+strlen(buf)) return false;
806
807 eof = get(tag, sizeof(tag), is_tag);
808 if (eof) return false;
809 if (!is_tag) return false;
810 if (strcmp(tag, end_tag)) return false;
811 x = val;
812 return true;
813 }
814
815 // Same, for bools
816 //
parse_bool(const char * start_tag,bool & b)817 bool XML_PARSER::parse_bool(const char* start_tag, bool& b) {
818 char buf[256], *end;
819 bool eof;
820 char end_tag[TAG_BUF_LEN], tag[TAG_BUF_LEN];
821
822 // handle the archaic form <tag/>, which means true
823 //
824 safe_strcpy(tag, start_tag);
825 strcat(tag, "/");
826 if (!strcmp(parsed_tag, tag)) {
827 b = true;
828 return true;
829 }
830
831 // otherwise look for something of the form <tag>int</tag>
832 //
833 if (strcmp(parsed_tag, start_tag)) return false;
834
835 eof = get(buf, sizeof(buf), is_tag);
836 if (eof) return false;
837 if (is_tag) return false;
838 bool val = (strtol(buf, &end, 0) != 0);
839 if (end != buf+strlen(buf)) return false;
840
841 end_tag[0] = '/';
842 strcpy(end_tag+1, start_tag);
843 eof = get(tag, sizeof(tag), is_tag);
844 if (eof) return false;
845 if (!is_tag) return false;
846 if (strcmp(tag, end_tag)) return false;
847 b = val;
848 return true;
849 }
850
851 // parse a start tag (optionally preceded by <?xml>)
852 //
parse_start(const char * start_tag)853 bool XML_PARSER::parse_start(const char* start_tag) {
854 char tag[TAG_BUF_LEN];
855 bool eof;
856
857 eof = get(tag, sizeof(tag), is_tag);
858 if (eof || !is_tag ) {
859 return false;
860 }
861 if (strstr(tag, "?xml")) {
862 eof = get(tag, sizeof(tag), is_tag);
863 if (eof || !is_tag ) {
864 return false;
865 }
866 }
867 if (strcmp(tag, start_tag)) {
868 return false;
869 }
870 return true;
871 }
872
873 // We got an unexpected tag.
874 // If it's an end tag, do nothing.
875 // Otherwise skip until the end tag, if any
876 //
skip_unexpected(const char * start_tag,bool verbose,const char * where)877 void XML_PARSER::skip_unexpected(
878 const char* start_tag, bool verbose, const char* where
879 ) {
880 char buf[TAG_BUF_LEN], end_tag[TAG_BUF_LEN];
881
882 if (verbose) {
883 fprintf(stderr,
884 "%s: Unrecognized XML tag '<%s>' in %s; skipping\n",
885 time_to_string(dtime()), start_tag, where
886 );
887 }
888 if (strchr(start_tag, '/')) return;
889 snprintf(end_tag, sizeof(end_tag), "/%s", start_tag);
890
891 while (1) {
892 int c;
893 bool eof = scan_nonws(c);
894 if (eof) return;
895 if (c == '<') {
896 int retval = scan_tag(buf, sizeof(buf), 0, 0);
897 if (retval != XML_PARSE_TAG) continue;
898 if (!strcmp(buf, end_tag)) return;
899 skip_unexpected(buf, false, where);
900 }
901 }
902 }
903
904 // we just parsed a tag.
905 // copy this entire element, including start and end tags, to the buffer
906 //
copy_element(string & out)907 int XML_PARSER::copy_element(string& out) {
908 char end_tag[TAG_BUF_LEN], buf[ELEMENT_BUF_LEN];
909
910 // handle <foo/> case
911 //
912 size_t n = strlen(parsed_tag);
913 if (parsed_tag[n-1] == '/') {
914 out = "<";
915 out += parsed_tag;
916 out += ">";
917 return 0;
918 }
919 if (strchr(parsed_tag, '/')) return ERR_XML_PARSE;
920 out = "<";
921 out += parsed_tag;
922 out += ">";
923 snprintf(end_tag, sizeof(end_tag), "</%s>", parsed_tag);
924 int retval = element_contents(end_tag, buf, sizeof(buf));
925 if (retval) return retval;
926 out += buf;
927 out += end_tag;
928 return 0;
929 }
930